author     Jesús <heckyel@hyperbola.info>  2022-12-02 05:21:10 +0800
committer  Jesús <heckyel@hyperbola.info>  2022-12-02 05:21:10 +0800
commit     eaeeef9c1d1bedb76fea953c332ef84d53bffe2c (patch)
tree       c3cb5582247e47fc67c24cd7ff8ea857fb76821e
parent     1e5a50b71d8f0eae6007bedc329eecb24bb5aba3 (diff)
download   hypervideo-eaeeef9c1d1bedb76fea953c332ef84d53bffe2c.tar.lz
           hypervideo-eaeeef9c1d1bedb76fea953c332ef84d53bffe2c.tar.xz
           hypervideo-eaeeef9c1d1bedb76fea953c332ef84d53bffe2c.zip
update from upstream
-rw-r--r-- .flake8 | 3
-rw-r--r-- .gitlab-ci.yml | 33
-rw-r--r-- AUTHORS | 122
-rw-r--r-- CONTRIBUTING.md | 28
-rw-r--r-- CONTRIBUTORS | 143
-rw-r--r-- ChangeLog | 6142
-rw-r--r-- Changelog.md | 1727
-rw-r--r-- Makefile | 3
-rw-r--r-- README.md | 2733
-rw-r--r-- completions/zsh/_hypervideo | 2
-rw-r--r-- devscripts/__init__.py | 1
-rwxr-xr-x devscripts/bash-completion.py | 9
-rw-r--r-- devscripts/buildserver.py | 435
-rw-r--r-- devscripts/check-porn.py | 21
-rwxr-xr-x devscripts/fish-completion.py | 12
-rw-r--r-- devscripts/generate_aes_testdata.py | 12
-rw-r--r-- devscripts/lazy_load_template.py | 39
-rwxr-xr-x devscripts/make_contributing.py | 6
-rw-r--r-- devscripts/make_lazy_extractors.py | 213
-rw-r--r-- [-rwxr-xr-x] devscripts/make_readme.py | 90
-rw-r--r-- devscripts/make_supportedsites.py | 42
-rwxr-xr-x devscripts/posix-locale.sh | 6
-rw-r--r-- devscripts/prepare_manpage.py | 43
-rw-r--r-- devscripts/run_tests.bat | 1
-rwxr-xr-x devscripts/run_tests.sh | 12
-rw-r--r-- devscripts/set-variant.py | 36
-rw-r--r-- devscripts/utils.py | 35
-rwxr-xr-x devscripts/zsh-completion.py | 9
-rw-r--r-- docs/.gitignore | 1
-rw-r--r-- docs/Makefile | 177
-rw-r--r-- docs/conf.py | 71
-rw-r--r-- docs/index.rst | 23
-rw-r--r-- docs/module_guide.rst | 67
-rw-r--r-- docs/supportedsites.md | 1228
-rw-r--r-- hypervideo.plugin.zsh | 24
-rw-r--r-- hypervideo_dl/YoutubeDL.py | 1613
-rw-r--r-- hypervideo_dl/__init__.py | 233
-rw-r--r-- hypervideo_dl/__main__.py | 6
-rw-r--r-- hypervideo_dl/aes.py | 101
-rw-r--r-- hypervideo_dl/cache.py | 47
-rw-r--r-- hypervideo_dl/compat.py | 330
-rw-r--r-- hypervideo_dl/compat/__init__.py | 78
-rw-r--r-- hypervideo_dl/compat/_deprecated.py | 16
-rw-r--r-- hypervideo_dl/compat/_legacy.py | 97
-rw-r--r-- hypervideo_dl/compat/compat_utils.py | 70
-rw-r--r-- hypervideo_dl/compat/functools.py | 26
-rw-r--r-- hypervideo_dl/compat/imghdr.py | 16
-rw-r--r-- hypervideo_dl/compat/shutil.py | 30
-rw-r--r-- hypervideo_dl/cookies.py | 514
-rw-r--r-- hypervideo_dl/dependencies.py | 97
-rw-r--r-- hypervideo_dl/downloader/__init__.py | 31
-rw-r--r-- hypervideo_dl/downloader/common.py | 333
-rw-r--r-- hypervideo_dl/downloader/dash.py | 23
-rw-r--r-- hypervideo_dl/downloader/external.py | 219
-rw-r--r-- hypervideo_dl/downloader/f4m.py | 54
-rw-r--r-- hypervideo_dl/downloader/fc2.py | 11
-rw-r--r-- hypervideo_dl/downloader/fragment.py | 191
-rw-r--r-- hypervideo_dl/downloader/hls.py | 66
-rw-r--r-- hypervideo_dl/downloader/http.py | 98
-rw-r--r-- hypervideo_dl/downloader/ism.py | 64
-rw-r--r-- hypervideo_dl/downloader/mhtml.py | 30
-rw-r--r-- hypervideo_dl/downloader/niconico.py | 13
-rw-r--r-- hypervideo_dl/downloader/rtmp.py | 12
-rw-r--r-- hypervideo_dl/downloader/rtsp.py | 9
-rw-r--r-- hypervideo_dl/downloader/websocket.py | 19
-rw-r--r-- hypervideo_dl/downloader/youtube_live_chat.py | 53
-rw-r--r-- hypervideo_dl/extractor/__init__.py | 52
-rw-r--r-- hypervideo_dl/extractor/_extractors.py | 2354
-rw-r--r-- hypervideo_dl/extractor/abc.py | 5
-rw-r--r-- hypervideo_dl/extractor/abcnews.py | 4
-rw-r--r-- hypervideo_dl/extractor/abcotvs.py | 6
-rw-r--r-- hypervideo_dl/extractor/abematv.py | 254
-rw-r--r-- hypervideo_dl/extractor/academicearth.py | 2
-rw-r--r-- hypervideo_dl/extractor/acast.py | 4
-rw-r--r-- hypervideo_dl/extractor/acfun.py | 199
-rw-r--r-- hypervideo_dl/extractor/adn.py | 50
-rw-r--r-- hypervideo_dl/extractor/adobeconnect.py | 3
-rw-r--r-- hypervideo_dl/extractor/adobepass.py | 98
-rw-r--r-- hypervideo_dl/extractor/adobetv.py | 5
-rw-r--r-- hypervideo_dl/extractor/adultswim.py | 4
-rw-r--r-- hypervideo_dl/extractor/aenetworks.py | 17
-rw-r--r-- hypervideo_dl/extractor/aeonco.py | 40
-rw-r--r-- hypervideo_dl/extractor/afreecatv.py | 71
-rw-r--r-- hypervideo_dl/extractor/agora.py | 251
-rw-r--r-- hypervideo_dl/extractor/airmozilla.py | 3
-rw-r--r-- hypervideo_dl/extractor/aliexpress.py | 3
-rw-r--r-- hypervideo_dl/extractor/aljazeera.py | 3
-rw-r--r-- hypervideo_dl/extractor/allocine.py | 5
-rw-r--r-- hypervideo_dl/extractor/alphaporno.py | 2
-rw-r--r-- hypervideo_dl/extractor/alsace20tv.py | 4
-rw-r--r-- hypervideo_dl/extractor/alura.py | 7
-rw-r--r-- hypervideo_dl/extractor/amara.py | 3
-rw-r--r-- hypervideo_dl/extractor/amazon.py | 31
-rw-r--r-- hypervideo_dl/extractor/amazonminitv.py | 290
-rw-r--r-- hypervideo_dl/extractor/amcnetworks.py | 6
-rw-r--r-- hypervideo_dl/extractor/americastestkitchen.py | 57
-rw-r--r-- hypervideo_dl/extractor/amp.py | 7
-rw-r--r-- hypervideo_dl/extractor/angel.py | 56
-rw-r--r-- hypervideo_dl/extractor/animelab.py | 278
-rw-r--r-- hypervideo_dl/extractor/animeondemand.py | 284
-rw-r--r-- hypervideo_dl/extractor/ant1newsgr.py | 19
-rw-r--r-- hypervideo_dl/extractor/anvato.py | 233
-rw-r--r-- hypervideo_dl/extractor/anvato_token_generator/__init__.py | 7
-rw-r--r-- hypervideo_dl/extractor/anvato_token_generator/common.py | 6
-rw-r--r-- hypervideo_dl/extractor/anvato_token_generator/nfl.py | 30
-rw-r--r-- hypervideo_dl/extractor/aol.py | 6
-rw-r--r-- hypervideo_dl/extractor/apa.py | 15
-rw-r--r-- hypervideo_dl/extractor/aparat.py | 5
-rw-r--r-- hypervideo_dl/extractor/appleconnect.py | 3
-rw-r--r-- hypervideo_dl/extractor/applepodcasts.py | 3
-rw-r--r-- hypervideo_dl/extractor/appletrailers.py | 5
-rw-r--r-- hypervideo_dl/extractor/archiveorg.py | 437
-rw-r--r-- hypervideo_dl/extractor/arcpublishing.py | 8
-rw-r--r-- hypervideo_dl/extractor/ard.py | 7
-rw-r--r-- hypervideo_dl/extractor/arkena.py | 17
-rw-r--r-- hypervideo_dl/extractor/arnes.py | 6
-rw-r--r-- hypervideo_dl/extractor/arte.py | 373
-rw-r--r-- hypervideo_dl/extractor/asiancrush.py | 3
-rw-r--r-- hypervideo_dl/extractor/atresplayer.py | 5
-rw-r--r-- hypervideo_dl/extractor/atscaleconf.py | 34
-rw-r--r-- hypervideo_dl/extractor/atttechchannel.py | 2
-rw-r--r-- hypervideo_dl/extractor/atvat.py | 4
-rw-r--r-- hypervideo_dl/extractor/audimedia.py | 4
-rw-r--r-- hypervideo_dl/extractor/audioboom.py | 76
-rw-r--r-- hypervideo_dl/extractor/audiodraft.py | 93
-rw-r--r-- hypervideo_dl/extractor/audiomack.py | 3
-rw-r--r-- hypervideo_dl/extractor/audius.py | 11
-rw-r--r-- hypervideo_dl/extractor/awaan.py | 5
-rw-r--r-- hypervideo_dl/extractor/aws.py | 5
-rw-r--r-- hypervideo_dl/extractor/azmedien.py | 3
-rw-r--r-- hypervideo_dl/extractor/baidu.py | 4
-rw-r--r-- hypervideo_dl/extractor/banbye.py | 5
-rw-r--r-- hypervideo_dl/extractor/bandaichannel.py | 7
-rw-r--r-- hypervideo_dl/extractor/bandcamp.py | 23
-rw-r--r-- hypervideo_dl/extractor/bannedvideo.py | 3
-rw-r--r-- hypervideo_dl/extractor/bbc.py | 56
-rw-r--r-- hypervideo_dl/extractor/beatport.py | 4
-rw-r--r-- hypervideo_dl/extractor/beeg.py | 4
-rw-r--r-- hypervideo_dl/extractor/behindkink.py | 4
-rw-r--r-- hypervideo_dl/extractor/bellmedia.py | 14
-rw-r--r-- hypervideo_dl/extractor/berufetv.py | 70
-rw-r--r-- hypervideo_dl/extractor/bet.py | 2
-rw-r--r-- hypervideo_dl/extractor/bfi.py | 3
-rw-r--r-- hypervideo_dl/extractor/bfmtv.py | 5
-rw-r--r-- hypervideo_dl/extractor/bibeltv.py | 3
-rw-r--r-- hypervideo_dl/extractor/bigflix.py | 5
-rw-r--r-- hypervideo_dl/extractor/bigo.py | 15
-rw-r--r-- hypervideo_dl/extractor/bild.py | 3
-rw-r--r-- hypervideo_dl/extractor/bilibili.py | 1007
-rw-r--r-- hypervideo_dl/extractor/biobiochiletv.py | 3
-rw-r--r-- hypervideo_dl/extractor/biqle.py | 4
-rw-r--r-- hypervideo_dl/extractor/bitchute.py | 279
-rw-r--r-- hypervideo_dl/extractor/bitwave.py | 3
-rw-r--r-- hypervideo_dl/extractor/blackboardcollaborate.py | 4
-rw-r--r-- hypervideo_dl/extractor/bleacherreport.py | 3
-rw-r--r-- hypervideo_dl/extractor/blinkx.py | 86
-rw-r--r-- hypervideo_dl/extractor/blogger.py | 11
-rw-r--r-- hypervideo_dl/extractor/bloomberg.py | 14
-rw-r--r-- hypervideo_dl/extractor/bokecc.py | 6
-rw-r--r-- hypervideo_dl/extractor/bongacams.py | 21
-rw-r--r-- hypervideo_dl/extractor/booyah.py | 86
-rw-r--r-- hypervideo_dl/extractor/bostonglobe.py | 3
-rw-r--r-- hypervideo_dl/extractor/box.py | 5
-rw-r--r-- hypervideo_dl/extractor/bpb.py | 6
-rw-r--r-- hypervideo_dl/extractor/br.py | 5
-rw-r--r-- hypervideo_dl/extractor/bravotv.py | 3
-rw-r--r-- hypervideo_dl/extractor/breakcom.py | 4
-rw-r--r-- hypervideo_dl/extractor/breitbart.py | 6
-rw-r--r-- hypervideo_dl/extractor/brightcove.py | 530
-rw-r--r-- hypervideo_dl/extractor/bundesliga.py | 34
-rw-r--r-- hypervideo_dl/extractor/businessinsider.py | 3
-rw-r--r-- hypervideo_dl/extractor/buzzfeed.py | 5
-rw-r--r-- hypervideo_dl/extractor/byutv.py | 4
-rw-r--r-- hypervideo_dl/extractor/c56.py | 5
-rw-r--r-- hypervideo_dl/extractor/cableav.py | 2
-rw-r--r-- hypervideo_dl/extractor/callin.py | 6
-rw-r--r-- hypervideo_dl/extractor/caltrans.py | 4
-rw-r--r-- hypervideo_dl/extractor/cam4.py | 4
-rw-r--r-- hypervideo_dl/extractor/camdemy.py | 3
-rw-r--r-- hypervideo_dl/extractor/cammodels.py | 4
-rw-r--r-- hypervideo_dl/extractor/camsoda.py | 57
-rw-r--r-- hypervideo_dl/extractor/camtasia.py | 71
-rw-r--r-- hypervideo_dl/extractor/camtube.py | 71
-rw-r--r-- hypervideo_dl/extractor/camwithher.py | 2
-rw-r--r-- hypervideo_dl/extractor/canalalpha.py | 4
-rw-r--r-- hypervideo_dl/extractor/canalc2.py | 5
-rw-r--r-- hypervideo_dl/extractor/canalplus.py | 5
-rw-r--r-- hypervideo_dl/extractor/canvas.py | 2
-rw-r--r-- hypervideo_dl/extractor/carambatv.py | 4
-rw-r--r-- hypervideo_dl/extractor/cartoonnetwork.py | 3
-rw-r--r-- hypervideo_dl/extractor/cbc.py | 9
-rw-r--r-- hypervideo_dl/extractor/cbs.py | 5
-rw-r--r-- hypervideo_dl/extractor/cbsinteractive.py | 6
-rw-r--r-- hypervideo_dl/extractor/cbslocal.py | 7
-rw-r--r-- hypervideo_dl/extractor/cbsnews.py | 8
-rw-r--r-- hypervideo_dl/extractor/cbssports.py | 4
-rw-r--r-- hypervideo_dl/extractor/ccc.py | 5
-rw-r--r-- hypervideo_dl/extractor/ccma.py | 4
-rw-r--r-- hypervideo_dl/extractor/cctv.py | 5
-rw-r--r-- hypervideo_dl/extractor/cda.py | 97
-rw-r--r-- hypervideo_dl/extractor/cellebrite.py | 63
-rw-r--r-- hypervideo_dl/extractor/ceskatelevize.py | 77
-rw-r--r-- hypervideo_dl/extractor/cgtn.py | 3
-rw-r--r-- hypervideo_dl/extractor/channel9.py | 10
-rw-r--r-- hypervideo_dl/extractor/charlierose.py | 4
-rw-r--r-- hypervideo_dl/extractor/chaturbate.py | 3
-rw-r--r-- hypervideo_dl/extractor/chilloutzone.py | 2
-rw-r--r-- hypervideo_dl/extractor/chingari.py | 18
-rw-r--r-- hypervideo_dl/extractor/chirbit.py | 3
-rw-r--r-- hypervideo_dl/extractor/cinchcast.py | 6
-rw-r--r-- hypervideo_dl/extractor/cinemax.py | 4
-rw-r--r-- hypervideo_dl/extractor/cinetecamilano.py | 61
-rw-r--r-- hypervideo_dl/extractor/ciscolive.py | 3
-rw-r--r-- hypervideo_dl/extractor/ciscowebex.py | 4
-rw-r--r-- hypervideo_dl/extractor/cjsw.py | 4
-rw-r--r-- hypervideo_dl/extractor/cliphunter.py | 3
-rw-r--r-- hypervideo_dl/extractor/clippit.py | 4
-rw-r--r-- hypervideo_dl/extractor/cliprs.py | 3
-rw-r--r-- hypervideo_dl/extractor/clipsyndicate.py | 2
-rw-r--r-- hypervideo_dl/extractor/closertotruth.py | 3
-rw-r--r-- hypervideo_dl/extractor/cloudflarestream.py | 16
-rw-r--r-- hypervideo_dl/extractor/cloudy.py | 3
-rw-r--r-- hypervideo_dl/extractor/clubic.py | 4
-rw-r--r-- hypervideo_dl/extractor/clyp.py | 3
-rw-r--r-- hypervideo_dl/extractor/cmt.py | 4
-rw-r--r-- hypervideo_dl/extractor/cnbc.py | 4
-rw-r--r-- hypervideo_dl/extractor/cnn.py | 60
-rw-r--r-- hypervideo_dl/extractor/comedycentral.py | 2
-rw-r--r-- hypervideo_dl/extractor/common.py | 1459
-rw-r--r-- hypervideo_dl/extractor/commonmistakes.py | 12
-rw-r--r-- hypervideo_dl/extractor/commonprotocols.py | 8
-rw-r--r-- hypervideo_dl/extractor/condenast.py | 9
-rw-r--r-- hypervideo_dl/extractor/contv.py | 5
-rw-r--r-- hypervideo_dl/extractor/corus.py | 7
-rw-r--r-- hypervideo_dl/extractor/coub.py | 5
-rw-r--r-- hypervideo_dl/extractor/cozytv.py | 3
-rw-r--r-- hypervideo_dl/extractor/cpac.py | 12
-rw-r--r-- hypervideo_dl/extractor/cracked.py | 2
-rw-r--r-- hypervideo_dl/extractor/crackle.py | 4
-rw-r--r-- hypervideo_dl/extractor/craftsy.py | 3
-rw-r--r-- hypervideo_dl/extractor/crooksandliars.py | 5
-rw-r--r-- hypervideo_dl/extractor/crowdbunker.py | 4
-rw-r--r-- hypervideo_dl/extractor/crunchyroll.py | 921
-rw-r--r-- hypervideo_dl/extractor/cspan.py | 8
-rw-r--r-- hypervideo_dl/extractor/ctsnews.py | 3
-rw-r--r-- hypervideo_dl/extractor/ctv.py | 3
-rw-r--r-- hypervideo_dl/extractor/ctvnews.py | 3
-rw-r--r-- hypervideo_dl/extractor/cultureunplugged.py | 2
-rw-r--r-- hypervideo_dl/extractor/curiositystream.py | 19
-rw-r--r-- hypervideo_dl/extractor/cwtv.py | 4
-rw-r--r-- hypervideo_dl/extractor/cybrary.py | 4
-rw-r--r-- hypervideo_dl/extractor/daftsex.py | 5
-rw-r--r-- hypervideo_dl/extractor/dailymail.py | 13
-rw-r--r-- hypervideo_dl/extractor/dailymotion.py | 36
-rw-r--r-- hypervideo_dl/extractor/dailywire.py | 113
-rw-r--r-- hypervideo_dl/extractor/damtomo.py | 4
-rw-r--r-- hypervideo_dl/extractor/daum.py | 6
-rw-r--r-- hypervideo_dl/extractor/daystar.py | 1
-rw-r--r-- hypervideo_dl/extractor/dbtv.py | 12
-rw-r--r-- hypervideo_dl/extractor/dctp.py | 3
-rw-r--r-- hypervideo_dl/extractor/deezer.py | 4
-rw-r--r-- hypervideo_dl/extractor/defense.py | 2
-rw-r--r-- hypervideo_dl/extractor/democracynow.py | 5
-rw-r--r-- hypervideo_dl/extractor/detik.py | 159
-rw-r--r-- hypervideo_dl/extractor/deuxm.py | 76
-rw-r--r-- hypervideo_dl/extractor/dfb.py | 4
-rw-r--r-- hypervideo_dl/extractor/dhm.py | 2
-rw-r--r-- hypervideo_dl/extractor/digg.py | 2
-rw-r--r-- hypervideo_dl/extractor/digitalconcerthall.py | 6
-rw-r--r-- hypervideo_dl/extractor/digiteka.py | 16
-rw-r--r-- hypervideo_dl/extractor/discovery.py | 2
-rw-r--r-- hypervideo_dl/extractor/discoverygo.py | 3
-rw-r--r-- hypervideo_dl/extractor/discoverynetworks.py | 42
-rw-r--r-- hypervideo_dl/extractor/discoveryplusindia.py | 98
-rw-r--r-- hypervideo_dl/extractor/discoveryvr.py | 59
-rw-r--r-- hypervideo_dl/extractor/disney.py | 4
-rw-r--r-- hypervideo_dl/extractor/dispeak.py | 3
-rw-r--r-- hypervideo_dl/extractor/dlive.py | 4
-rw-r--r-- hypervideo_dl/extractor/doodstream.py | 76
-rw-r--r-- hypervideo_dl/extractor/dotsub.py | 2
-rw-r--r-- hypervideo_dl/extractor/douyutv.py | 3
-rw-r--r-- hypervideo_dl/extractor/dplay.py | 83
-rw-r--r-- hypervideo_dl/extractor/drbonanza.py | 4
-rw-r--r-- hypervideo_dl/extractor/dreisat.py | 4
-rw-r--r-- hypervideo_dl/extractor/drooble.py | 3
-rw-r--r-- hypervideo_dl/extractor/dropbox.py | 8
-rw-r--r-- hypervideo_dl/extractor/dropout.py | 34
-rw-r--r-- hypervideo_dl/extractor/drtuber.py | 10
-rw-r--r-- hypervideo_dl/extractor/drtv.py | 51
-rw-r--r-- hypervideo_dl/extractor/dtube.py | 3
-rw-r--r-- hypervideo_dl/extractor/duboku.py | 53
-rw-r--r-- hypervideo_dl/extractor/dumpert.py | 4
-rw-r--r-- hypervideo_dl/extractor/dvtv.py | 4
-rw-r--r-- hypervideo_dl/extractor/dw.py | 4
-rw-r--r-- hypervideo_dl/extractor/eagleplatform.py | 39
-rw-r--r-- hypervideo_dl/extractor/ebaumsworld.py | 2
-rw-r--r-- hypervideo_dl/extractor/echomsk.py | 3
-rw-r--r-- hypervideo_dl/extractor/egghead.py | 4
-rw-r--r-- hypervideo_dl/extractor/ehow.py | 2
-rw-r--r-- hypervideo_dl/extractor/eighttracks.py | 3
-rw-r--r-- hypervideo_dl/extractor/einthusan.py | 5
-rw-r--r-- hypervideo_dl/extractor/eitb.py | 5
-rw-r--r-- hypervideo_dl/extractor/ellentube.py | 4
-rw-r--r-- hypervideo_dl/extractor/elonet.py | 4
-rw-r--r-- hypervideo_dl/extractor/elpais.py | 3
-rw-r--r-- hypervideo_dl/extractor/embedly.py | 14
-rw-r--r-- hypervideo_dl/extractor/engadget.py | 2
-rw-r--r-- hypervideo_dl/extractor/epicon.py | 4
-rw-r--r-- hypervideo_dl/extractor/epoch.py | 55
-rw-r--r-- hypervideo_dl/extractor/eporner.py | 5
-rw-r--r-- hypervideo_dl/extractor/eroprofile.py | 2
-rw-r--r-- hypervideo_dl/extractor/ertgr.py | 26
-rw-r--r-- hypervideo_dl/extractor/escapist.py | 3
-rw-r--r-- hypervideo_dl/extractor/espn.py | 165
-rw-r--r-- hypervideo_dl/extractor/esri.py | 4
-rw-r--r-- hypervideo_dl/extractor/europa.py | 4
-rw-r--r-- hypervideo_dl/extractor/europeantour.py | 3
-rw-r--r-- hypervideo_dl/extractor/eurosport.py | 97
-rw-r--r-- hypervideo_dl/extractor/euscreen.py | 4
-rw-r--r-- hypervideo_dl/extractor/everyonesmixtape.py | 76
-rw-r--r-- hypervideo_dl/extractor/expotv.py | 3
-rw-r--r-- hypervideo_dl/extractor/expressen.py | 21
-rw-r--r-- hypervideo_dl/extractor/extractors.py | 2162
-rw-r--r-- hypervideo_dl/extractor/extremetube.py | 4
-rw-r--r-- hypervideo_dl/extractor/eyedotv.py | 3
-rw-r--r-- hypervideo_dl/extractor/facebook.py | 79
-rw-r--r-- hypervideo_dl/extractor/fancode.py | 5
-rw-r--r-- hypervideo_dl/extractor/faz.py | 4
-rw-r--r-- hypervideo_dl/extractor/fc2.py | 26
-rw-r--r-- hypervideo_dl/extractor/fczenit.py | 5
-rw-r--r-- hypervideo_dl/extractor/fifa.py | 94
-rw-r--r-- hypervideo_dl/extractor/filmmodu.py | 5
-rw-r--r-- hypervideo_dl/extractor/filmon.py | 5
-rw-r--r-- hypervideo_dl/extractor/filmweb.py | 3
-rw-r--r-- hypervideo_dl/extractor/firsttv.py | 4
-rw-r--r-- hypervideo_dl/extractor/fivemin.py | 54
-rw-r--r-- hypervideo_dl/extractor/fivetv.py | 6
-rw-r--r-- hypervideo_dl/extractor/flickr.py | 5
-rw-r--r-- hypervideo_dl/extractor/folketinget.py | 4
-rw-r--r-- hypervideo_dl/extractor/footyroom.py | 3
-rw-r--r-- hypervideo_dl/extractor/formula1.py | 3
-rw-r--r-- hypervideo_dl/extractor/fourtube.py | 3
-rw-r--r-- hypervideo_dl/extractor/fourzerostudio.py | 106
-rw-r--r-- hypervideo_dl/extractor/fox.py | 10
-rw-r--r-- hypervideo_dl/extractor/fox9.py | 3
-rw-r--r-- hypervideo_dl/extractor/foxgay.py | 6
-rw-r--r-- hypervideo_dl/extractor/foxnews.py | 43
-rw-r--r-- hypervideo_dl/extractor/foxsports.py | 2
-rw-r--r-- hypervideo_dl/extractor/fptplay.py | 41
-rw-r--r-- hypervideo_dl/extractor/franceculture.py | 128
-rw-r--r-- hypervideo_dl/extractor/franceinter.py | 3
-rw-r--r-- hypervideo_dl/extractor/francetv.py | 10
-rw-r--r-- hypervideo_dl/extractor/freesound.py | 3
-rw-r--r-- hypervideo_dl/extractor/freespeech.py | 2
-rw-r--r-- hypervideo_dl/extractor/freetv.py | 139
-rw-r--r-- hypervideo_dl/extractor/freshlive.py | 83
-rw-r--r-- hypervideo_dl/extractor/frontendmasters.py | 4
-rw-r--r-- hypervideo_dl/extractor/fujitv.py | 12
-rw-r--r-- hypervideo_dl/extractor/funimation.py | 14
-rw-r--r-- hypervideo_dl/extractor/funk.py | 4
-rw-r--r-- hypervideo_dl/extractor/fusion.py | 3
-rw-r--r-- hypervideo_dl/extractor/fuyintv.py | 30
-rw-r--r-- hypervideo_dl/extractor/fxnetworks.py | 77
-rw-r--r-- hypervideo_dl/extractor/gab.py | 6
-rw-r--r-- hypervideo_dl/extractor/gaia.py | 5
-rw-r--r-- hypervideo_dl/extractor/gameinformer.py | 3
-rw-r--r-- hypervideo_dl/extractor/gamejolt.py | 1
-rw-r--r-- hypervideo_dl/extractor/gamespot.py | 4
-rw-r--r-- hypervideo_dl/extractor/gamestar.py | 4
-rw-r--r-- hypervideo_dl/extractor/gaskrank.py | 4
-rw-r--r-- hypervideo_dl/extractor/gazeta.py | 4
-rw-r--r-- hypervideo_dl/extractor/gdcvault.py | 2
-rw-r--r-- hypervideo_dl/extractor/gedidigital.py | 36
-rw-r--r-- hypervideo_dl/extractor/generic.py | 1922
-rw-r--r-- hypervideo_dl/extractor/genericembeds.py | 114
-rw-r--r-- hypervideo_dl/extractor/genius.py | 127
-rw-r--r-- hypervideo_dl/extractor/gettr.py | 7
-rw-r--r-- hypervideo_dl/extractor/gfycat.py | 17
-rw-r--r-- hypervideo_dl/extractor/giantbomb.py | 4
-rw-r--r-- hypervideo_dl/extractor/giga.py | 13
-rw-r--r-- hypervideo_dl/extractor/gigya.py | 2
-rw-r--r-- hypervideo_dl/extractor/glide.py | 5
-rw-r--r-- hypervideo_dl/extractor/globo.py | 24
-rw-r--r-- hypervideo_dl/extractor/glomex.py | 16
-rw-r--r-- hypervideo_dl/extractor/go.py | 59
-rw-r--r-- hypervideo_dl/extractor/godtube.py | 3
-rw-r--r-- hypervideo_dl/extractor/gofile.py | 53
-rw-r--r-- hypervideo_dl/extractor/golem.py | 4
-rw-r--r-- hypervideo_dl/extractor/goodgame.py | 57
-rw-r--r-- hypervideo_dl/extractor/googledrive.py | 68
-rw-r--r-- hypervideo_dl/extractor/googlepodcasts.py | 3
-rw-r--r-- hypervideo_dl/extractor/googlesearch.py | 2
-rw-r--r-- hypervideo_dl/extractor/goplay.py | 394
-rw-r--r-- hypervideo_dl/extractor/gopro.py | 5
-rw-r--r-- hypervideo_dl/extractor/goshgay.py | 3
-rw-r--r-- hypervideo_dl/extractor/gotostage.py | 3
-rw-r--r-- hypervideo_dl/extractor/gputechconf.py | 3
-rw-r--r-- hypervideo_dl/extractor/gronkh.py | 76
-rw-r--r-- hypervideo_dl/extractor/groupon.py | 2
-rw-r--r-- hypervideo_dl/extractor/harpodeon.py | 70
-rw-r--r-- hypervideo_dl/extractor/hbo.py | 4
-rw-r--r-- hypervideo_dl/extractor/hearthisat.py | 5
-rw-r--r-- hypervideo_dl/extractor/heise.py | 73
-rw-r--r-- hypervideo_dl/extractor/hellporno.py | 3
-rw-r--r-- hypervideo_dl/extractor/helsinki.py | 5
-rw-r--r-- hypervideo_dl/extractor/hentaistigma.py | 2
-rw-r--r-- hypervideo_dl/extractor/hgtv.py | 3
-rw-r--r-- hypervideo_dl/extractor/hidive.py | 6
-rw-r--r-- hypervideo_dl/extractor/historicfilms.py | 2
-rw-r--r-- hypervideo_dl/extractor/hitbox.py | 13
-rw-r--r-- hypervideo_dl/extractor/hitrecord.py | 2
-rw-r--r-- hypervideo_dl/extractor/hketv.py | 4
-rw-r--r-- hypervideo_dl/extractor/holodex.py | 100
-rw-r--r-- hypervideo_dl/extractor/hornbunny.py | 49
-rw-r--r-- hypervideo_dl/extractor/hotnewhiphop.py | 2
-rw-r--r-- hypervideo_dl/extractor/hotstar.py | 291
-rw-r--r-- hypervideo_dl/extractor/howcast.py | 2
-rw-r--r-- hypervideo_dl/extractor/howstuffworks.py | 4
-rw-r--r-- hypervideo_dl/extractor/hrfensehen.py | 58
-rw-r--r-- hypervideo_dl/extractor/hrti.py | 4
-rw-r--r-- hypervideo_dl/extractor/hse.py | 2
-rw-r--r-- hypervideo_dl/extractor/huajiao.py | 3
-rw-r--r-- hypervideo_dl/extractor/huffpost.py | 5
-rw-r--r-- hypervideo_dl/extractor/hungama.py | 48
-rw-r--r-- hypervideo_dl/extractor/huya.py | 14
-rw-r--r-- hypervideo_dl/extractor/hypem.py | 2
-rw-r--r-- hypervideo_dl/extractor/hytale.py | 58
-rw-r--r-- hypervideo_dl/extractor/icareus.py | 179
-rw-r--r-- hypervideo_dl/extractor/ichinanalive.py | 7
-rw-r--r-- hypervideo_dl/extractor/ign.py | 4
-rw-r--r-- hypervideo_dl/extractor/iheart.py | 3
-rw-r--r-- hypervideo_dl/extractor/iltalehti.py | 51
-rw-r--r-- hypervideo_dl/extractor/imdb.py | 3
-rw-r--r-- hypervideo_dl/extractor/imggaming.py | 4
-rw-r--r-- hypervideo_dl/extractor/imgur.py | 6
-rw-r--r-- hypervideo_dl/extractor/ina.py | 110
-rw-r--r-- hypervideo_dl/extractor/inc.py | 2
-rw-r--r-- hypervideo_dl/extractor/indavideo.py | 28
-rw-r--r-- hypervideo_dl/extractor/infoq.py | 15
-rw-r--r-- hypervideo_dl/extractor/instagram.py | 359
-rw-r--r-- hypervideo_dl/extractor/internazionale.py | 4
-rw-r--r-- hypervideo_dl/extractor/internetvideoarchive.py | 3
-rw-r--r-- hypervideo_dl/extractor/iprima.py | 8
-rw-r--r-- hypervideo_dl/extractor/iqiyi.py | 18
-rw-r--r-- hypervideo_dl/extractor/ir90tv.py | 42
-rw-r--r-- hypervideo_dl/extractor/islamchannel.py | 81
-rw-r--r-- hypervideo_dl/extractor/israelnationalnews.py | 50
-rw-r--r-- hypervideo_dl/extractor/itprotv.py | 2
-rw-r--r-- hypervideo_dl/extractor/itv.py | 4
-rw-r--r-- hypervideo_dl/extractor/ivi.py | 5
-rw-r--r-- hypervideo_dl/extractor/ivideon.py | 5
-rw-r--r-- hypervideo_dl/extractor/iwara.py | 137
-rw-r--r-- hypervideo_dl/extractor/ixigua.py | 83
-rw-r--r-- hypervideo_dl/extractor/izlesene.py | 4
-rw-r--r-- hypervideo_dl/extractor/jable.py | 103
-rw-r--r-- hypervideo_dl/extractor/jamendo.py | 41
-rw-r--r-- hypervideo_dl/extractor/japandiet.py | 274
-rw-r--r-- hypervideo_dl/extractor/jeuxvideo.py | 5
-rw-r--r-- hypervideo_dl/extractor/jixie.py | 47
-rw-r--r-- hypervideo_dl/extractor/joj.py | 17
-rw-r--r-- hypervideo_dl/extractor/jove.py | 3
-rw-r--r-- hypervideo_dl/extractor/jwplatform.py | 46
-rw-r--r-- hypervideo_dl/extractor/kakao.py | 6
-rw-r--r-- hypervideo_dl/extractor/kaltura.py | 265
-rw-r--r-- hypervideo_dl/extractor/kanal2.py | 66
-rw-r--r-- hypervideo_dl/extractor/kanalplay.py | 96
-rw-r--r-- hypervideo_dl/extractor/kankan.py | 48
-rw-r--r-- hypervideo_dl/extractor/karaoketv.py | 3
-rw-r--r-- hypervideo_dl/extractor/karrierevideos.py | 3
-rw-r--r-- hypervideo_dl/extractor/keezmovies.py | 11
-rw-r--r-- hypervideo_dl/extractor/kelbyone.py | 4
-rw-r--r-- hypervideo_dl/extractor/ketnet.py | 2
-rw-r--r-- hypervideo_dl/extractor/khanacademy.py | 19
-rw-r--r-- hypervideo_dl/extractor/kicker.py | 55
-rw-r--r-- hypervideo_dl/extractor/kickstarter.py | 3
-rw-r--r-- hypervideo_dl/extractor/kinja.py | 17
-rw-r--r-- hypervideo_dl/extractor/kinopoisk.py | 4
-rw-r--r-- hypervideo_dl/extractor/kompas.py | 26
-rw-r--r-- hypervideo_dl/extractor/konserthusetplay.py | 5
-rw-r--r-- hypervideo_dl/extractor/koo.py | 3
-rw-r--r-- hypervideo_dl/extractor/krasview.py | 3
-rw-r--r-- hypervideo_dl/extractor/kth.py | 28
-rw-r--r-- hypervideo_dl/extractor/ku6.py | 2
-rw-r--r-- hypervideo_dl/extractor/kusi.py | 10
-rw-r--r-- hypervideo_dl/extractor/kuwo.py | 6
-rw-r--r-- hypervideo_dl/extractor/la7.py | 8
-rw-r--r-- hypervideo_dl/extractor/laola1tv.py | 6
-rw-r--r-- hypervideo_dl/extractor/lastfm.py | 5
-rw-r--r-- hypervideo_dl/extractor/lbry.py | 86
-rw-r--r-- hypervideo_dl/extractor/lci.py | 32
-rw-r--r-- hypervideo_dl/extractor/lcp.py | 5
-rw-r--r-- hypervideo_dl/extractor/lecture2go.py | 5
-rw-r--r-- hypervideo_dl/extractor/lecturio.py | 4
-rw-r--r-- hypervideo_dl/extractor/leeco.py | 6
-rw-r--r-- hypervideo_dl/extractor/lego.py | 4
-rw-r--r-- hypervideo_dl/extractor/lemonde.py | 2
-rw-r--r-- hypervideo_dl/extractor/lenta.py | 3
-rw-r--r-- hypervideo_dl/extractor/libraryofcongress.py | 5
-rw-r--r-- hypervideo_dl/extractor/libsyn.py | 5
-rw-r--r-- hypervideo_dl/extractor/lifenews.py | 5
-rw-r--r-- hypervideo_dl/extractor/likee.py | 192
-rw-r--r-- hypervideo_dl/extractor/limelight.py | 9
-rw-r--r-- hypervideo_dl/extractor/line.py | 7
-rw-r--r-- hypervideo_dl/extractor/linkedin.py | 13
-rw-r--r-- hypervideo_dl/extractor/linuxacademy.py | 3
-rw-r--r-- hypervideo_dl/extractor/liputan6.py | 64
-rw-r--r-- hypervideo_dl/extractor/listennotes.py | 86
-rw-r--r-- hypervideo_dl/extractor/litv.py | 3
-rw-r--r-- hypervideo_dl/extractor/livejournal.py | 3
-rw-r--r-- hypervideo_dl/extractor/liveleak.py | 191
-rw-r--r-- hypervideo_dl/extractor/livestream.py | 7
-rw-r--r-- hypervideo_dl/extractor/livestreamfails.py | 37
-rw-r--r-- hypervideo_dl/extractor/lnkgo.py | 8
-rw-r--r-- hypervideo_dl/extractor/localnews8.py | 4
-rw-r--r-- hypervideo_dl/extractor/lovehomeporn.py | 3
-rw-r--r-- hypervideo_dl/extractor/lrt.py | 58
-rw-r--r-- hypervideo_dl/extractor/lynda.py | 4
-rw-r--r-- hypervideo_dl/extractor/m6.py | 3
-rw-r--r-- hypervideo_dl/extractor/magentamusik360.py | 3
-rw-r--r-- hypervideo_dl/extractor/mailru.py | 4
-rw-r--r-- hypervideo_dl/extractor/mainstreaming.py | 11
-rw-r--r-- hypervideo_dl/extractor/malltv.py | 37
-rw-r--r-- hypervideo_dl/extractor/mangomolo.py | 31
-rw-r--r-- hypervideo_dl/extractor/manoto.py | 5
-rw-r--r-- hypervideo_dl/extractor/manyvids.py | 122
-rw-r--r-- hypervideo_dl/extractor/maoritv.py | 3
-rw-r--r-- hypervideo_dl/extractor/markiza.py | 3
-rw-r--r-- hypervideo_dl/extractor/massengeschmacktv.py | 4
-rw-r--r-- hypervideo_dl/extractor/masters.py | 38
-rw-r--r-- hypervideo_dl/extractor/matchtv.py | 4
-rw-r--r-- hypervideo_dl/extractor/mdr.py | 5
-rw-r--r-- hypervideo_dl/extractor/medaltv.py | 77
-rw-r--r-- hypervideo_dl/extractor/mediaite.py | 3
-rw-r--r-- hypervideo_dl/extractor/mediaklikk.py | 4
-rw-r--r-- hypervideo_dl/extractor/medialaan.py | 7
-rw-r--r-- hypervideo_dl/extractor/mediaset.py | 43
-rw-r--r-- hypervideo_dl/extractor/mediasite.py | 19
-rw-r--r-- hypervideo_dl/extractor/mediaworksnz.py | 103
-rw-r--r-- hypervideo_dl/extractor/medici.py | 3
-rw-r--r-- hypervideo_dl/extractor/megaphone.py | 11
-rw-r--r-- hypervideo_dl/extractor/megatvcom.py | 11
-rw-r--r-- hypervideo_dl/extractor/meipai.py | 7
-rw-r--r-- hypervideo_dl/extractor/melonvod.py | 4
-rw-r--r-- hypervideo_dl/extractor/meta.py | 3
-rw-r--r-- hypervideo_dl/extractor/metacafe.py | 16
-rw-r--r-- hypervideo_dl/extractor/metacritic.py | 3
-rw-r--r-- hypervideo_dl/extractor/mgoon.py | 5
-rw-r--r-- hypervideo_dl/extractor/mgtv.py | 11
-rw-r--r-- hypervideo_dl/extractor/miaopai.py | 3
-rw-r--r-- hypervideo_dl/extractor/microsoftembed.py | 65
-rw-r--r-- hypervideo_dl/extractor/microsoftstream.py | 4
-rw-r--r-- hypervideo_dl/extractor/microsoftvirtualacademy.py | 12
-rw-r--r-- hypervideo_dl/extractor/mildom.py | 11
-rw-r--r-- hypervideo_dl/extractor/minds.py | 8
-rw-r--r-- hypervideo_dl/extractor/ministrygrid.py | 2
-rw-r--r-- hypervideo_dl/extractor/minoto.py | 5
-rw-r--r-- hypervideo_dl/extractor/miomio.py | 3
-rw-r--r-- hypervideo_dl/extractor/mirrativ.py | 3
-rw-r--r-- hypervideo_dl/extractor/mirrorcouk.py | 98
-rw-r--r-- hypervideo_dl/extractor/mit.py | 2
-rw-r--r-- hypervideo_dl/extractor/mitele.py | 5
-rw-r--r-- hypervideo_dl/extractor/mixch.py | 2
-rw-r--r-- hypervideo_dl/extractor/mixcloud.py | 11
-rw-r--r-- hypervideo_dl/extractor/mlb.py | 120
-rw-r--r-- hypervideo_dl/extractor/mlssoccer.py | 3
-rw-r--r-- hypervideo_dl/extractor/mnet.py | 4
-rw-r--r-- hypervideo_dl/extractor/mocha.py | 64
-rw-r--r-- hypervideo_dl/extractor/moevideo.py | 4
-rw-r--r-- hypervideo_dl/extractor/mofosex.py | 13
-rw-r--r-- hypervideo_dl/extractor/mojvideo.py | 4
-rw-r--r-- hypervideo_dl/extractor/morningstar.py | 4
-rw-r--r-- hypervideo_dl/extractor/motherless.py | 31
-rw-r--r-- hypervideo_dl/extractor/motorsport.py | 12
-rw-r--r-- hypervideo_dl/extractor/movieclips.py | 3
-rw-r--r-- hypervideo_dl/extractor/moviepilot.py | 112
-rw-r--r-- hypervideo_dl/extractor/moview.py | 43
-rw-r--r-- hypervideo_dl/extractor/moviezine.py | 6
-rw-r--r-- hypervideo_dl/extractor/movingimage.py | 2
-rw-r--r-- hypervideo_dl/extractor/msn.py | 4
-rw-r--r-- hypervideo_dl/extractor/mtv.py | 26
-rw-r--r-- hypervideo_dl/extractor/muenchentv.py | 4
-rw-r--r-- hypervideo_dl/extractor/murrtube.py | 5
-rw-r--r-- hypervideo_dl/extractor/musescore.py | 3
-rw-r--r-- hypervideo_dl/extractor/musicdex.py | 5
-rw-r--r-- hypervideo_dl/extractor/mwave.py | 3
-rw-r--r-- hypervideo_dl/extractor/mxplayer.py | 151
-rw-r--r-- hypervideo_dl/extractor/mychannels.py | 4
-rw-r--r-- hypervideo_dl/extractor/myspace.py | 5
-rw-r--r-- hypervideo_dl/extractor/myspass.py | 3
-rw-r--r-- hypervideo_dl/extractor/myvi.py | 13
-rw-r--r-- hypervideo_dl/extractor/myvideoge.py | 3
-rw-r--r-- hypervideo_dl/extractor/myvidster.py | 2
-rw-r--r-- hypervideo_dl/extractor/n1.py | 5
-rw-r--r-- hypervideo_dl/extractor/nate.py | 4
-rw-r--r-- hypervideo_dl/extractor/nationalgeographic.py | 4
-rw-r--r-- hypervideo_dl/extractor/naver.py | 172
-rw-r--r-- hypervideo_dl/extractor/nba.py | 4
-rw-r--r-- hypervideo_dl/extractor/nbc.py | 191
-rw-r--r-- hypervideo_dl/extractor/ndr.py | 252
-rw-r--r-- hypervideo_dl/extractor/ndtv.py | 15
-rw-r--r-- hypervideo_dl/extractor/nebula.py | 111
-rw-r--r-- hypervideo_dl/extractor/nerdcubed.py | 3
-rw-r--r-- hypervideo_dl/extractor/neteasemusic.py | 176
-rw-r--r-- hypervideo_dl/extractor/netverse.py | 176
-rw-r--r-- hypervideo_dl/extractor/netzkino.py | 5
-rw-r--r-- hypervideo_dl/extractor/newgrounds.py | 4
-rw-r--r-- hypervideo_dl/extractor/newspicks.py | 53
-rw-r--r-- hypervideo_dl/extractor/newstube.py | 4
-rw-r--r-- hypervideo_dl/extractor/newsy.py | 4
-rw-r--r-- hypervideo_dl/extractor/nextmedia.py | 7
-rw-r--r-- hypervideo_dl/extractor/nexx.py | 25
-rw-r--r-- hypervideo_dl/extractor/nfb.py | 4
-rw-r--r-- hypervideo_dl/extractor/nfhsnetwork.py | 7
-rw-r--r-- hypervideo_dl/extractor/nfl.py | 15
-rw-r--r-- hypervideo_dl/extractor/nhk.py | 27
-rw-r--r-- hypervideo_dl/extractor/nhl.py | 4
-rw-r--r-- hypervideo_dl/extractor/nick.py | 6
-rw-r--r-- hypervideo_dl/extractor/niconico.py | 56
-rw-r--r-- hypervideo_dl/extractor/ninecninemedia.py | 4
-rw-r--r-- hypervideo_dl/extractor/ninegag.py | 48
-rw-r--r-- hypervideo_dl/extractor/ninenow.py | 3
-rw-r--r-- hypervideo_dl/extractor/nintendo.py | 3
-rw-r--r-- hypervideo_dl/extractor/nitter.py | 3
-rw-r--r-- hypervideo_dl/extractor/njpwworld.py | 5
-rw-r--r-- hypervideo_dl/extractor/nobelprize.py | 4
-rw-r--r-- hypervideo_dl/extractor/noco.py | 228
-rw-r--r-- hypervideo_dl/extractor/nonktube.py | 2
-rw-r--r-- hypervideo_dl/extractor/noodlemagazine.py | 5
-rw-r--r-- hypervideo_dl/extractor/noovo.py | 3
-rw-r--r-- hypervideo_dl/extractor/normalboots.py | 3
-rw-r--r-- hypervideo_dl/extractor/nosnl.py | 95
-rw-r--r-- hypervideo_dl/extractor/nosvideo.py | 3
-rw-r--r-- hypervideo_dl/extractor/nova.py | 5
-rw-r--r-- hypervideo_dl/extractor/novaplay.py | 54
-rw-r--r-- hypervideo_dl/extractor/nowness.py | 3
-rw-r--r-- hypervideo_dl/extractor/noz.py | 11
-rw-r--r-- hypervideo_dl/extractor/npo.py | 10
-rw-r--r-- hypervideo_dl/extractor/npr.py | 23
-rw-r--r-- hypervideo_dl/extractor/nrk.py | 20
-rw-r--r-- hypervideo_dl/extractor/nrl.py | 3
-rw-r--r-- hypervideo_dl/extractor/ntvcojp.py | 3
-rw-r--r-- hypervideo_dl/extractor/ntvde.py | 4
-rw-r--r-- hypervideo_dl/extractor/ntvru.py | 4
-rw-r--r-- hypervideo_dl/extractor/nuevo.py | 3
-rw-r--r-- hypervideo_dl/extractor/nuvid.py | 3
-rw-r--r-- hypervideo_dl/extractor/nytimes.py | 5
-rw-r--r-- hypervideo_dl/extractor/nzherald.py | 49
-rw-r--r-- hypervideo_dl/extractor/nzz.py | 3
-rw-r--r-- hypervideo_dl/extractor/odatv.py | 3
-rw-r--r-- hypervideo_dl/extractor/odnoklassniki.py | 107
-rw-r--r-- hypervideo_dl/extractor/oftv.py | 54
-rw-r--r-- hypervideo_dl/extractor/oktoberfesttv.py | 3
-rw-r--r-- hypervideo_dl/extractor/olympics.py | 6
-rw-r--r-- hypervideo_dl/extractor/on24.py | 4
-rw-r--r-- hypervideo_dl/extractor/once.py | 5
-rw-r--r-- hypervideo_dl/extractor/ondemandkorea.py | 25
-rw-r--r-- hypervideo_dl/extractor/onefootball.py | 4
-rw-r--r-- hypervideo_dl/extractor/onenewsnz.py | 111
-rw-r--r-- hypervideo_dl/extractor/onet.py | 4
-rw-r--r-- hypervideo_dl/extractor/onionstudios.py | 13
-rw-r--r-- hypervideo_dl/extractor/ooyala.py | 27
-rw-r--r-- hypervideo_dl/extractor/opencast.py | 5
-rw-r--r-- hypervideo_dl/extractor/openload.py | 110
-rw-r--r-- hypervideo_dl/extractor/openrec.py | 10
-rw-r--r-- hypervideo_dl/extractor/ora.py | 4
-rw-r--r-- hypervideo_dl/extractor/orf.py | 287
-rw-r--r-- hypervideo_dl/extractor/outsidetv.py | 3
-rw-r--r-- hypervideo_dl/extractor/packtpub.py | 2
-rw-r--r-- hypervideo_dl/extractor/palcomp3.py | 4
-rw-r--r-- hypervideo_dl/extractor/pandoratv.py | 5
-rw-r--r-- hypervideo_dl/extractor/panopto.py | 9
-rw-r--r-- hypervideo_dl/extractor/paramountplus.py | 69
-rw-r--r-- hypervideo_dl/extractor/parler.py | 111
-rw-r--r-- hypervideo_dl/extractor/parliamentliveuk.py | 80
-rw-r--r-- hypervideo_dl/extractor/parlview.py | 4
-rw-r--r-- hypervideo_dl/extractor/patreon.py | 368
-rw-r--r-- hypervideo_dl/extractor/pbs.py | 4
-rw-r--r-- hypervideo_dl/extractor/pearvideo.py | 13
-rw-r--r-- hypervideo_dl/extractor/peekvids.py | 6
-rw-r--r-- hypervideo_dl/extractor/peertube.py | 24
-rw-r--r-- hypervideo_dl/extractor/peertv.py | 5
-rw-r--r-- hypervideo_dl/extractor/peloton.py | 16
-rw-r--r-- hypervideo_dl/extractor/people.py | 3
-rw-r--r-- hypervideo_dl/extractor/performgroup.py | 5
-rw-r--r-- hypervideo_dl/extractor/periscope.py | 14
-rw-r--r-- hypervideo_dl/extractor/philharmoniedeparis.py | 43
-rw-r--r-- hypervideo_dl/extractor/phoenix.py | 3
-rw-r--r-- hypervideo_dl/extractor/photobucket.py | 2
-rw-r--r-- hypervideo_dl/extractor/piapro.py | 17
-rw-r--r-- hypervideo_dl/extractor/picarto.py | 5
-rw-r--r-- hypervideo_dl/extractor/piksel.py | 15
-rw-r--r-- hypervideo_dl/extractor/pinkbike.py | 4
-rw-r--r-- hypervideo_dl/extractor/pinterest.py | 4
-rw-r--r-- hypervideo_dl/extractor/pixivsketch.py | 4
-rw-r--r-- hypervideo_dl/extractor/pladform.py | 15
-rw-r--r-- hypervideo_dl/extractor/planetmarathi.py | 4
-rw-r--r-- hypervideo_dl/extractor/platzi.py | 4
-rw-r--r-- hypervideo_dl/extractor/playfm.py | 4
-rw-r--r-- hypervideo_dl/extractor/playplustv.py | 4
-rw-r--r-- hypervideo_dl/extractor/plays.py | 4
-rw-r--r-- hypervideo_dl/extractor/playstuff.py | 2
-rw-r--r-- hypervideo_dl/extractor/playsuisse.py | 147
-rw-r--r-- hypervideo_dl/extractor/playtvak.py | 4
-rw-r--r-- hypervideo_dl/extractor/playvid.py | 16
-rw-r--r-- hypervideo_dl/extractor/playwire.py | 6
-rw-r--r-- hypervideo_dl/extractor/pluralsight.py | 4
-rw-r--r-- hypervideo_dl/extractor/plutotv.py | 4
-rw-r--r-- hypervideo_dl/extractor/podbayfm.py | 75
-rw-r--r-- hypervideo_dl/extractor/podchaser.py | 97
-rw-r--r-- hypervideo_dl/extractor/podomatic.py | 2
-rw-r--r-- hypervideo_dl/extractor/pokemon.py | 44
-rw-r--r-- hypervideo_dl/extractor/pokergo.py | 3
-rw-r--r-- hypervideo_dl/extractor/polsatgo.py | 4
-rw-r--r-- hypervideo_dl/extractor/polskieradio.py | 5
-rw-r--r-- hypervideo_dl/extractor/popcorntimes.py | 11
-rw-r--r-- hypervideo_dl/extractor/popcorntv.py | 3
-rw-r--r-- hypervideo_dl/extractor/porn91.py | 3
-rw-r--r-- hypervideo_dl/extractor/porncom.py | 4
-rw-r--r-- hypervideo_dl/extractor/pornez.py | 2
-rw-r--r-- hypervideo_dl/extractor/pornflip.py | 4
-rw-r--r-- hypervideo_dl/extractor/pornhd.py | 4
-rw-r--r-- hypervideo_dl/extractor/pornhub.py | 47
-rw-r--r-- hypervideo_dl/extractor/pornotube.py | 2
-rw-r--r-- hypervideo_dl/extractor/pornovoisines.py | 5
-rw-r--r-- hypervideo_dl/extractor/pornoxo.py | 3
-rw-r--r-- hypervideo_dl/extractor/prankcast.py | 66
-rw-r--r-- hypervideo_dl/extractor/premiershiprugby.py | 39
-rw-r--r-- hypervideo_dl/extractor/presstv.py | 4
-rw-r--r-- hypervideo_dl/extractor/projectveritas.py | 4
-rw-r--r-- hypervideo_dl/extractor/prosiebensat1.py | 4
-rw-r--r-- hypervideo_dl/extractor/prx.py | 3
-rw-r--r-- hypervideo_dl/extractor/puhutv.py | 4
-rw-r--r-- hypervideo_dl/extractor/puls4.py | 10
-rw-r--r-- hypervideo_dl/extractor/pyvideo.py | 2
-rw-r--r-- hypervideo_dl/extractor/qingting.py | 47
-rw-r--r-- hypervideo_dl/extractor/qqmusic.py | 4
-rw-r--r-- hypervideo_dl/extractor/r7.py | 4
-rw-r--r-- hypervideo_dl/extractor/radiko.py | 76
-rw-r--r-- hypervideo_dl/extractor/radiobremen.py | 4
-rw-r--r-- hypervideo_dl/extractor/radiocanada.py | 5
-rw-r--r-- hypervideo_dl/extractor/radiode.py | 3
-rw-r--r-- hypervideo_dl/extractor/radiofrance.py | 53
-rw-r--r-- hypervideo_dl/extractor/radiojavan.py | 3
-rw-r--r-- hypervideo_dl/extractor/radiokapital.py | 2
-rw-r--r-- hypervideo_dl/extractor/radiozet.py | 1
-rw-r--r-- hypervideo_dl/extractor/radlive.py | 7
-rw-r--r-- hypervideo_dl/extractor/rai.py | 247
-rw-r--r-- hypervideo_dl/extractor/raywenderlich.py | 2
-rw-r--r-- hypervideo_dl/extractor/rbmaradio.py | 3
-rw-r--r-- hypervideo_dl/extractor/rcs.py | 49
-rw-r--r-- hypervideo_dl/extractor/rcti.py | 5
-rw-r--r-- hypervideo_dl/extractor/rds.py | 3
-rw-r--r-- hypervideo_dl/extractor/redbee.py | 379
-rw-r--r-- hypervideo_dl/extractor/redbulltv.py | 7
-rw-r--r-- hypervideo_dl/extractor/reddit.py | 89
-rw-r--r-- hypervideo_dl/extractor/redgifs.py | 40
-rw-r--r-- hypervideo_dl/extractor/redtube.py | 12
-rw-r--r-- hypervideo_dl/extractor/regiotv.py | 3
-rw-r--r-- hypervideo_dl/extractor/rentv.py | 4
-rw-r--r-- hypervideo_dl/extractor/restudy.py | 4
-rw-r--r-- hypervideo_dl/extractor/reuters.py | 4
-rw-r--r-- hypervideo_dl/extractor/reverbnation.py | 2
-rw-r--r-- hypervideo_dl/extractor/rice.py | 4
-rw-r--r-- hypervideo_dl/extractor/rmcdecouverte.py | 4
-rw-r--r-- hypervideo_dl/extractor/ro220.py | 43
-rw-r--r-- hypervideo_dl/extractor/rockstargames.py | 5
-rw-r--r-- hypervideo_dl/extractor/rokfin.py | 173
-rw-r--r-- hypervideo_dl/extractor/roosterteeth.py | 2
-rw-r--r-- hypervideo_dl/extractor/rottentomatoes.py | 2
-rw-r--r-- hypervideo_dl/extractor/roxwel.py | 52
-rw-r--r-- hypervideo_dl/extractor/rozhlas.py | 3
-rw-r--r-- hypervideo_dl/extractor/rtbf.py | 159
-rw-r--r-- hypervideo_dl/extractor/rte.py | 5
-rw-r--r-- hypervideo_dl/extractor/rtl2.py | 6
-rw-r--r-- hypervideo_dl/extractor/rtlnl.py | 156
-rw-r--r-- hypervideo_dl/extractor/rtnews.py | 3
-rw-r--r-- hypervideo_dl/extractor/rtp.py | 3
-rw-r--r-- hypervideo_dl/extractor/rtrfm.py | 2
-rw-r--r-- hypervideo_dl/extractor/rts.py | 6
-rw-r--r-- hypervideo_dl/extractor/rtve.py | 28
-rw-r--r-- hypervideo_dl/extractor/rtvnh.py | 4
-rw-r--r-- hypervideo_dl/extractor/rtvs.py | 4
-rw-r--r-- hypervideo_dl/extractor/rtvslo.py | 150
-rw-r--r-- hypervideo_dl/extractor/ruhd.py | 3
-rw-r--r-- hypervideo_dl/extractor/rule34video.py | 4
-rw-r--r-- hypervideo_dl/extractor/rumble.py | 213
-rw-r--r-- hypervideo_dl/extractor/rutube.py | 13
-rw-r--r-- hypervideo_dl/extractor/rutv.py | 33
-rw-r--r-- hypervideo_dl/extractor/ruutu.py | 50
-rw-r--r-- hypervideo_dl/extractor/ruv.py | 3
-rw-r--r-- hypervideo_dl/extractor/safari.py | 3
-rw-r--r-- hypervideo_dl/extractor/saitosan.py | 4
-rw-r--r-- hypervideo_dl/extractor/samplefocus.py | 3
-rw-r--r-- hypervideo_dl/extractor/sapo.py | 5
-rw-r--r-- hypervideo_dl/extractor/savefrom.py | 3
-rw-r--r-- hypervideo_dl/extractor/sbs.py | 16
-rw-r--r-- hypervideo_dl/extractor/screen9.py | 62
-rw-r--r-- hypervideo_dl/extractor/screencast.py | 14
-rw-r--r-- hypervideo_dl/extractor/screencastify.py | 52
-rw-r--r-- hypervideo_dl/extractor/screencastomatic.py | 27
-rw-r--r-- hypervideo_dl/extractor/scrippsnetworks.py | 3
-rw-r--r-- hypervideo_dl/extractor/scrolller.py | 102
-rw-r--r-- hypervideo_dl/extractor/scte.py | 2
-rw-r--r-- hypervideo_dl/extractor/seeker.py | 3
-rw-r--r-- hypervideo_dl/extractor/senategov.py | 15
-rw-r--r-- hypervideo_dl/extractor/senateisvp.py | 153
-rw-r--r-- hypervideo_dl/extractor/sendtonews.py | 13
-rw-r--r-- hypervideo_dl/extractor/servus.py | 4
-rw-r--r-- hypervideo_dl/extractor/sevenplus.py | 7
-rw-r--r-- hypervideo_dl/extractor/sexu.py | 3
-rw-r--r-- hypervideo_dl/extractor/seznamzpravy.py | 16
-rw-r--r-- hypervideo_dl/extractor/shahid.py | 4
-rw-r--r-- hypervideo_dl/extractor/shared.py | 13
-rw-r--r-- hypervideo_dl/extractor/sharevideos.py | 6
-rw-r--r-- hypervideo_dl/extractor/shemaroome.py | 4
-rw-r--r-- hypervideo_dl/extractor/showroomlive.py | 4
-rw-r--r-- hypervideo_dl/extractor/simplecast.py | 19
-rw-r--r-- hypervideo_dl/extractor/sina.py | 5
-rw-r--r-- hypervideo_dl/extractor/sixplay.py | 5
-rw-r--r-- hypervideo_dl/extractor/skeb.py | 3
-rw-r--r-- hypervideo_dl/extractor/sky.py | 3
-rw-r--r-- hypervideo_dl/extractor/skyit.py | 99
-rw-r--r-- hypervideo_dl/extractor/skylinewebcams.py | 3
-rw-r--r-- hypervideo_dl/extractor/skynewsarabia.py | 3
-rw-r--r-- hypervideo_dl/extractor/skynewsau.py | 3
-rw-r--r-- hypervideo_dl/extractor/slideshare.py | 2
-rw-r--r-- hypervideo_dl/extractor/slideslive.py | 5
-rw-r--r-- hypervideo_dl/extractor/slutload.py | 2
-rw-r--r-- hypervideo_dl/extractor/smotrim.py | 65
-rw-r--r-- hypervideo_dl/extractor/snotr.py | 4
-rw-r--r-- hypervideo_dl/extractor/sohu.py | 4
-rw-r--r-- hypervideo_dl/extractor/sonyliv.py | 34
-rw-r--r-- hypervideo_dl/extractor/soundcloud.py | 370
-rw-r--r-- hypervideo_dl/extractor/soundgasm.py | 3
-rw-r--r-- hypervideo_dl/extractor/southpark.py | 54
-rw-r--r-- hypervideo_dl/extractor/sovietscloset.py | 18
-rw-r--r-- hypervideo_dl/extractor/spankbang.py | 4
-rw-r--r-- hypervideo_dl/extractor/spankwire.py | 10
-rw-r--r-- hypervideo_dl/extractor/spiegel.py | 3
-rw-r--r-- hypervideo_dl/extractor/spiegeltv.py | 17
-rw-r--r-- hypervideo_dl/extractor/spike.py | 2
-rw-r--r-- hypervideo_dl/extractor/sport5.py | 5
-rw-r--r-- hypervideo_dl/extractor/sportbox.py | 13
-rw-r--r-- hypervideo_dl/extractor/sportdeutschland.py | 3
-rw-r--r-- hypervideo_dl/extractor/spotify.py | 55
-rw-r--r-- hypervideo_dl/extractor/spreaker.py | 3
-rw-r--r-- hypervideo_dl/extractor/springboardplatform.py | 14
-rw-r--r-- hypervideo_dl/extractor/sprout.py | 3
-rw-r--r-- hypervideo_dl/extractor/srgssr.py | 5
-rw-r--r-- hypervideo_dl/extractor/srmediathek.py | 3
-rw-r--r-- hypervideo_dl/extractor/stanfordoc.py | 2
-rw-r--r-- hypervideo_dl/extractor/startrek.py | 75
-rw-r--r-- hypervideo_dl/extractor/startv.py | 3
-rw-r--r-- hypervideo_dl/extractor/steam.py | 49
-rw-r--r-- hypervideo_dl/extractor/stitcher.py | 2
-rw-r--r-- hypervideo_dl/extractor/storyfire.py | 5
-rw-r--r-- hypervideo_dl/extractor/streamable.py | 15
-rw-r--r-- hypervideo_dl/extractor/streamanity.py | 4
-rw-r--r-- hypervideo_dl/extractor/streamcloud.py | 3
-rw-r--r-- hypervideo_dl/extractor/streamcz.py | 6
-rw-r--r-- hypervideo_dl/extractor/streamff.py | 1
-rw-r--r-- hypervideo_dl/extractor/streetvoice.py | 3
-rw-r--r-- hypervideo_dl/extractor/stretchinternet.py | 2
-rw-r--r-- hypervideo_dl/extractor/stripchat.py | 52
-rw-r--r-- hypervideo_dl/extractor/stv.py | 6
-rw-r--r-- hypervideo_dl/extractor/substack.py | 100
-rw-r--r-- hypervideo_dl/extractor/sunporno.py | 3
-rw-r--r-- hypervideo_dl/extractor/sverigesradio.py | 4
-rw-r--r-- hypervideo_dl/extractor/svt.py | 12
-rw-r--r-- hypervideo_dl/extractor/swearnet.py | 73
-rw-r--r-- hypervideo_dl/extractor/swrmediathek.py | 4
-rw-r--r-- hypervideo_dl/extractor/syfy.py | 2
-rw-r--r-- hypervideo_dl/extractor/syvdk.py | 33
-rw-r--r-- hypervideo_dl/extractor/sztvhu.py | 3
-rw-r--r-- hypervideo_dl/extractor/tagesschau.py | 5
-rw-r--r-- hypervideo_dl/extractor/tass.py | 4
-rw-r--r-- hypervideo_dl/extractor/tastytrade.py | 43
-rw-r--r-- hypervideo_dl/extractor/tbs.py | 3
-rw-r--r-- hypervideo_dl/extractor/tdslifeway.py | 2
-rw-r--r-- hypervideo_dl/extractor/teachable.py | 16
-rw-r--r-- hypervideo_dl/extractor/teachertube.py | 5
-rw-r--r-- hypervideo_dl/extractor/teachingchannel.py | 2
-rw-r--r-- hypervideo_dl/extractor/teamcoco.py | 4
-rw-r--r-- hypervideo_dl/extractor/teamtreehouse.py | 3
-rw-r--r-- hypervideo_dl/extractor/techtalks.py | 2
-rw-r--r-- hypervideo_dl/extractor/ted.py | 8
-rw-r--r-- hypervideo_dl/extractor/tele13.py | 4
-rw-r--r-- hypervideo_dl/extractor/tele5.py | 5
-rw-r--r-- hypervideo_dl/extractor/telebruxelles.py | 4
-rw-r--r-- hypervideo_dl/extractor/telecinco.py | 4
-rw-r--r-- hypervideo_dl/extractor/telegraaf.py | 9
-rw-r--r-- hypervideo_dl/extractor/telegram.py | 141
-rw-r--r-- hypervideo_dl/extractor/telemb.py | 4
-rw-r--r-- hypervideo_dl/extractor/telemundo.py | 4
-rw-r--r-- hypervideo_dl/extractor/telequebec.py | 3
-rw-r--r-- hypervideo_dl/extractor/teletask.py | 2
-rw-r--r-- hypervideo_dl/extractor/telewebion.py | 3
-rw-r--r-- hypervideo_dl/extractor/tempo.py | 53
-rw-r--r-- hypervideo_dl/extractor/tencent.py | 452
-rw-r--r-- hypervideo_dl/extractor/tennistv.py | 186
-rw-r--r-- hypervideo_dl/extractor/tenplay.py | 4
-rw-r--r-- hypervideo_dl/extractor/testurl.py | 47
-rw-r--r-- hypervideo_dl/extractor/tf1.py | 3
-rw-r--r-- hypervideo_dl/extractor/tfo.py | 3
-rw-r--r-- hypervideo_dl/extractor/theholetv.py | 35
-rw-r--r-- hypervideo_dl/extractor/theintercept.py | 3
-rw-r--r-- hypervideo_dl/extractor/theplatform.py | 30
-rw-r--r-- hypervideo_dl/extractor/thescene.py | 44
-rw-r--r-- hypervideo_dl/extractor/thestar.py | 3
-rw-r--r-- hypervideo_dl/extractor/thesun.py | 2
-rw-r--r-- hypervideo_dl/extractor/theta.py | 5
-rw-r--r-- hypervideo_dl/extractor/theweatherchannel.py | 6
-rw-r--r-- hypervideo_dl/extractor/thisamericanlife.py | 2
-rw-r--r-- hypervideo_dl/extractor/thisav.py | 4
-rw-r--r-- hypervideo_dl/extractor/thisoldhouse.py | 3
-rw-r--r-- hypervideo_dl/extractor/threeqsdn.py | 26
-rw-r--r-- hypervideo_dl/extractor/threespeak.py | 4
-rw-r--r-- hypervideo_dl/extractor/tiktok.py | 283
-rw-r--r-- hypervideo_dl/extractor/tinypic.py | 2
-rw-r--r-- hypervideo_dl/extractor/tmz.py | 62
-rw-r--r-- hypervideo_dl/extractor/tnaflix.py | 205
-rw-r--r-- hypervideo_dl/extractor/toggle.py | 4
-rw-r--r-- hypervideo_dl/extractor/toggo.py | 11
-rw-r--r-- hypervideo_dl/extractor/tokentube.py | 5
-rw-r--r-- hypervideo_dl/extractor/tonline.py | 3
-rw-r--r-- hypervideo_dl/extractor/toongoggles.py | 4
-rw-r--r-- hypervideo_dl/extractor/toutv.py | 5
-rw-r--r-- hypervideo_dl/extractor/toypics.py | 3
-rw-r--r-- hypervideo_dl/extractor/traileraddict.py | 2
-rw-r--r-- hypervideo_dl/extractor/triller.py | 294
-rw-r--r-- hypervideo_dl/extractor/trilulilu.py | 3
-rw-r--r-- hypervideo_dl/extractor/trovo.py | 308
-rw-r--r-- hypervideo_dl/extractor/trueid.py | 3
-rw-r--r-- hypervideo_dl/extractor/trunews.py | 2
-rw-r--r-- hypervideo_dl/extractor/truth.py | 69
-rw-r--r-- hypervideo_dl/extractor/trutv.py | 4
-rw-r--r-- hypervideo_dl/extractor/tube8.py | 11
-rw-r--r-- hypervideo_dl/extractor/tubetugraz.py | 233
-rw-r--r-- hypervideo_dl/extractor/tubitv.py | 57
-rw-r--r-- hypervideo_dl/extractor/tudou.py | 49
-rw-r--r-- hypervideo_dl/extractor/tumblr.py | 5
-rw-r--r-- hypervideo_dl/extractor/tunein.py | 11
-rw-r--r-- hypervideo_dl/extractor/tunepk.py | 3
-rw-r--r-- hypervideo_dl/extractor/turbo.py | 4
-rw-r--r-- hypervideo_dl/extractor/turner.py | 7
-rw-r--r-- hypervideo_dl/extractor/tv2.py | 37
-rw-r--r-- hypervideo_dl/extractor/tv24ua.py | 78
-rw-r--r-- hypervideo_dl/extractor/tv2dk.py | 4
-rw-r--r-- hypervideo_dl/extractor/tv2hu.py | 3
-rw-r--r-- hypervideo_dl/extractor/tv4.py | 5
-rw-r--r-- hypervideo_dl/extractor/tv5mondeplus.py | 4
-rw-r--r-- hypervideo_dl/extractor/tv5unis.py | 4
-rw-r--r-- hypervideo_dl/extractor/tva.py | 3
-rw-r--r-- hypervideo_dl/extractor/tvanouvelles.py | 3
-rw-r--r-- hypervideo_dl/extractor/tvc.py | 14
-rw-r--r-- hypervideo_dl/extractor/tver.py | 130
-rw-r--r-- hypervideo_dl/extractor/tvigle.py | 6
-rw-r--r-- hypervideo_dl/extractor/tviplayer.py | 78
-rw-r--r-- hypervideo_dl/extractor/tvland.py | 3
-rw-r--r-- hypervideo_dl/extractor/tvn24.py | 4
-rw-r--r-- hypervideo_dl/extractor/tvnet.py | 4
-rw-r--r-- hypervideo_dl/extractor/tvnoe.py | 3
-rw-r--r-- hypervideo_dl/extractor/tvnow.py | 7
-rw-r--r-- hypervideo_dl/extractor/tvopengr.py | 14
-rw-r--r-- hypervideo_dl/extractor/tvp.py | 236
-rw-r--r-- hypervideo_dl/extractor/tvplay.py | 7
-rw-r--r-- hypervideo_dl/extractor/tvplayer.py | 4
-rw-r--r-- hypervideo_dl/extractor/tweakers.py | 3
-rw-r--r-- hypervideo_dl/extractor/twentyfourvideo.py | 4
-rw-r--r-- hypervideo_dl/extractor/twentymin.py | 13
-rw-r--r-- hypervideo_dl/extractor/twentythreevideo.py | 3
-rw-r--r-- hypervideo_dl/extractor/twitcasting.py | 33
-rw-r--r-- hypervideo_dl/extractor/twitch.py | 155
-rw-r--r-- hypervideo_dl/extractor/twitter.py | 788
-rw-r--r-- hypervideo_dl/extractor/udemy.py | 23
-rw-r--r-- hypervideo_dl/extractor/udn.py | 6
-rw-r--r-- hypervideo_dl/extractor/ufctv.py | 3
-rw-r--r-- hypervideo_dl/extractor/ukcolumn.py | 2
-rw-r--r-- hypervideo_dl/extractor/uktvplay.py | 8
-rw-r--r-- hypervideo_dl/extractor/umg.py | 4
-rw-r--r-- hypervideo_dl/extractor/unistra.py | 3
-rw-r--r-- hypervideo_dl/extractor/unity.py | 2
-rw-r--r-- hypervideo_dl/extractor/unscripted.py | 53
-rw-r--r-- hypervideo_dl/extractor/unsupported.py | 143
-rw-r--r-- hypervideo_dl/extractor/uol.py | 4
-rw-r--r-- hypervideo_dl/extractor/uplynk.py | 7
-rw-r--r-- hypervideo_dl/extractor/urort.py | 13
-rw-r--r-- hypervideo_dl/extractor/urplay.py | 4
-rw-r--r-- hypervideo_dl/extractor/usanetwork.py | 5
-rw-r--r-- hypervideo_dl/extractor/usatoday.py | 3
-rw-r--r-- hypervideo_dl/extractor/ustream.py | 12
-rw-r--r-- hypervideo_dl/extractor/ustudio.py | 5
-rw-r--r-- hypervideo_dl/extractor/utreon.py | 4
-rw-r--r-- hypervideo_dl/extractor/varzesh3.py | 3
-rw-r--r-- hypervideo_dl/extractor/vbox7.py | 14
-rw-r--r-- hypervideo_dl/extractor/veehd.py | 2
-rw-r--r-- hypervideo_dl/extractor/veo.py | 5
-rw-r--r-- hypervideo_dl/extractor/veoh.py | 67
-rw-r--r-- hypervideo_dl/extractor/vesti.py | 3
-rw-r--r-- hypervideo_dl/extractor/vevo.py | 123
-rw-r--r-- hypervideo_dl/extractor/vgtv.py | 12
-rw-r--r-- hypervideo_dl/extractor/vh1.py | 3
-rw-r--r-- hypervideo_dl/extractor/vice.py | 17
-rw-r--r-- hypervideo_dl/extractor/vidbit.py | 2
-rw-r--r-- hypervideo_dl/extractor/viddler.py | 6
-rw-r--r-- hypervideo_dl/extractor/videa.py | 21
-rw-r--r-- hypervideo_dl/extractor/videocampus_sachsen.py | 237
-rw-r--r-- hypervideo_dl/extractor/videodetective.py | 2
-rw-r--r-- hypervideo_dl/extractor/videofyme.py | 2
-rw-r--r-- hypervideo_dl/extractor/videomore.py | 25
-rw-r--r-- hypervideo_dl/extractor/videopress.py | 13
-rw-r--r-- hypervideo_dl/extractor/vidio.py | 42
-rw-r--r-- hypervideo_dl/extractor/vidlii.py | 6
-rw-r--r-- hypervideo_dl/extractor/vidme.py | 295
-rw-r--r-- hypervideo_dl/extractor/vidzi.py | 68
-rw-r--r-- hypervideo_dl/extractor/vier.py | 264
-rw-r--r-- hypervideo_dl/extractor/viewlift.py | 13
-rw-r--r-- hypervideo_dl/extractor/viidea.py | 3
-rw-r--r-- hypervideo_dl/extractor/viki.py | 3
-rw-r--r-- hypervideo_dl/extractor/vimeo.py | 252
-rw-r--r-- hypervideo_dl/extractor/vimm.py | 3
-rw-r--r-- hypervideo_dl/extractor/vimple.py | 3
-rw-r--r-- hypervideo_dl/extractor/vine.py | 8
-rw-r--r-- hypervideo_dl/extractor/viqeo.py | 15
-rw-r--r-- hypervideo_dl/extractor/viu.py | 47
-rw-r--r-- hypervideo_dl/extractor/vk.py | 140
-rw-r--r-- hypervideo_dl/extractor/vlive.py | 21
-rw-r--r-- hypervideo_dl/extractor/vodlocker.py | 3
-rw-r--r-- hypervideo_dl/extractor/vodpl.py | 3
-rw-r--r-- hypervideo_dl/extractor/vodplatform.py | 5
-rw-r--r-- hypervideo_dl/extractor/voicerepublic.py | 3
-rw-r--r-- hypervideo_dl/extractor/voicy.py | 8
-rw-r--r-- hypervideo_dl/extractor/voot.py | 9
-rw-r--r-- hypervideo_dl/extractor/voxmedia.py | 6
-rw-r--r-- hypervideo_dl/extractor/vrak.py | 3
-rw-r--r-- hypervideo_dl/extractor/vrt.py | 4
-rw-r--r-- hypervideo_dl/extractor/vrv.py | 19
-rw-r--r-- hypervideo_dl/extractor/vshare.py | 22
-rw-r--r-- hypervideo_dl/extractor/vtm.py | 3
-rw-r--r-- hypervideo_dl/extractor/vube.py | 170
-rw-r--r-- hypervideo_dl/extractor/vuclip.py | 2
-rw-r--r-- hypervideo_dl/extractor/vupload.py | 3
-rw-r--r-- hypervideo_dl/extractor/vvvvid.py | 21
-rw-r--r-- hypervideo_dl/extractor/vyborymos.py | 3
-rw-r--r-- hypervideo_dl/extractor/vzaar.py | 14
-rw-r--r-- hypervideo_dl/extractor/wakanim.py | 3
-rw-r--r-- hypervideo_dl/extractor/walla.py | 4
-rw-r--r-- hypervideo_dl/extractor/wasdtv.py | 10
-rw-r--r-- hypervideo_dl/extractor/washingtonpost.py | 10
-rw-r--r-- hypervideo_dl/extractor/wat.py | 7
-rw-r--r-- hypervideo_dl/extractor/watchbox.py | 5
-rw-r--r-- hypervideo_dl/extractor/watchindianporn.py | 3
-rw-r--r-- hypervideo_dl/extractor/wdr.py | 7
-rw-r--r-- hypervideo_dl/extractor/webcaster.py | 20
-rw-r--r-- hypervideo_dl/extractor/webofstories.py | 5
-rw-r--r-- hypervideo_dl/extractor/weibo.py | 5
-rw-r--r-- hypervideo_dl/extractor/weiqitv.py | 3
-rw-r--r-- hypervideo_dl/extractor/whowatch.py | 4
-rw-r--r-- hypervideo_dl/extractor/wikimedia.py | 55
-rw-r--r-- hypervideo_dl/extractor/willow.py | 2
-rw-r--r-- hypervideo_dl/extractor/wimtv.py | 19
-rw-r--r-- hypervideo_dl/extractor/wistia.py | 250
-rw-r--r-- hypervideo_dl/extractor/wordpress.py | 154
-rw-r--r-- hypervideo_dl/extractor/worldstarhiphop.py | 2
-rw-r--r-- hypervideo_dl/extractor/wppilot.py | 10
-rw-r--r-- hypervideo_dl/extractor/wsj.py | 7
-rw-r--r-- hypervideo_dl/extractor/wwe.py | 2
-rw-r--r-- hypervideo_dl/extractor/xbef.py | 2
-rw-r--r-- hypervideo_dl/extractor/xboxclips.py | 3
-rw-r--r-- hypervideo_dl/extractor/xfileshare.py | 31
-rw-r--r-- hypervideo_dl/extractor/xhamster.py | 27
-rw-r--r-- hypervideo_dl/extractor/xiami.py | 3
-rw-r--r-- hypervideo_dl/extractor/ximalaya.py | 161
-rw-r--r-- hypervideo_dl/extractor/xinpianchang.py | 5
-rw-r--r-- hypervideo_dl/extractor/xminus.py | 3
-rw-r--r-- hypervideo_dl/extractor/xnxx.py | 4
-rw-r--r-- hypervideo_dl/extractor/xstream.py | 4
-rw-r--r-- hypervideo_dl/extractor/xtube.py | 3
-rw-r--r-- hypervideo_dl/extractor/xuite.py | 4
-rw-r--r-- hypervideo_dl/extractor/xvideos.py | 4
-rw-r--r-- hypervideo_dl/extractor/xxxymovies.py | 3
-rw-r--r-- hypervideo_dl/extractor/yahoo.py | 143
-rw-r--r-- hypervideo_dl/extractor/yandexdisk.py | 4
-rw-r--r-- hypervideo_dl/extractor/yandexmusic.py | 6
-rw-r--r-- hypervideo_dl/extractor/yandexvideo.py | 182
-rw-r--r-- hypervideo_dl/extractor/yapfiles.py | 14
-rw-r--r-- hypervideo_dl/extractor/yesjapan.py | 3
-rw-r--r-- hypervideo_dl/extractor/yinyuetai.py | 4
-rw-r--r-- hypervideo_dl/extractor/yle_areena.py | 71
-rw-r--r-- hypervideo_dl/extractor/ynet.py | 10
-rw-r--r-- hypervideo_dl/extractor/youjizz.py | 3
-rw-r--r-- hypervideo_dl/extractor/youku.py | 4
-rw-r--r-- hypervideo_dl/extractor/younow.py | 5
-rw-r--r-- hypervideo_dl/extractor/youporn.py | 39
-rw-r--r-- hypervideo_dl/extractor/yourporn.py | 2
-rw-r--r-- hypervideo_dl/extractor/yourupload.py | 3
-rw-r--r-- hypervideo_dl/extractor/youtube.py | 2997
-rw-r--r-- hypervideo_dl/extractor/zapiks.py | 5
-rw-r--r-- hypervideo_dl/extractor/zaq1.py | 101
-rw-r--r-- hypervideo_dl/extractor/zattoo.py | 708
-rw-r--r-- hypervideo_dl/extractor/zdf.py | 150
-rw-r--r-- hypervideo_dl/extractor/zee5.py | 49
-rw-r--r-- hypervideo_dl/extractor/zeenews.py | 57
-rw-r--r-- hypervideo_dl/extractor/zhihu.py | 6
-rw-r--r-- hypervideo_dl/extractor/zingmp3.py | 392
-rw-r--r-- hypervideo_dl/extractor/zoom.py | 6
-rw-r--r-- hypervideo_dl/extractor/zype.py | 13
-rw-r--r-- hypervideo_dl/jsinterp.py | 785
-rw-r--r-- hypervideo_dl/minicurses.py | 7
-rw-r--r-- hypervideo_dl/options.py | 565
-rw-r--r-- hypervideo_dl/postprocessor/__init__.py | 15
-rw-r--r-- hypervideo_dl/postprocessor/common.py | 64
-rw-r--r-- hypervideo_dl/postprocessor/embedthumbnail.py | 79
-rw-r--r-- hypervideo_dl/postprocessor/exec.py | 8
-rw-r--r-- hypervideo_dl/postprocessor/execafterdownload.py | 31
-rw-r--r-- hypervideo_dl/postprocessor/ffmpeg.py | 521
-rw-r--r-- hypervideo_dl/postprocessor/metadatafromtitle.py | 48
-rw-r--r-- hypervideo_dl/postprocessor/metadataparser.py | 28
-rw-r--r-- hypervideo_dl/postprocessor/modify_chapters.py | 37
-rw-r--r-- hypervideo_dl/postprocessor/movefilesafterdownload.py | 7
-rw-r--r-- hypervideo_dl/postprocessor/sponskrub.py | 25
-rw-r--r-- hypervideo_dl/postprocessor/sponsorblock.py | 29
-rw-r--r-- hypervideo_dl/postprocessor/xattrpp.py | 85
-rw-r--r-- hypervideo_dl/socks.py | 70
-rw-r--r-- hypervideo_dl/utils.py | 2873
-rw-r--r-- hypervideo_dl/version.py | 6
-rw-r--r-- hypervideo_dl/webvtt.py | 61
-rw-r--r-- pytest.ini | 4
-rw-r--r-- requirements.txt | 2
-rw-r--r-- setup.cfg | 49
-rw-r--r-- setup.py | 5
-rw-r--r-- test/helper.py | 128
-rw-r--r-- test/parameters.json | 4
-rw-r--r-- test/test_InfoExtractor.py | 356
-rw-r--r-- test/test_YoutubeDL.py | 263
-rw-r--r-- test/test_YoutubeDLCookieJar.py | 13
-rw-r--r-- test/test_aes.py | 71
-rw-r--r-- test/test_age_restriction.py | 4
-rw-r--r-- test/test_all_urls.py | 19
-rw-r--r-- test/test_cache.py | 8
-rw-r--r-- test/test_compat.py | 105
-rw-r--r-- test/test_cookies.py | 173
-rwxr-xr-x test/test_download.py | 124
-rw-r--r-- test/test_downloader_http.py | 21
-rw-r--r-- test/test_execution.py | 59
-rw-r--r-- test/test_http.py | 126
-rw-r--r-- test/test_netrc.py | 5
-rw-r--r-- test/test_options.py | 26
-rw-r--r-- test/test_overwrites.py | 13
-rw-r--r-- test/test_post_hooks.py | 11
-rw-r--r-- test/test_postprocessors.py | 39
-rw-r--r-- test/test_socks.py | 35
-rw-r--r-- test/test_subtitles.py | 168
-rw-r--r-- test/test_unicode_literals.py | 63
-rw-r--r-- test/test_utils.py | 468
-rw-r--r-- test/test_verbose_output.py | 24
-rw-r--r-- test/test_write_annotations.py | 80
-rw-r--r-- test/test_youtube_lists.py | 22
-rw-r--r-- test/test_youtube_misc.py | 2
-rw-r--r-- test/testdata/certificate/ca.crt | 10
-rw-r--r-- test/testdata/certificate/ca.key | 5
-rw-r--r-- test/testdata/certificate/ca.srl | 1
-rw-r--r-- test/testdata/certificate/client.crt | 9
-rw-r--r-- test/testdata/certificate/client.csr | 7
-rw-r--r-- test/testdata/certificate/client.key | 5
-rw-r--r-- test/testdata/certificate/clientencrypted.key | 8
-rw-r--r-- test/testdata/certificate/clientwithencryptedkey.crt | 17
-rw-r--r-- test/testdata/certificate/clientwithkey.crt | 14
-rw-r--r-- test/testdata/certificate/instructions.md | 19
-rw-r--r-- test/testdata/ism/ec-3_test.Manifest | 1
-rw-r--r-- test/testdata/m3u8/pluzz_francetv_11507.m3u8 | 14
-rw-r--r-- test/testdata/m3u8/teamcoco_11995.m3u8 | 16
-rw-r--r-- test/testdata/m3u8/ted_18923.m3u8 | 28
-rw-r--r-- test/testdata/m3u8/toggle_mobile_12211.m3u8 | 13
-rw-r--r-- test/testdata/m3u8/twitch_vod.m3u8 | 20
-rw-r--r-- test/testdata/m3u8/vidio.m3u8 | 10
-rw-r--r-- tox.ini | 15
1177 files changed, 38160 insertions, 33901 deletions
diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 2b9db86..0000000
--- a/.flake8
+++ /dev/null
@@ -1,3 +0,0 @@
-[flake8]
-exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,setup.py,build,.git,venv
-ignore = W503,W504,E402,E501,E731,E741
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
deleted file mode 100644
index 8cbf5df..0000000
--- a/.gitlab-ci.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-image: debian:sid
-
-before_script:
-- apt-get update -y
-- apt-get install -y python3-pip jython virtualenv
-
-test_core:
- script:
- - virtualenv -p python3 venv
- - source venv/bin/activate
- - python --version
- - pip install nose
- - export YTDL_TEST_SET=core
- - export JYTHON=true;
- - bash ./devscripts/run_tests.sh || true
-
-test_download:
- script:
- - virtualenv -p python3 venv
- - source venv/bin/activate
- - python --version
- - pip install nose
- - export YTDL_TEST_SET=download
- - export JYTHON=true;
- - bash ./devscripts/run_tests.sh || true
-
-test_flake8:
- script:
- - virtualenv -p python3 venv
- - source venv/bin/activate
- - python --version
- - pip install flake8
- - flake8 .
diff --git a/AUTHORS b/AUTHORS
index 33923ec..8dafe32 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -18,6 +18,7 @@ Aaron Wojnowski
Aaron Zeng
Abdullah Ibn Fulan
Abhishek Kedia
+Abubukker Chaudhary
Adam
Adam Glenn
Adam Malcontenti-Wilson
@@ -28,14 +29,19 @@ Adrian Heine né Lang
Adrian Kretz
Adrik
Aidan Rowe
+Ajay Ramachandran
+Akmal
Alan Yee
Albert Kim
Aldo Gunsing
+Aldo Ridhoni
Aleksandar Topuzovic
Aleksander Nitecki
Aleri Kaisattera
Ales Jirasek
Alessandro Ghedini
+Alex
+Alex Karabanov
Alex Merkel
Alex Monk
Alex Seiler
@@ -57,6 +63,7 @@ Alpesh Valia
Amaury Gauthier
Amish Bhadeshia
Anand Babu Periasamy
+Anant Murmu
Anarky
Anders Einar Hilden
Andras Elso
@@ -64,6 +71,7 @@ Andre Walker
Andreas Schmitz
Andrei Troie
AndreiArba
+Andrew
Andrew "Akari" Alexeyew
Andrew Bottom
Andrew J. Erickson
@@ -75,6 +83,7 @@ AndroKev
Andrzej Lichnerowicz
András Veres-Szentkirályi
Andy Savicki
+Angel Toloza
Anh Nhan Nguyen
Aniruddh Joshi
Aniruddh-J
@@ -98,6 +107,7 @@ Ashish Gupta
Ashutosh Chaudhary
Atlas Sullivan
Attila-Mihaly Balazs
+Audrey
Aurora
Aurélien Dunand
Aurélien Grosdidier
@@ -113,20 +123,26 @@ Bastian de Groot
Batuhan's Unmaintained Account
Behrooz
Ben Rog-Wilhelm
+Ben Welsh
Benedikt Wildenhain
Benjamin Congdon
Bepis
Bernhard M. Wiedemann
Bjorn Heesakkers
BlahGeek
+Bnyro
Bob Poekert
+Bobscorn
BohwaZ
Bojidar Qnkov
Boris Wachtmeister
+Brett824
Brian Foley
Brian Marks
Bricio
+Bruno Guerreiro
BunnyHelp
+Burve
CHJ85
CXwudi
Camillo Dell'mour
@@ -141,23 +157,28 @@ Ching Yi, Chan
Chirantan Ekbote
Chris Gavin
Chris Hranj
+Chris Lamb
Christian Albrecht
Christian Paul
Christian Pointner
Christoph Döpmann
+Christoph Moench-Tegeder
Christopher Krooss
Christopher Neugebauer
Christopher Smith
Chuck Cho
Cian Ruane
CkuT
+ClosedPort22
Clément DAVID
+Conner
Corey Farwell
Corey Nicholson
Cory Hall
Costy Petrisor
CplPwnies
Craig Markwardt
+CrankDatSouljaBoy
CrypticSignal
CyberJacob
Cyril Roelandt
@@ -174,6 +195,7 @@ Dan Weber
Daniel
Daniel Bolton
Daniel Höpfl
+Daniel Lindholm
Daniel Peukert
Daniel Twardowski
Daniel.Zeng
@@ -201,49 +223,61 @@ David Triendl
David Wagner
Deer-Spangle
Delon
+DepFA
Derek Land
DesweR
Devin J. Pohly
Devon Meunier
+Dhruv
Diego Fernando Rodríguez Varón
DigitalDJ
Dimitre Liotev
+Djeson
Dobrosław Żybort
Dominik
Dominik Heidler
Dorian Westacott
+Dosychev Peter
Douglas Su
DrWursterich
Dracony
DroidFreak32
Duncan
Duncan Keall
+Dzmitry Neviadomski
Déstin Reed
Eduardo Ferro
Edward Betts
+Ehtisham Sabir
Eitan Adler
Eitan Postavsky
Elan Ruusamäe
Elias Probst
+Elyse
Emanuel Hoogeveen
Emilien Kenler
Emmanuel Froissart
Enes
EntranceJew
Entropy
+Eren Kemer
Eric Wong
Erik
Erik Johnson
Erwin de Haan
+Evan Spensley
FND
+Fabi019
Fabian Stahl
Fai
Fam0r
Felix S
Felix Stupp
Felix Yan
+Ferdinand Bachmann
FestplattenSchnitzel
Filip B
+Filip Hedman
Filippo Valsorda
Finn Petersen
FireDart
@@ -259,9 +293,11 @@ Frederic Bournival
GDR!
Gabriel Schubiner
Gaetan Gilbert
+Galiley
Gary
Gaurav
Gautam M
+GautamMKGarg
Genki Sky
Georg Jaehnig
George Boyle
@@ -284,6 +320,7 @@ Grom PE
Grzegorz P
Grzegorz Ruciński
Guillem Vela
+HE7086
Ha Tien Loi
Hadi0609
Hakim Boyles
@@ -296,6 +333,7 @@ Hendrik Schröter
Hendrik v. Raven
Henrik Heimbuerger
Hirokuni Yano
+HobbyistDev
Hongjie Dong
Hormoz K
Hubert Hirtz
@@ -307,15 +345,18 @@ InfernalUnderling
Irfan Charania
Isaac-the-Man
Ismael Mejia
+Itachi
Itay Brandes
Iulian Onofrei
Ivan Kozik
J
J.D. Purcell
JChris246
+Jacek Nowacki
Jack Danger Canty
Jacob Chapman
Jacob Kaplan-Moss
+Jacob Truman
Jai Grimshaw
Jaime Marquínez Ferrándiz
Jaime Marquínez Ferrándiz
@@ -323,6 +364,7 @@ Jakub Adam Wieczorek
Jakub Wilk
Jalaz Kumar
JamKage
+James Woglom
Jan 'Yenda' Trmal
Jan Friesse
Jan Kratochvil
@@ -336,11 +378,13 @@ Jeff Buchbinder
Jeff Crouse
Jeff Huffman
Jeff Smith
+Jelle Besseling
Jelle van der Waa
Jens Rutschmann
Jens Timmerman
Jens Wille
Jeremie J. Jarosh
+Jeroen Jacobs
Jertzukka
Jesse
Jesse de Zwart
@@ -367,7 +411,7 @@ John Peel
Johny Mo Swag
Joost Verdoorn
Joram Schrijver
-JordanWeatherby
+Jordan Weatherby
Joseph Frazier
Joseph Spiros
Josh Soref
@@ -380,6 +424,7 @@ Juan Carlos Garcia Segovia
Juan Francisco Cantero Hurtado
Juan M
Juanjo Benages
+Juhmer Tena
Jules-A
Julien Hadley Jack
Justin Keogh
@@ -400,6 +445,7 @@ Kevin Kwan
Kevin Ngo
Kevin O'Connor
Kevin Velghe
+Kevin Wood
Kfir Breger
Khang Nguyen
KiberInfinity
@@ -407,6 +453,7 @@ Kid
Kieran O'Reilly
Kitten King
Kyle
+Kyle Anthony Williams
Kyu Yeun Kim
LE
Laneone
@@ -418,10 +465,12 @@ Laurent Raufaste
Leonardo Amaral
Leonardo Taccari
Leslie P. Polzer
-Lesmiscore (Naoya Ozaki)
+Lesmiscore
Li4ick
Lionel Elie Mamane
Liu DongMiao
+Livia Medeiros
+Locke
Logan B
Logan Fleur
Lovius
@@ -451,6 +500,7 @@ Marco Fantauzzo
Marco Ferragina
Marco Schuster
Marek Rusinowski
+Marenga
Marian Sigler
Mark Lee
Mark Oteiza
@@ -462,6 +512,7 @@ Martin Ström
Martin Trigaux
Martin Weinelt
Marvin Ewald
+Marwen Dallel
Matej Dujava
Mathias Rav
Mats
@@ -478,6 +529,7 @@ Max
Max Mehl
Max Teegen
MaxReimann
+Mehavoid
Mel Shafer
Meneth32
Mevious
@@ -497,6 +549,7 @@ Mike Fährmann
MikeCol
MinePlayersPE
Miroslav Šedivý
+Misael Aguayo
Mister Hat
Mitsukarenai
MobiDotS
@@ -507,7 +560,9 @@ Mohammed Yaseen Mowzer
Moises Lima
Moritz Patelscheck
MrDoritos
+MrOctopus
MrRawes
+Mudassir Chapra
Muratcan Simsek
N1k145
NRTICN
@@ -526,7 +581,10 @@ Niklas Haas
Niklas Laxström
Nikoli
Nil Admirari
+Nitish Kumar
+Noah
NotFound
+OHaiiBuzzle
Odd Stråbø
OhMyBahGosh
Ole Ernst
@@ -585,6 +643,7 @@ Pornophage
Poschi
Pratyush Singh
PrinceOfPuppers
+Pritam Das
Protuhj
Puck Meerburg
Purdea Andrei
@@ -608,8 +667,10 @@ Ricardo
Ricardo Constantino
Ricardo Garcia
Richard Clamp
+Richard Gibson
Rob
Rob van Bekkum
+Robert Geislinger
Robert Smith
Robin
Robin Dunn
@@ -632,6 +693,7 @@ Sahebjot singh
Saimadhav Heblikar
Sainyam Kapoor
Sam
+Samantaz Fox
Samik Some
Sander
Sander van den Oever
@@ -641,6 +703,7 @@ Seamus Phelan
Sebastian Blunt
Sebastian Haas
Sebastian Leske
+Sebastian Wallkötter
Sematre
Sen Jiang
SeonjaeHyeon
@@ -653,10 +716,12 @@ Shadab Zafar
Shai Coleman
Shaun Walbridge
Shaya G
+Shreyas Minocha
Shrimadhav U K
Sidney de Koning
Silvan Mosberger
Simon Morgan
+Simon Sawicki
Simon W. Jackson
Singwai Chan
Sipherdrakon
@@ -669,6 +734,8 @@ Stanislav Kupryakhin
Stanny Nuytkens
Starsam80
Stavros Ntentos
+Stefan Borer
+Stefan Lobbenmeier
Stefan Pöschel
Stefan-Gabriel Muscalu
Steffan Donal
@@ -683,8 +750,10 @@ SyxbEaEQ2
TRox1972
Tailszefox
Takuya Tsuchida
+Tanner Anderson
Tatsuyuki Ishi
Teemu Ikonen
+Tejas Arlimatti
TheRealDude2
Thijs Vermeir
Thomas Christlieb
@@ -700,6 +769,7 @@ Tim Douglas
Tim Landscheidt
Tim Schindler
Tim Sogard
+Tim Weber
Timendum
Timmy
TinyToweringTree
@@ -712,6 +782,7 @@ Tobias Gruetzmacher
Tobias Kunze
Tobias Salzmann
Todoroki
+TokyoBlackHole
Tom
Tom Gijselinck
Tom-Oliver Heidel
@@ -733,6 +804,7 @@ Vijay Singh
Viktor Szakats
Viren Rajput
Vitaliy Syrchikov
+Vitaly Khabarov
Vobe
Vrihub
Vukkk
@@ -753,8 +825,11 @@ Xie Yanbo
Xu Cheng
Xuan Hu (Sean)
Yakabuff
+Yash Kumar
Yasoob
Yen Chi Hsuan
+Yifu Yu
+Yipten
Your Name
Yuan Chao
YuenSzeHong
@@ -764,8 +839,10 @@ Zach Bruggeman
Zack Fernandes
Zenon Mousmoulas
Zhong Jianxin
+Zhymabek Roman
Zirro
aarubui
+adamanldo
aegamesi
aeph6Ee0
aerworker
@@ -787,6 +864,7 @@ aviperes
axelerometer
aystroganov@gmail.com
azeem
+bashonly
bastik
bato3
beefchop
@@ -796,7 +874,10 @@ blissland
bonfy
bopol
bpfoley
+bsun0000
+bubbleguuum
bzc6p
+ca-za
cant-think-of-a-name
cantandwont
capital-G
@@ -805,27 +886,33 @@ catlover999
cazulu
cclauss
cdarlint
+cgrigis
+changren-wcr
chaos33
chaoskagami
charon2019
+chengzhicn
chien-yu
chio0hai
chocolateboy
chris
+christoph-heinrich
ckuu
cladmi
clauderains
cntrl-s
codelol
codesparkle
-coletdev
coletdjnz
+columndeeply
compujo
comsomisha
coolsa
coreynicholson
corone17
cpm
+crazymoose77756
+cruel-efficiency
cryptonaut
cryzed
cyberfox1691
@@ -838,6 +925,7 @@ danut007ro
davex25
denneboomyo
dequis
+diegorodriguezv
dimqua
dinesh
dirkf
@@ -849,6 +937,7 @@ dundua
dwemthy
dyn888
ealgase
+ekangmonyet
enigmaquip
epitron
ericpardee
@@ -862,8 +951,10 @@ fnord
foghawk
forDream
frenchy1983
+ftk
funniray
gam2046
+gamer191
gcmalloc
gdzx
geauxlo
@@ -878,6 +969,7 @@ gustaf
h-collector
ha shao
hakatashi
+haobinliang
hassaanaliw
hcwhan
hdclark
@@ -897,6 +989,7 @@ ian
igv
inondle
insaneracist
+invertico
ipaha
ischmidt20
ispedals
@@ -904,6 +997,7 @@ iwconfig
j
j54vc1bk
jahudka
+jahway603
james
james mike dupont
jamiejones
@@ -913,6 +1007,7 @@ jjatria
jnozsc
joehillen
jomo
+josanabr
julien
jxu
k3ns1n
@@ -933,11 +1028,13 @@ knapior
kr4ssi
krichbanana
kurumigi
+lauren
lazypete365
light94
lightmare
linhua55
lkho
+llamasblade
llyyr
logon84
lorpus
@@ -946,6 +1043,7 @@ luboss
luceatnobis
lyz-code
m0viefreak
+m4tu4g
mahanstreamer
main()
makeworld
@@ -953,16 +1051,21 @@ marcwebbie
marieell
mars67857
martin54
+masta79
mc2avr
mcd1992
+megapro17
megustamucho
mehq
mexican porn commits
midas02
migbac
minusf
+miseran
mjdubell
+mlampe
mlindner
+monnef
motophil
mpeter50
mrBliss
@@ -983,12 +1086,15 @@ nikhil
nixxo
nmeum
nmrugg
+nomevi
+nosoop
nto
nulloz
nyorain
nyuszika7h
obeythepenguin@gmail.com
octotherp
+odo2063
ofkz
oittaa
opusforlife2
@@ -997,6 +1103,7 @@ ouwou
ovitei
ozburo
pachacamac
+panatexxa
patrickslin
peugeot
pgaig
@@ -1017,6 +1124,7 @@ pypy
quinlander
quyleanh
raleeper
+rand-net
random-nick
rawcoder
reddraggone9
@@ -1039,12 +1147,15 @@ ruuk
rzhxeo
s0u1h
sahutd
+sam
+satan1st
satunnainen
sceext
schn0sch
schnusch
scil
sh!zeeg
+shirt
shirt-dev
sian1468
sichuan-pepper
@@ -1059,6 +1170,7 @@ sofutru
sourcerect
sprhawk
spvkgn
+sqrtNOT
squibbysquibby
ssaqua
stanoarn
@@ -1087,8 +1199,10 @@ tiktok
timethrow
tinybug
tippfeler
+tkgmomosheep
tlonic
tlsssl
+tobi1805
tom
toniz4
trasssh
@@ -1101,11 +1215,13 @@ utlasidyo
v-delta
venth
vijayanand nandam
+vkorablin
vobe
vordep
vvto33
wankerer
willbeaufoy
+winterbird-code
winwon
wolfy1339
xantares
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 86a9e4c..0ed1eb4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -74,7 +74,7 @@ Most users do not need to build hypervideo and can [download the builds](https:/
To run hypervideo as a developer, you don't need to build anything either. Simply execute
- python -m hypervideo_dl
+ python -m youtube_dl
To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
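For example (the concrete list is elided by this hunk; these invocations are assumed from the upstream youtube-dl CONTRIBUTING.md):

    python -m unittest discover
    python test/test_download.py
    nosetests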
@@ -98,17 +98,17 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
-1. [Fork this repository](https://git.conocimientoslibres.ga/software/hypervideo)
+1. [Fork this repository](https://c.hgit.ga/software/hypervideo)
2. Check out the source code with:
- git clone https://git.conocimientoslibres.ga/software/hypervideo
+ git clone https://c.hgit.ga/software/hypervideo
3. Start a new git branch with
cd hypervideo
git checkout -b yourextractor
-4. Start with this simple template and save it to `hypervideo_dl/extractor/yourextractor.py`:
+4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
```python
# coding: utf-8
@@ -147,21 +147,21 @@ After you have ensured this site is distributing its content legally, you can fo
'title': title,
'description': self._og_search_description(webpage),
'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
- # TODO more properties (see hypervideo_dl/extractor/common.py)
+ # TODO more properties (see youtube_dl/extractor/common.py)
}
```
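Only the tail of the template survives in the hunk above; for orientation, the full upstream template looks roughly like this (a sketch: the `yourextractor` URLs, regexes and test values are placeholders, not a real extractor):

```python
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://yourextractor.com/watch/42',
        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
        'info_dict': {
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Mandatory field: _html_search_regex raises if the pattern
        # does not match, which is the desired behavior for title
        title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'uploader': self._search_regex(
                r'<div[^>]+id="uploader"[^>]*>([^<]+)<',
                webpage, 'uploader', fatal=False),
            # TODO more properties (see youtube_dl/extractor/common.py)
        }
```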
-5. Add an import in [`hypervideo_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/extractors.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted in this numbering; see the sketch below.
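   A hypothetical `_TESTS` list following that convention could look like this (URLs and values are placeholders):

```python
_TESTS = [{
    'url': 'https://yourextractor.com/watch/42',
    'info_dict': {
        'id': '42',
        'ext': 'mp4',
        'title': 'Video title goes here',
    },
}, {
    # only_matching tests merely assert that _VALID_URL matches;
    # nothing is downloaded and they are not counted in the numbered names
    'url': 'https://yourextractor.com/embed/42',
    'only_matching': True,
}]
```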
-7. Have a look at [`hypervideo_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [hypervideo coding conventions](#hypervideo-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
- $ flake8 hypervideo_dl/extractor/yourextractor.py
+ $ flake8 youtube_dl/extractor/yourextractor.py
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by hypervideo, namely 2.6, 2.7, and 3.2+.
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
- $ git add hypervideo_dl/extractor/extractors.py
- $ git add hypervideo_dl/extractor/yourextractor.py
+ $ git add youtube_dl/extractor/extractors.py
+ $ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
@@ -177,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
### Mandatory and optional metafields
-For extraction to work hypervideo relies on metadata your extractor extracts and provides to hypervideo expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
+For extraction to work hypervideo relies on metadata your extractor extracts and provides to hypervideo expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
@@ -185,7 +185,7 @@ For extraction to work hypervideo relies on metadata your extractor extracts and
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media, the extraction does not make any sense). But by convention hypervideo also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data without which extraction does not make sense; if any of them fails to be extracted, the extractor is considered completely broken.
-[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
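In code, that tolerance means optional lookups fall back to `None` instead of raising; a minimal sketch (hypothetical patterns, assumed to run inside `_real_extract` after the page has been downloaded):

```python
# Mandatory: no default, so a non-matching pattern raises and the
# extractor is reported as broken
title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

# Optional: default=None (or fatal=False) makes a miss degrade gracefully;
# int_or_none comes from the utils module discussed below
description = self._og_search_description(webpage, default=None)
view_count = int_or_none(self._search_regex(
    r'"viewCount"\s*:\s*"?(\d+)', webpage, 'view count', default=None))
```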
#### Example
@@ -407,7 +407,7 @@ Incorrect:
### Use convenience conversion and parsing functions
-Wrap all extracted numeric data into safe functions from [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Use `url_or_none` for safe URL processing.
@@ -415,7 +415,7 @@ Use `try_get` for safe metadata extraction from parsed JSON.
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta field extraction, `parse_resolution` for `resolution` extraction, `parse_duration` for `duration` extraction and `parse_age_limit` for `age_limit` extraction.
-Explore [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py) for more useful convenience functions.
+Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
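A self-contained sketch of these helpers, with return values shown in comments (the import path is `hypervideo_dl.utils` in this repo, `youtube_dl.utils` upstream; the `meta` dict is hypothetical parsed JSON):

```python
from hypervideo_dl.utils import (
    float_or_none,
    int_or_none,
    parse_count,
    parse_duration,
    try_get,
    unified_strdate,
    url_or_none,
)

meta = {'stats': {'views': '1.6M', 'rating': '4.5'}}

view_count = parse_count(try_get(meta, lambda x: x['stats']['views']))  # 1600000
rating = float_or_none(try_get(meta, lambda x: x['stats']['rating']))   # 4.5
likes = int_or_none(try_get(meta, lambda x: x['stats']['likes']))       # None, not a KeyError
duration = parse_duration('1:02:03')                                    # 3723.0 (seconds)
upload_date = unified_strdate('December 2, 2022')                       # '20221202'
bad_url = url_or_none('javascript:void(0)')                             # None (unsupported scheme)
```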
#### More examples
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 8d62c04..f2a1368 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -214,3 +214,146 @@ pycabbage
regarten
Ronnnny
schn0sch
+s0u1h
+MrRawes
+cffswb
+danielyli
+1-Byte
+mehq
+dzek69
+aaearon
+panatexxa
+kmark
+un-def
+goggle
+Soebb
+Fam0r
+bohwaz
+dodrian
+vvto33
+ca-za
+connercsbn
+diegorodriguezv
+ekangmonyet
+elyse0
+evansp
+GiedriusS
+HE7086
+JordanWeatherby
+m4tu4g
+MarwenDallel
+nevack
+putnam
+rand-net
+vertan
+Wikidepia
+Yipten
+moench-tegeder
+christoph-heinrich
+HobbyistDev
+LunarFang416
+sbor23
+aurelg
+adamanldo
+gamer191
+vkorablin
+Burve
+mnn
+ZhymabekRoman
+mozbugbox
+aejdl
+ping
+sqrtNOT
+bubbleguuum
+darkxex
+miseran
+StefanLobbenmeier
+crazymoose77756
+nomevi
+Brett824
+pingiun
+dosy4ev
+EhtishamSabir
+Ferdi265
+FirefoxMetzger
+ftk
+lamby
+llamasblade
+lockmatrix
+misaelaguayo
+odo2063
+pritam20ps05
+scy
+sheerluck
+AxiosDeminence
+DjesonPV
+eren-kemer
+freezboltz
+Galiley
+haobinliang
+Mehavoid
+winterbird-code
+yashkc2025
+aldoridhoni
+bashonly
+jacobtruman
+masta79
+palewire
+cgrigis
+DavidH-2022
+dfaker
+jackyyf
+ohaiibuzzle
+SamantazFox
+shreyasminocha
+tejasa97
+xenov
+satan1st
+0xGodspeed
+5736d79
+587021c
+basrieter
+Bobscorn
+CNugteren
+columndeeply
+DoubleCouponDay
+Fabi019
+GautamMKGarg
+Grub4K
+itachi-19
+jeroenj
+josanabr
+LiviaMedeiros
+nikita-moor
+snapdgn
+SuperSonicHub1
+tannertechnology
+Timendum
+tobi1805
+TokyoBlackHole
+ajayyy
+Alienmaster
+bsun0000
+changren-wcr
+ClosedPort22
+CrankDatSouljaBoy
+cruel-efficiency
+endotronic
+Generator
+gibson042
+How-Bout-No
+invertico
+jahway603
+jwoglom
+lksj
+megapro17
+mlampe
+MrOctopus
+nosoop
+puc9
+sashashura
+schnusch
+SG5
+the-marenga
+tkgmomosheep
+vitkhab
diff --git a/ChangeLog b/ChangeLog
deleted file mode 100644
index 680fffd..0000000
--- a/ChangeLog
+++ /dev/null
@@ -1,6142 +0,0 @@
-version 2021.06.06
-
-Extractors
-* [facebook] Improve login required detection
-* [youporn] Fix formats and view count extraction (#29216)
-* [orf:tvthek] Fix thumbnails extraction (#29217)
-* [formula1] Fix extraction (#29206)
-* [ard] Relax URL regular expression and fix video ids (#22724, #29091)
-+ [ustream] Detect https embeds (#29133)
-* [ted] Prefer own formats over external sources (#29142)
-* [twitch:clips] Improve extraction (#29149)
-+ [twitch:clips] Add access token query to download URLs (#29136)
-* [youtube] Fix get_video_info request (#29086, #29165)
-* [vimeo] Fix vimeo pro embed extraction (#29126)
-* [redbulltv] Fix embed data extraction (#28770)
-* [shahid] Relax URL regular expression (#28772, #28930)
-
-
-version 2021.05.16
-
-Core
-* [options] Fix thumbnail option group name (#29042)
-* [YoutubeDL] Improve extract_info doc (#28946)
-
-Extractors
-+ [playstuff] Add support for play.stuff.co.nz (#28901, #28931)
-* [eroprofile] Fix extraction (#23200, #23626, #29008)
-+ [vivo] Add support for vivo.st (#29009)
-+ [generic] Add support for og:audio (#28311, #29015)
-* [phoenix] Fix extraction (#29057)
-+ [generic] Add support for sibnet embeds
-+ [vk] Add support for sibnet embeds (#9500)
-+ [generic] Add Referer header for direct videojs download URLs (#2879,
- #20217, #29053)
-* [orf:radio] Switch download URLs to HTTPS (#29012, #29046)
-- [blinkx] Remove extractor (#28941)
-* [medaltv] Relax URL regular expression (#28884)
-+ [funimation] Add support for optional lang code in URLs (#28950)
-+ [gdcvault] Add support for HTML5 videos
-* [dispeak] Improve FLV extraction (#13513, #28970)
-* [kaltura] Improve iframe extraction (#28969)
-* [kaltura] Make embed code alternatives actually work
-* [cda] Improve extraction (#28709, #28937)
-* [twitter] Improve formats extraction from vmap URL (#28909)
-* [xtube] Fix formats extraction (#28870)
-* [svtplay] Improve extraction (#28507, #28876)
-* [tv2dk] Fix extraction (#28888)
-
-
-version 2021.04.26
-
-Extractors
-+ [xfileshare] Add support for wolfstream.tv (#28858)
-* [francetvinfo] Improve video id extraction (#28792)
-* [medaltv] Fix extraction (#28807)
-* [tver] Redirect all downloads to Brightcove (#28849)
-* [go] Improve video id extraction (#25207, #25216, #26058)
-* [youtube] Fix lazy extractors (#28780)
-+ [bbc] Extract description and timestamp from __INITIAL_DATA__ (#28774)
-* [cbsnews] Fix extraction for python <3.6 (#23359)
-
-
-version 2021.04.17
-
-Core
-+ [utils] Add support for experimental HTTP response status code
- 308 Permanent Redirect (#27877, #28768)
-
-Extractors
-+ [lbry] Add support for HLS videos (#27877, #28768)
-* [youtube] Fix stretched ratio calculation
-* [youtube] Improve stretch extraction (#28769)
-* [youtube:tab] Improve grid extraction (#28725)
-+ [youtube:tab] Detect series playlist on playlists page (#28723)
-+ [youtube] Add more invidious instances (#28706)
-* [pluralsight] Extend anti-throttling timeout (#28712)
-* [youtube] Improve URL to extractor routing (#27572, #28335, #28742)
-+ [maoritv] Add support for maoritelevision.com (#24552)
-+ [youtube:tab] Pass innertube context and x-goog-visitor-id header along with
- continuation requests (#28702)
-* [mtv] Fix Viacom A/B Testing Video Player extraction (#28703)
-+ [pornhub] Extract DASH and HLS formats from get_media end point (#28698)
-* [cbssports] Fix extraction (#28682)
-* [jamendo] Fix track extraction (#28686)
-* [curiositystream] Fix format extraction (#26845, #28668)
-
-
-version 2021.04.07
-
-Core
-* [extractor/common] Use compat_cookies_SimpleCookie for _get_cookies
-+ [compat] Introduce compat_cookies_SimpleCookie
-* [extractor/common] Improve JSON-LD author extraction
-* [extractor/common] Fix _get_cookies on python 2 (#20673, #23256, #20326,
- #28640)
-
-Extractors
-* [youtube] Fix extraction of videos with restricted location (#28685)
-+ [line] Add support for live.line.me (#17205, #28658)
-* [vimeo] Improve extraction (#28591)
-* [youku] Update ccode (#17852, #28447, #28460, #28648)
-* [youtube] Prefer direct entry metadata over entry metadata from playlist
- (#28619, #28636)
-* [screencastomatic] Fix extraction (#11976, #24489)
-+ [palcomp3] Add support for palcomp3.com (#13120)
-+ [arnes] Add support for video.arnes.si (#28483)
-+ [youtube:tab] Add support for hashtags (#28308)
-
-
-version 2021.04.01
-
-Extractors
-* [youtube] Setup CONSENT cookie when needed (#28604)
-* [vimeo] Fix password protected review extraction (#27591)
-* [youtube] Improve age-restricted video extraction (#28578)
-
-
-version 2021.03.31
-
-Extractors
-* [vlive] Fix inkey request (#28589)
-* [francetvinfo] Improve video id extraction (#28584)
-+ [instagram] Extract duration (#28469)
-* [instagram] Improve title extraction (#28469)
-+ [sbs] Add support for ondemand watch URLs (#28566)
-* [youtube] Fix video's channel extraction (#28562)
-* [picarto] Fix live stream extraction (#28532)
-* [vimeo] Fix unlisted video extraction (#28414)
-* [youtube:tab] Fix playlist/community continuation items extraction (#28266)
-* [ard] Improve clip id extraction (#22724, #28528)
-
-
-version 2021.03.25
-
-Extractors
-+ [zoom] Add support for zoom.us (#16597, #27002, #28531)
-* [bbc] Fix BBC IPlayer Episodes/Group extraction (#28360)
-* [youtube] Fix default value for youtube_include_dash_manifest (#28523)
-* [zingmp3] Fix extraction (#11589, #16409, #16968, #27205)
-+ [vgtv] Add support for new tv.aftonbladet.se URL schema (#28514)
-+ [tiktok] Detect private videos (#28453)
-* [vimeo:album] Fix extraction for albums with number of videos multiple
- to page size (#28486)
-* [vvvvid] Fix kenc format extraction (#28473)
-* [mlb] Fix video extraction (#21241)
-* [svtplay] Improve extraction (#28448)
-* [applepodcasts] Fix extraction (#28445)
-* [rtve] Improve extraction
- + Extract all formats
- * Fix RTVE Infantil extraction (#24851)
- + Extract is_live and series
-
-
-version 2021.03.14
-
-Core
-+ Introduce release_timestamp meta field (#28386)
-
-Extractors
-+ [southpark] Add support for southparkstudios.com (#28413)
-* [southpark] Fix extraction (#26763, #28413)
-* [sportdeutschland] Fix extraction (#21856, #28425)
-* [pinterest] Reduce the number of HLS format requests
-* [peertube] Improve thumbnail extraction (#28419)
-* [tver] Improve title extraction (#28418)
-* [fujitv] Fix HLS formats extension (#28416)
-* [shahid] Fix format extraction (#28383)
-+ [lbry] Add support for channel filters (#28385)
-+ [bandcamp] Extract release timestamp
-+ [lbry] Extract release timestamp (#28386)
-* [pornhub] Detect flagged videos
-+ [pornhub] Extract formats from get_media end point (#28395)
-* [bilibili] Fix video info extraction (#28341)
-+ [cbs] Add support for Paramount+ (#28342)
-+ [trovo] Add Origin header to VOD formats (#28346)
-* [voxmedia] Fix volume embed extraction (#28338)
-
-
-version 2021.03.03
-
-Extractors
-* [youtube:tab] Switch continuation to browse API (#28289, #28327)
-* [9c9media] Fix extraction for videos with multiple ContentPackages (#28309)
-+ [bbc] Add support for BBC Reel videos (#21870, #23660, #28268)
-
-
-version 2021.03.02
-
-Extractors
-* [zdf] Rework extractors (#11606, #13473, #17354, #21185, #26711, #27068,
- #27930, #28198, #28199, #28274)
- * Generalize cross-extractor video ids for zdf based extractors
- * Improve extraction
- * Fix 3sat and phoenix
-* [stretchinternet] Fix extraction (#28297)
-* [urplay] Fix episode data extraction (#28292)
-+ [bandaichannel] Add support for b-ch.com (#21404)
-* [srgssr] Improve extraction (#14717, #14725, #27231, #28238)
- + Extract subtitle
- * Fix extraction for new videos
- * Update srf download domains
-* [vvvvid] Reduce season request payload size
-+ [vvvvid] Extract series sublists playlist title (#27601, #27618)
-+ [dplay] Extract Ad-Free uplynk URLs (#28160)
-+ [wat] Detect DRM protected videos (#27958)
-* [tf1] Improve extraction (#27980, #28040)
-* [tmz] Fix and improve extraction (#24603, #24687, #28211)
-+ [gedidigital] Add support for Gedi group sites (#7347, #26946)
-* [youtube] Fix get_video_info request
-
-
-version 2021.02.22
-
-Core
-+ [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase
- (#28112)
-
-Extractors
-* [apa] Fix and improve extraction (#27750)
-+ [youporn] Extract duration (#28019)
-+ [peertube] Add support for canard.tube (#28190)
-* [youtube] Fixup m4a_dash formats (#28165)
-+ [samplefocus] Add support for samplefocus.com (#27763)
-+ [vimeo] Add support for unlisted video source format extraction
-* [viki] Improve extraction (#26522, #28203)
- * Extract uploader URL and episode number
- * Report login required error
- + Extract 480p formats
- * Fix API v4 calls
-* [ninegag] Unescape title (#28201)
-* [youtube] Improve URL regular expression (#28193)
-+ [youtube] Add support for redirect.invidious.io (#28193)
-+ [dplay] Add support for de.hgtv.com (#28182)
-+ [dplay] Add support for discoveryplus.com (#24698)
-+ [simplecast] Add support for simplecast.com (#24107)
-* [youtube] Fix uploader extraction in flat playlist mode (#28045)
-* [yandexmusic:playlist] Request missing tracks in chunks (#27355, #28184)
-+ [storyfire] Add support for storyfire.com (#25628, #26349)
-+ [zhihu] Add support for zhihu.com (#28177)
-* [youtube] Fix controversial videos when authenticated with cookies (#28174)
-* [ccma] Fix timestamp parsing in python 2
-+ [videopress] Add support for video.wordpress.com
-* [kakao] Improve info extraction and detect geo restriction (#26577)
-* [xboxclips] Fix extraction (#27151)
-* [ard] Improve formats extraction (#28155)
-+ [canvas] Add support for dagelijksekost.een.be (#28119)
-
-
-version 2021.02.10
-
-Extractors
-* [youtube:tab] Improve grid continuation extraction (#28130)
-* [ign] Fix extraction (#24771)
-+ [xhamster] Extract format filesize
-+ [xhamster] Extract formats from xplayer settings (#28114)
-+ [youtube] Add support for phone/tablet JS player (#26424)
-* [archiveorg] Fix and improve extraction (#21330, #23586, #25277, #26780,
- #27109, #27236, #28063)
-+ [cda] Detect geo restricted videos (#28106)
-* [urplay] Fix extraction (#28073, #28074)
-* [youtube] Fix release date extraction (#28094)
-+ [youtube] Extract abr and vbr (#28100)
-* [youtube] Skip OTF formats (#28070)
-
-
-version 2021.02.04.1
-
-Extractors
-* [youtube] Prefer DASH formats (#28070)
-* [azmedien] Fix extraction (#28064)
-
-
-version 2021.02.04
-
-Extractors
-* [pornhub] Implement lazy playlist extraction
-* [svtplay] Fix video id extraction (#28058)
-+ [pornhub] Add support for authentication (#18797, #21416, #24294)
-* [pornhub:user] Improve paging
-+ [pornhub:user] Add support for URLs unavailable via /videos page (#27853)
-+ [bravotv] Add support for oxygen.com (#13357, #22500)
-+ [youtube] Pass embed URL to get_video_info request
-* [ccma] Improve metadata extraction (#27994)
- + Extract age limit, alt title, categories, series and episode number
- * Fix timestamp and multiple subtitles extraction
-* [egghead] Update API domain (#28038)
-- [vidzi] Remove extractor (#12629)
-* [vidio] Improve metadata extraction
-* [youtube] Improve subtitles extraction
-* [youtube] Fix chapter extraction fallback
-* [youtube] Rewrite extractor
- * Improve format sorting
- * Remove unused code
- * Fix series metadata extraction
- * Fix trailer video extraction
- * Improve error reporting
- + Extract video location
-+ [vvvvid] Add support for youtube embeds (#27825)
-* [googledrive] Report download page errors (#28005)
-* [vlive] Fix error message decoding for python 2 (#28004)
-* [youtube] Improve DASH formats file size extraction
-* [cda] Improve birth validation detection (#14022, #27929)
-+ [awaan] Extract uploader id (#27963)
-+ [medialaan] Add support for DPG Media MyChannels based websites (#14871, #15597,
- #16106, #16489)
-* [abcnews] Fix extraction (#12394, #27920)
-* [AMP] Fix upload date and timestamp extraction (#27970)
-* [tv4] Relax URL regular expression (#27964)
-+ [tv2] Add support for mtvuutiset.fi (#27744)
-* [adn] Improve login warning reporting
-* [zype] Fix uplynk id extraction (#27956)
-+ [adn] Add support for authentication (#17091, #27841, #27937)
-
-
-version 2021.01.24.1
-
-Core
-* Introduce --output-na-placeholder (#27896)
-
-Extractors
-* [franceculture] Make thumbnail optional (#18807)
-* [franceculture] Fix extraction (#27891, #27903)
-* [njpwworld] Fix extraction (#27890)
-* [comedycentral] Fix extraction (#27905)
-* [wat] Fix format extraction (#27901)
-+ [americastestkitchen:season] Add support for seasons (#27861)
-+ [trovo] Add support for trovo.live (#26125)
-+ [aol] Add support for yahoo videos (#26650)
-* [yahoo] Fix single video extraction
-* [lbry] Unescape lbry URI (#27872)
-* [9gag] Fix and improve extraction (#23022)
-* [americastestkitchen] Improve metadata extraction for ATK episodes (#27860)
-* [aljazeera] Fix extraction (#20911, #27779)
-+ [minds] Add support for minds.com (#17934)
-* [ard] Fix title and description extraction (#27761)
-+ [spotify] Add support for Spotify Podcasts (#27443)
-
-
-version 2021.01.16
-
-Core
-* [YoutubeDL] Protect from infinite recursion due to recursively nested
- playlists (#27833)
-* [YoutubeDL] Ignore failure to create existing directory (#27811)
-* [YoutubeDL] Raise syntax error for format selection expressions with multiple
- + operators (#27803)
-
-Extractors
-+ [animeondemand] Add support for lazy playlist extraction (#27829)
-* [youporn] Restrict fallback download URL (#27822)
-* [youporn] Improve height and tbr extraction (#20425, #23659)
-* [youporn] Fix extraction (#27822)
-+ [twitter] Add support for unified cards (#27826)
-+ [twitch] Add Authorization header with OAuth token for GraphQL requests
- (#27790)
-* [mixcloud:playlist:base] Extract video id in flat playlist mode (#27787)
-* [cspan] Improve info extraction (#27791)
-* [adn] Improve info extraction
-* [adn] Fix extraction (#26963, #27732)
-* [youtube:search] Extract from all sections (#27604)
-* [youtube:search] Fix view count and try to extract all video sections (#27604)
-* [twitch] Improve login error extraction
-* [twitch] Fix authentication (#27743)
-* [3qsdn] Improve extraction (#21058)
-* [peertube] Extract formats from streamingPlaylists (#26002, #27586, #27728)
-* [khanacademy] Fix extraction (#2887, #26803)
-* [spike] Update Paramount Network feed URL (#27715)
-
-
-version 2021.01.08
-
-Core
-* [downloader/hls] Disable decryption in tests (#27660)
-+ [utils] Add a function to clean podcast URLs
-
-Extractors
-* [rai] Improve subtitles extraction (#27698, #27705)
-* [canvas] Match only supported VRT NU URLs (#27707)
-+ [bibeltv] Add support for bibeltv.de (#14361)
-+ [bfmtv] Add support for bfmtv.com (#16053, #26615)
-+ [sbs] Add support for ondemand play and news embed URLs (#17650, #27629)
-* [twitch] Drop legacy kraken API v5 code altogether and refactor
-* [twitch:vod] Switch to GraphQL for video metadata
-* [canvas] Fix VRT NU extraction (#26957, #27053)
-* [twitch] Switch access token to GraphQL and refactor (#27646)
-+ [rai] Detect ContentItem in iframe (#12652, #27673)
-* [ketnet] Fix extraction (#27662)
-+ [dplay] Add support for Discovery+ domains (#27680)
-* [motherless] Improve extraction (#26495, #27450)
-* [motherless] Fix recent videos upload date extraction (#27661)
-* [nrk] Fix extraction for videos without a legalAge rating
-- [googleplus] Remove extractor (#4955, #7400)
-+ [applepodcasts] Add support for podcasts.apple.com (#25918)
-+ [googlepodcasts] Add support for podcasts.google.com
-+ [iheart] Add support for iheart.com (#27037)
-* [acast] Clean podcast URLs
-* [stitcher] Clean podcast URLs
-+ [xfileshare] Add support for aparat.cam (#27651)
-+ [twitter] Add support for summary card (#25121)
-* [twitter] Try to use a Generic fallback for unknown twitter cards (#25982)
-+ [stitcher] Add support for shows and show metadata extraction (#20510)
-* [stv] Improve episode id extraction (#23083)
-
-
-version 2021.01.03
-
-Extractors
-* [nrk] Improve series metadata extraction (#27473)
-+ [nrk] Extract subtitles
-* [nrk] Fix age limit extraction
-* [nrk] Improve video id extraction
-+ [nrk] Add support for podcasts (#27634, #27635)
-* [nrk] Generalize and delegate all item extractors to nrk
-+ [nrk] Add support for mp3 formats
-* [nrktv] Switch to playback endpoint
-* [vvvvid] Fix season metadata extraction (#18130)
-* [stitcher] Fix extraction (#20811, #27606)
-* [acast] Fix extraction (#21444, #27612, #27613)
-+ [arcpublishing] Add support for arcpublishing.com (#2298, #9340, #17200)
-+ [sky] Add support for Sports News articles and Brightcove videos (#13054)
-+ [vvvvid] Extract akamai formats
-* [vvvvid] Skip unplayable episodes (#27599)
-* [yandexvideo] Fix extraction for Python 3.4
-
-
-version 2020.12.31
-
-Core
-* [utils] Accept only supported protocols in url_or_none
-* [YoutubeDL] Allow format filtering using audio language (#16209)
-
-Extractors
-+ [redditr] Extract all thumbnails (#27503)
-* [vvvvid] Improve info extraction
-+ [vvvvid] Add support for playlists (#18130, #27574)
-+ [yandexdisk] Extract info from webpage
-* [yandexdisk] Fix extraction (#17861, #27131)
-* [yandexvideo] Use old API call as fallback
-* [yandexvideo] Fix extraction (#25000)
-- [nbc] Remove CSNNE extractor
-* [nbc] Fix NBCSport VPlayer URL extraction (#16640)
-+ [aenetworks] Add support for biography.com (#3863)
-* [uktvplay] Match new video URLs (#17909)
-* [sevenplay] Detect API errors
-* [tenplay] Fix format extraction (#26653)
-* [brightcove] Raise error for DRM protected videos (#23467, #27568)
-
-
-version 2020.12.29
-
-Extractors
-* [youtube] Improve yt initial data extraction (#27524)
-* [youtube:tab] Improve URL matching (#27559)
-* [youtube:tab] Restore retry on browse requests (#27313, #27564)
-* [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904,
- #25418, #26070, #26350, #26738, #27563)
-- [brightcove] Remove sonyliv specific code
-* [piksel] Improve format extraction
-+ [zype] Add support for uplynk videos
-+ [toggle] Add support for live.mewatch.sg (#27555)
-+ [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826)
-* [teachable] Improve embed detection (#26923)
-* [mitele] Fix free video extraction (#24624, #25827, #26757)
-* [telecinco] Fix extraction
-* [youtube] Update invidious.snopyta.org (#22667)
-* [amcnetworks] Improve auth only video detection (#27548)
-+ [generic] Add support for VHX Embeds (#27546)
-
-
-version 2020.12.26
-
-Extractors
-* [instagram] Fix comment count extraction
-+ [instagram] Add support for reel URLs (#26234, #26250)
-* [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821,
- #27538)
-* [instagram] Improve thumbnail extraction
-* [instagram] Fix extraction when authenticated (#22880, #26377, #26981,
- #27422)
-* [spankbang:playlist] Fix extraction (#24087)
-+ [spankbang] Add support for playlist videos
-* [pornhub] Improve like and dislike count extraction (#27356)
-* [pornhub] Fix lq formats extraction (#27386, #27393)
-+ [bongacams] Add support for bongacams.com (#27440)
-* [youtube:tab] Extend URL regular expression (#27501)
-* [theweatherchannel] Fix extraction (#25930, #26051)
-+ [sprout] Add support for Universal Kids (#22518)
-* [theplatform] Allow passing geo bypass countries from other extractors
-+ [wistia] Add support for playlists (#27533)
-+ [ctv] Add support for ctv.ca (#27525)
-* [9c9media] Improve info extraction
-* [youtube] Fix automatic captions extraction (#27162, #27388)
-* [sonyliv] Fix title for movies
-* [sonyliv] Fix extraction (#25667)
-* [streetvoice] Fix extraction (#27455, #27492)
-+ [facebook] Add support for watchparty pages (#27507)
-* [cbslocal] Fix video extraction
-+ [brightcove] Add another method to extract policyKey
-* [mewatch] Relax URL regular expression (#27506)
-
-
-version 2020.12.22
-
-Core
-* [common] Remove unwanted query params from unsigned akamai manifest URLs
-
-Extractors
-- [tastytrade] Remove extractor (#25716)
-* [niconico] Fix playlist extraction (#27428)
-- [everyonesmixtape] Remove extractor
-- [kanalplay] Remove extractor
-* [arkena] Fix extraction
-* [nba] Rewrite extractor
-* [turner] Improve info extraction
-* [youtube] Improve xsrf token extraction (#27442)
-* [generic] Improve RSS age limit extraction
-* [generic] Fix RSS itunes thumbnail extraction (#27405)
-+ [redditr] Extract duration (#27426)
-- [zaq1] Remove extractor
-+ [asiancrush] Add support for retrocrush.tv
-* [asiancrush] Fix extraction
-- [noco] Remove extractor (#10864)
-* [nfl] Fix extraction (#22245)
-* [skysports] Relax URL regular expression (#27435)
-+ [tv5unis] Add support for tv5unis.ca (#22399, #24890)
-+ [videomore] Add support for more.tv (#27088)
-+ [yandexmusic] Add support for music.yandex.com (#27425)
-+ [nhk:program] Add support for audio programs and program clips
-+ [nhk] Add support for NHK video programs (#27230)
-
-
-version 2020.12.14
-
-Core
-* [extractor/common] Improve JSON-LD interaction statistic extraction (#23306)
-* [downloader/hls] Delegate manifests with media initialization to ffmpeg
-+ [extractor/common] Document duration meta field for playlists
-
-Extractors
-* [mdr] Bypass geo restriction
-* [mdr] Improve extraction (#24346, #26873)
-* [yandexmusic:album] Improve album title extraction (#27418)
-* [eporner] Fix view count extraction and make optional (#23306)
-+ [eporner] Extend URL regular expression
-* [eporner] Fix hash extraction and extend _VALID_URL (#27396)
-* [slideslive] Use m3u8 entry protocol for m3u8 formats (#27400)
-* [twitcasting] Fix format extraction and improve info extraction (#24868)
-* [linuxacademy] Fix authentication and extraction (#21129, #26223, #27402)
-* [itv] Clean description from HTML tags (#27399)
-* [vlive] Sort live formats (#27404)
-* [hotstar] Fix and improve extraction
- * Fix format extraction (#26690)
- + Extract thumbnail URL (#16079, #20412)
- + Add support for country specific playlist URLs (#23496)
- * Select the last id in video URL (#26412)
-+ [youtube] Add some invidious instances (#27373)
-
-
-version 2020.12.12
-
-Core
-* [YoutubeDL] Improve thumbnail filename deducing (#26010, #27244)
-
-Extractors
-+ [ruutu] Extract more metadata
-+ [ruutu] Detect non-free videos (#21154)
-* [ruutu] Authenticate format URLs (#21031, #26782)
-+ [ruutu] Add support for static.nelonenmedia.fi (#25412)
-+ [ruutu] Extend URL regular expression (#24839)
-+ [facebook] Add support for archived live video URLs (#15859)
-* [wdr] Improve overall extraction
-+ [wdr] Extend subtitles extraction (#22672, #22723)
-+ [facebook] Add support for videos attached to Relay based story pages
- (#10795)
-+ [wdr:page] Add support for kinder.wdr.de (#27350)
-+ [facebook] Add another regular expression for handleServerJS
-* [facebook] Fix embed page extraction
-+ [facebook] Add support for Relay post pages (#26935)
-+ [facebook] Add support for watch videos (#22795, #27062)
-+ [facebook] Add support for group posts with multiple videos (#19131)
-* [itv] Fix series metadata extraction (#26897)
-- [itv] Remove old extraction method (#23177)
-* [facebook] Redirect mobile URLs to desktop URLs (#24831, #25624)
-+ [facebook] Add support for Relay based pages (#26823)
-* [facebook] Try to reduce unnecessary tahoe requests
-- [facebook] Remove hardcoded Chrome User-Agent (#18974, #25411, #26958,
- #27329)
-- [smotri] Remove extractor (#27358)
-- [beampro] Remove extractor (#17290, #22871, #23020, #23061, #26099)
-
-
-version 2020.12.09
-
-Core
-* [extractor/common] Fix inline HTML5 media tags processing (#27345)
-
-Extractors
-* [youtube:tab] Improve identity token extraction (#27197)
-* [youtube:tab] Make click tracking params on continuation optional
-* [youtube:tab] Delegate inline playlists to tab-based playlists (#27298)
-+ [tubitv] Extract release year (#27317)
-* [amcnetworks] Fix free content extraction (#20354)
-+ [lbry:channel] Add support for channels (#25584)
-+ [lbry] Add support for short and embed URLs
-* [lbry] Fix channel metadata extraction
-+ [telequebec] Add support for video.telequebec.tv (#27339)
-* [telequebec] Fix extraction (#25733, #26883)
-+ [youtube:tab] Capture and output alerts (#27340)
-* [tvplay:home] Fix extraction (#21153)
-* [americastestkitchen] Fix extraction and add support
- for Cook's Country and Cook's Illustrated (#17234, #27322)
-+ [slideslive] Add support for yoda service videos and extract subtitles
- (#27323)
-
-
-version 2020.12.07
-
-Core
-* [extractor/common] Extract timestamp from Last-Modified header
-+ [extractor/common] Add support for dl8-* media tags (#27283)
-* [extractor/common] Fix media type extraction for HTML5 media tags
- in start/end form
-
-Extractors
-* [aenetworks] Fix extraction (#23363, #23390, #26795, #26985)
- * Fix Fastly format extraction
- + Add support for play and watch subdomains
- + Extract series metadata
-* [youtube] Improve youtu.be extraction in non-existing playlists (#27324)
-+ [generic] Extract RSS video description, timestamp and itunes metadata
- (#27177)
-* [nrk] Reduce the number of instalments and episodes requests
-* [nrk] Improve extraction
- * Improve format extraction for old akamai formats
- + Add is_live value to entry info dict
- * Request instalments only when available
- * Fix skole extraction
-+ [peertube] Extract fps
-+ [peertube] Recognize audio-only formats (#27295)
-
-
-version 2020.12.05
-
-Core
-* [extractor/common] Improve Akamai HTTP format extraction
- * Allow m3u8 manifest without an additional audio format
- * Fix extraction for qualities starting with a number
-
-Extractors
-* [teachable:course] Improve extraction (#24507, #27286)
-* [nrk] Improve error extraction
-* [nrktv:series] Improve extraction (#21926)
-* [nrktv:season] Improve extraction
-* [nrk] Improve format extraction and geo-restriction detection (#24221)
-* [pornhub] Handle HTTP errors gracefully (#26414)
-* [nrktv] Relax URL regular expression (#27299, #26185)
-+ [zdf] Extract webm formats (#26659)
-+ [gamespot] Extract DASH and HTTP formats
-+ [tver] Add support for tver.jp (#26662, #27284)
-+ [pornhub] Add support for pornhub.org (#27276)
-
-
-version 2020.12.02
-
-Extractors
-+ [tva] Add support for qub.ca (#27235)
-+ [toggle] Detect DRM protected videos (#16479, #20805)
-+ [toggle] Add support for new MeWatch URLs (#27256)
-* [youtube:tab] Extract channels only from channels tab (#27266)
-+ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030,
- #18806, #23148, #24461, #26171, #26800, #27263)
-* [cspan] Pass Referer header with format's video URL (#26032, #25729)
-* [youtube] Improve age-gated videos extraction (#27259)
-+ [mediaset] Add support for movie URLs (#27240)
-* [yandexmusic] Refactor
-+ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284)
-* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762)
-
-
-version 2020.11.29
-
-Core
-* [YoutubeDL] Write static debug to stderr and respect quiet for dynamic debug
- (#14579, #22593)
-
-Extractors
-* [drtv] Extend URL regular expression (#27243)
-* [tiktok] Fix extraction (#20809, #22838, #22850, #25987, #26281, #26411,
- #26639, #26776, #27237)
-+ [ina] Add support for mobile URLs (#27229)
-* [pornhub] Fix like and dislike count extraction (#27227, #27234)
-* [youtube] Improve yt initial player response extraction (#27216)
-* [videa] Fix extraction (#25650, #25973, #26301)
-
-
-version 2020.11.26
-
-Core
-* [downloader/fragment] Set final file's mtime according to last fragment's
- Last-Modified header (#11718, #18384, #27138)
-
-Extractors
-+ [spreaker] Add support for spreaker.com (#13480, #13877)
-* [vlive] Improve extraction for geo-restricted videos
-+ [vlive] Add support for post URLs (#27122, #27123)
-* [viki] Fix video API request (#27184)
-* [bbc] Fix BBC Three clip extraction
-* [bbc] Fix BBC News videos extraction
-+ [medaltv] Add support for medal.tv (#27149)
-* [youtube] Improve music metadata and license extraction (#26013)
-* [nrk] Fix extraction
-* [cda] Fix extraction (#17803, #24458, #24518, #26381)
-
-
-version 2020.11.24
-
-Core
-+ [extractor/common] Add generic support for akamai HTTP format extraction
-
-Extractors
-* [youtube:tab] Fix feeds extraction (#25695, #26452)
-* [youtube:favorites] Restore extractor
-* [youtube:tab] Fix some weird typo (#27157)
-+ [pinterest] Add support for large collections (more than 25 pins)
-+ [franceinter] Extract thumbnail (#27153)
-+ [box] Add support for box.com (#5949)
-+ [nytimes] Add support for cooking.nytimes.com (#27112, #27143)
-* [lbry] Relax URL regular expression (#27144)
-+ [rumble] Add support for embed pages (#10785)
-+ [skyit] Add support for multiple Sky Italia websites (#26629)
-+ [pinterest] Add support for pinterest.com (#25747)
-
-
-version 2020.11.21.1
-
-Core
-* [downloader/http] Fix crash during urlopen caused by missing reason
- of URLError
-* [YoutubeDL] Fix --ignore-errors for playlists with generator-based entries
- of url_transparent (#27064)
-
-Extractors
-+ [svtplay] Add support for svt.se/barnkanalen (#24817)
-+ [svt] Extract timestamp (#27130)
-* [svtplay] Improve thumbnail extraction (#27130)
-* [youtube] Fix error reason extraction (#27081)
-* [youtube] Fix like and dislike count extraction (#25977)
-+ [youtube:tab] Add support for current video and fix lives extraction (#27126)
-* [infoq] Fix format extraction (#25984)
-* [francetv] Update to fix thumbnail URL issue (#27120)
-* [youtube] Improve yt initial data extraction (#27093)
-+ [discoverynetworks] Add support new TLC/DMAX URLs (#27100)
-* [rai] Fix protocol relative relinker URLs (#22766)
-* [rai] Fix unavailable video format detection
-* [rai] Improve extraction
-* [rai] Fix extraction (#27077)
-* [viki] Improve format extraction
-* [viki] Fix stream extraction from MPD (#27092)
-* [googledrive] Fix format extraction (#26979)
-+ [amara] Add support for amara.org (#20618)
-* [vimeo:album] Fix extraction (#27079)
-* [mtv] Fix mgid extraction (#26841)
-
-
-version 2020.11.19
-
-Core
-* [extractor/common] Output error for invalid URLs in _is_valid_url (#21400,
- #24151, #25617, #25618, #25586, #26068, #27072)
-
-Extractors
-* [youporn] Fix upload date extraction
-* [youporn] Make comment count optional (#26986)
-* [arte] Rework extractors
- * Reimplement embed and playlist extractors to delegate to the single
- entrypoint artetv extractor
- * Improve embeds detection (#27057)
-+ [arte] Extract m3u8 formats (#27061)
-* [mgtv] Fix format extraction (#26415)
-+ [lbry] Add support for odysee.com (#26806)
-* [francetv] Improve info extraction
-+ [francetv] Add fallback video URL extraction (#27047)
-
-
-version 2020.11.18
-
-Extractors
-* [spiegel] Fix extraction (#24206, #24767)
-* [youtube] Improve extraction
- + Add support for --no-playlist (#27009)
- * Improve playlist and mix extraction (#26390, #26509, #26534, #27011)
- + Extract playlist uploader data
-* [youtube:tab] Fix view count extraction (#27051)
-* [malltv] Fix extraction (#27035)
-+ [bandcamp] Extract playlist description (#22684)
-* [urplay] Fix extraction (#26828)
-* [youtube:tab] Fix playlist title extraction (#27015)
-* [youtube] Fix chapters extraction (#26005)
-
-
-version 2020.11.17
-
-Core
-* [utils] Skip ! prefixed code in js_to_json
-
-Extractors
-* [youtube:tab] Fix extraction with cookies provided (#27005)
-* [lrt] Fix extraction with empty tags (#20264)
-+ [ndr:embed:base] Extract subtitles (#25447, #26106)
-+ [servus] Add support for pm-wissen.com (#25869)
-* [servus] Fix extraction (#26872, #26967, #26983, #27000)
-* [xtube] Fix extraction (#26996)
-* [lrt] Fix extraction
-+ [lbry] Add support for lbry.tv
-+ [condenast] Extract subtitles
-* [condenast] Fix extraction
-* [bandcamp] Fix extraction (#26681, #26684)
-* [rai] Fix RaiPlay extraction (#26064, #26096)
-* [vlive] Fix extraction
-* [usanetwork] Fix extraction
-* [nbc] Fix NBCNews/Today/MSNBC extraction
-* [cnbc] Fix extraction
-
-
-version 2020.11.12
-
-Extractors
-* [youtube] Rework extractors
-
-
-version 2020.11.01
-
-Core
-* [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
-* [downloader/http] Properly handle missing message in SSLError (#26646)
-* [downloader/http] Fix access to not yet opened stream in retry
-
-Extractors
-* [youtube] Fix JS player URL extraction
-* [ytsearch] Fix extraction (#26920)
-* [afreecatv] Fix typo (#26970)
-* [23video] Relax URL regular expression (#26870)
-+ [ustream] Add support for video.ibm.com (#26894)
-* [iqiyi] Fix typo (#26884)
-+ [expressen] Add support for di.se (#26670)
-* [iprima] Improve video id extraction (#26507, #26494)
-
-
-version 2020.09.20
-
-Core
-* [extractor/common] Relax interaction count extraction in _json_ld
-+ [extractor/common] Extract author as uploader for VideoObject in _json_ld
-* [downloader/hls] Fix incorrect end byte in Range HTTP header for
- media segments with EXT-X-BYTERANGE (#14748, #24512)
-* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601)
-* [downloader/http] Improve timeout detection when reading block of data
- (#10935)
-* [downloader/http] Retry download when urlopen times out (#10935, #26603)
-
-Extractors
-* [redtube] Extend URL regular expression (#26506)
-* [twitch] Refactor
-* [twitch:stream] Switch to GraphQL and fix reruns (#26535)
-+ [telequebec] Add support for brightcove videos (#25833)
-* [pornhub] Extract metadata from JSON-LD (#26614)
-* [pornhub] Fix view count extraction (#26621, #26614)
-
-
-version 2020.09.14
-
-Core
-+ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails
- (#25687, #25717)
-
-Extractors
-* [rtlnl] Extend URL regular expression (#26549, #25821)
-* [youtube] Fix empty description extraction (#26575, #26006)
-* [srgssr] Extend URL regular expression (#26555, #26556, #26578)
-* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689,
- #26565)
-* [svtplay] Fix id extraction (#26576)
-* [redbulltv] Improve support for redbull.com TV localized URLs (#22063)
-+ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063)
-* [soundcloud:pagedplaylist] Reduce pagination limit (#26557)
-
-
-version 2020.09.06
-
-Core
-+ [utils] Recognize wav mimetype (#26463)
-
-Extractors
-* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409)
-* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384)
-* [youtube:user] Extend URL regular expression (#26443)
-* [xhamster] Improve initials regular expression (#26526, #26353)
-* [svtplay] Fix video id extraction (#26425, #26428, #26438)
-* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979,
- #24263, #25010, #25553, #25606)
- * Switch to GraphQL
- + Add support for collections
- + Add support for clips and collections playlists
-* [biqle] Improve video ext extraction
-* [xhamster] Fix extraction (#26157, #26254)
-* [xhamster] Extend URL regular expression (#25789, #25804, #25927)
-
-
-version 2020.07.28
-
-Extractors
-* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
-* [youtube] Improve description extraction (#25937, #25980)
-* [wistia] Restrict embed regular expression (#25969)
-* [youtube] Prevent excess HTTP 301 (#25786)
-+ [youtube:playlists] Extend URL regular expression (#25810)
-+ [bellmedia] Add support for cp24.com clip URLs (#25764)
-* [brightcove] Improve embed detection (#25674)
-
-
-version 2020.06.16.1
-
-Extractors
-* [youtube] Force old layout (#25682, #25683, #25680, #25686)
-* [youtube] Fix categories and improve tags extraction
-
-
-version 2020.06.16
-
-Extractors
-* [youtube] Fix uploader id and uploader URL extraction
-* [youtube] Improve view count extraction
-* [youtube] Fix upload date extraction (#25677)
-* [youtube] Fix thumbnails extraction (#25676)
-* [youtube] Fix playlist and feed extraction (#25675)
-+ [facebook] Add support for single-video ID links
-+ [youtube] Extract chapters from JSON (#24819)
-+ [kaltura] Add support for multiple embeds on a webpage (#25523)
-
-
-version 2020.06.06
-
-Extractors
-* [tele5] Bypass geo restriction
-+ [jwplatform] Add support for bypassing geo restriction
-* [tele5] Prefer jwplatform over nexx (#25533)
-* [twitch:stream] Expect 400 and 410 HTTP errors from API
-* [twitch:stream] Fix extraction (#25528)
-* [twitch] Fix thumbnails extraction (#25531)
-+ [twitch] Pass v5 Accept HTTP header (#25531)
-* [brightcove] Fix subtitles extraction (#25540)
-+ [malltv] Add support for sk.mall.tv (#25445)
-* [periscope] Fix untitled broadcasts (#25482)
-* [jwplatform] Improve embeds extraction (#25467)
-
-
-version 2020.05.29
-
-Core
-* [postprocessor/ffmpeg] Embed series metadata with --add-metadata
-* [utils] Fix file permissions in write_json_file (#12471, #25122)
-
-Extractors
-* [ard:beta] Extend URL regular expression (#25405)
-+ [youtube] Add support for more invidious instances (#25417)
-* [giantbomb] Extend URL regular expression (#25222)
-* [ard] Improve URL regular expression (#25134, #25198)
-* [redtube] Improve formats extraction and extract m3u8 formats (#25311,
- #25321)
-* [indavideo] Switch to HTTPS for API request (#25191)
-* [redtube] Improve title extraction (#25208)
-* [vimeo] Improve format extraction and sorting (#25285)
-* [soundcloud] Reduce API playlist page limit (#25274)
-+ [youtube] Add support for yewtu.be (#25226)
-* [mailru] Fix extraction (#24530, #25239)
-* [bellator] Fix mgid extraction (#25195)
-
-
-version 2020.05.08
-
-Core
-* [downloader/http] Request last data block of exact remaining size
-* [downloader/http] Finish downloading once received data length matches
- expected
-* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
- ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
-+ [compat] Introduce compat_cookiejar_Cookie
-* [utils] Improve cookie files support
- + Add support for UTF-8 in cookie files
- * Skip malformed cookie file entries instead of crashing (invalid entry
- length, invalid expires at)
-
-Extractors
-* [youtube] Improve signature cipher extraction (#25187, #25188)
-* [iprima] Improve extraction (#25138)
-* [uol] Fix extraction (#22007)
-+ [orf] Add support for more radio stations (#24938, #24968)
-* [dailymotion] Fix typo
-- [puhutv] Remove no longer available HTTP formats (#25124)
-
-
-version 2020.05.03
-
-Core
-+ [extractor/common] Extract multiple JSON-LD entries
-* [options] Clarify doc on --exec command (#19087, #24883)
-* [extractor/common] Skip malformed ISM manifest XMLs while extracting
- ISM formats (#24667)
-
-Extractors
-* [crunchyroll] Fix and improve extraction (#25096, #25060)
-* [youtube] Improve player id extraction
-* [youtube] Use redirected video id if any (#25063)
-* [yahoo] Fix GYAO Player extraction and relax URL regular expression
- (#24178, #24778)
-* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
-* [tenplay] Relax URL regular expression (#25001)
-+ [prosiebensat1] Extract series metadata
-* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
-* [youtube] Fix DRM videos detection (#24736)
-* [thisoldhouse] Fix video id extraction (#24548, #24549)
-+ [soundcloud] Extract AAC format (#19173, #24708)
-* [youtube] Skip broken multifeed videos (#24711)
-* [nova:embed] Fix extraction (#24700)
-* [motherless] Fix extraction (#24699)
-* [twitch:clips] Extend URL regular expression (#24290, #24642)
-* [tv4] Fix ISM formats extraction (#24667)
-* [tele5] Fix extraction (#24553)
-+ [mofosex] Add support for generic embeds (#24633)
-+ [youporn] Add support for generic embeds
-+ [spankwire] Add support for generic embeds (#24633)
-* [spankwire] Fix extraction (#18924, #20648)
-
-
-version 2020.03.24
-
-Core
-- [utils] Revert support for cookie files with spaces used instead of tabs
-
-Extractors
-* [teachable] Update upskillcourses and gns3 domains
-* [generic] Look for teachable embeds before wistia
-+ [teachable] Extract chapter metadata (#24421)
-+ [bilibili] Add support for player.bilibili.com (#24402)
-+ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
-* [limelight] Remove disabled API requests (#24255)
-* [soundcloud] Fix download URL extraction (#24394)
-+ [cbc:watch] Add support for authentication (#19160)
-* [hellporno] Fix extraction (#24399)
-* [xtube] Fix formats extraction (#24348)
-* [ndr] Fix extraction (#24326)
-* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
-- [nhk] Remove obsolete rtmp formats (#24329)
-* [nhk] Relax URL regular expression (#24329)
-- [vimeo] Revert fix showcase password protected video extraction (#24224)
-
-
-version 2020.03.08
-
-Core
-+ [utils] Add support for cookie files with spaces used instead of tabs
-
-Extractors
-+ [pornhub] Add support for pornhubpremium.com (#24288)
-- [youtube] Remove outdated code and unnecessary requests
-* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
-* [nhk] Update API version (#24270)
-
-
-version 2020.03.06
-
-Extractors
-* [youtube] Fix age-gated videos support without login (#24248)
-* [vimeo] Fix showcase password protected video extraction (#24224)
-* [pornhub] Improve title extraction (#24184)
-* [peertube] Improve extraction (#23657)
-+ [servus] Add support for new URL schema (#23475, #23583, #24142)
-* [vimeo] Fix subtitles URLs (#24209)
-
-
-version 2020.03.01
-
-Core
-* [YoutubeDL] Force redirect URL to unicode on python 2
-- [options] Remove duplicate short option -v for --version (#24162)
-
-Extractors
-* [xhamster] Fix extraction (#24205)
-* [franceculture] Fix extraction (#24204)
-+ [telecinco] Add support for article opening videos
-* [telecinco] Fix extraction (#24195)
-* [xtube] Fix metadata extraction (#21073, #22455)
-* [youjizz] Fix extraction (#24181)
-- Remove no longer needed compat_str around geturl
-* [pornhd] Fix extraction (#24128)
-+ [teachable] Add support for multiple videos per lecture (#24101)
-+ [wistia] Add support for multiple generic embeds (#8347, #11385)
-* [imdb] Fix extraction (#23443)
-* [tv2dk:bornholm:play] Fix extraction (#24076)
-
-
-version 2020.02.16
-
-Core
-* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
- #10622)
-* [update] Fix updating via symlinks (#23991)
-+ [compat] Introduce compat_realpath (#23991)
-
-Extractors
-+ [npr] Add support for streams (#24042)
-+ [24video] Add support for porn.24video.net (#23779, #23784)
-- [jpopsuki] Remove extractor (#23858)
-* [nova] Improve extraction (#23690)
-* [nova:embed] Improve (#23690)
-* [nova:embed] Fix extraction (#23672)
-+ [abc:iview] Add support for 720p (#22907, #22921)
-* [nytimes] Improve format sorting (#24010)
-+ [toggle] Add support for mewatch.sg (#23895, #23930)
-* [thisoldhouse] Fix extraction (#23951)
-+ [popcorntimes] Add support for popcorntimes.tv (#23949)
-* [sportdeutschland] Update to new API
-* [twitch:stream] Lowercase channel id for stream request (#23917)
-* [tv5mondeplus] Fix extraction (#23907, #23911)
-* [tva] Relax URL regular expression (#23903)
-* [vimeo] Fix album extraction (#23864)
-* [viewlift] Improve extraction
- * Fix extraction (#23851)
- + Add support for authentication
- + Add support for more domains
-* [svt] Fix series extraction (#22297)
-* [svt] Fix article extraction (#22897, #22919)
-* [soundcloud] Improve private playlist/set tracks extraction (#3707)
-
-
-version 2020.01.24
-
-Extractors
-* [youtube] Fix sigfunc name extraction (#23819)
-* [stretchinternet] Fix extraction (#4319)
-* [voicerepublic] Fix extraction
-* [azmedien] Fix extraction (#23783)
-* [businessinsider] Fix jwplatform id extraction (#22929, #22954)
-+ [24video] Add support for 24video.vip (#23753)
-* [ivi:compilation] Fix entries extraction (#23770)
-* [ard] Improve extraction (#23761)
- * Simplify extraction
- + Extract age limit and series
- * Bypass geo-restriction
-+ [nbc] Add support for nbc multi network URLs (#23049)
-* [americastestkitchen] Fix extraction
-* [zype] Improve extraction
- + Extract subtitles (#21258)
- + Support URLs with alternative keys/tokens (#21258)
- + Extract more metadata
-* [orf:tvthek] Improve geo restricted videos detection (#23741)
-* [soundcloud] Restore previews extraction (#23739)
-
-
-version 2020.01.15
-
-Extractors
-* [yourporn] Fix extraction (#21645, #22255, #23459)
-+ [canvas] Add support for new API endpoint (#17680, #18629)
-* [ndr:base:embed] Improve thumbnails extraction (#23731)
-+ [vodplatform] Add support for embed.kwikmotion.com domain
-+ [twitter] Add support for promo_video_website cards (#23711)
-* [orf:radio] Clean description and improve extraction
-* [orf:fm4] Fix extraction (#23599)
-* [safari] Fix kaltura session extraction (#23679, #23670)
-* [lego] Fix extraction and extract subtitle (#23687)
-* [cloudflarestream] Improve extraction
- + Add support for bytehighway.net domain
- + Add support for signed URLs
- + Extract thumbnail
-* [naver] Improve extraction
- * Improve geo-restriction handling
- + Extract automatic captions
- + Extract uploader metadata
- + Extract VLive HLS formats
- * Improve metadata extraction
-- [pandatv] Remove extractor (#23630)
-* [dctp] Fix format extraction (#23656)
-+ [scrippsnetworks] Add support for www.discovery.com videos
-* [discovery] Fix anonymous token extraction (#23650)
-* [nrktv:seriebase] Fix extraction (#23625, #23537)
-* [wistia] Improve format extraction and extract subtitles (#22590)
-* [vice] Improve extraction (#23631)
-* [redtube] Detect private videos (#23518)
-
-
-version 2020.01.01
-
-Extractors
-* [brightcove] Invalidate policy key cache on failing requests
-* [pornhub] Improve locked videos detection (#22449, #22780)
-+ [pornhub] Add support for m3u8 formats
-* [pornhub] Fix extraction (#22749, #23082)
-* [brightcove] Update policy key on failing requests
-* [spankbang] Improve removed video detection (#23423)
-* [spankbang] Fix extraction (#23307, #23423, #23444)
-* [soundcloud] Automatically update client id on failing requests
-* [prosiebensat1] Improve geo restriction handling (#23571)
-* [brightcove] Cache brightcove player policy keys
-* [teachable] Fail with error message if no video URL found
-* [teachable] Improve locked lessons detection (#23528)
-+ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981)
-* [mitele] Fix extraction (#21354, #23456)
-* [soundcloud] Update client id (#23516)
-* [mailru] Relax URL regular expressions (#23509)
-
-
-version 2019.12.25
-
-Core
-* [utils] Improve str_to_int
-+ [downloader/hls] Add ability to override AES decryption key URL (#17521)
-
-Extractors
-* [mediaset] Fix parse formats (#23508)
-+ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291)
-+ [slideslive] Add support for url and vimeo service names (#23414)
-* [slideslive] Fix extraction (#23413)
-* [twitch:clips] Fix extraction (#23375)
-+ [soundcloud] Add support for token protected embeds (#18954)
-* [vk] Improve extraction
- * Fix User Videos extraction (#23356)
- * Extract all videos for lists with more than 1000 videos (#23356)
- + Add support for video albums (#14327, #14492)
-- [kontrtube] Remove extractor
-- [videopremium] Remove extractor
-- [musicplayon] Remove extractor (#9225)
-+ [ufctv] Add support for ufcfightpass.imgdge.com and
- ufcfightpass.imggaming.com (#23343)
-+ [twitch] Extract m3u8 formats frame rate (#23333)
-+ [imggaming] Add support for playlists and extract subtitles
-+ [ufcarabia] Add support for UFC Arabia (#23312)
-* [ufctv] Fix extraction
-* [yahoo] Fix gyao brightcove player id (#23303)
-* [vzaar] Override AES decryption key URL (#17521)
-+ [vzaar] Add support for AES HLS manifests (#17521, #23299)
-* [nrl] Fix extraction
-* [teachingchannel] Fix extraction
-* [nintendo] Fix extraction and partially add support for Nintendo Direct
- videos (#4592)
-+ [ooyala] Add better fallback values for domain and streams variables
-+ [youtube] Add support for youtubekids.com (#23272)
-* [tv2] Detect DRM protection
-+ [tv2] Add support for katsomo.fi and mtv.fi (#10543)
-* [tv2] Fix tv2.no article extraction
-* [msn] Improve extraction
- + Add support for YouTube and NBCSports embeds
- + Add support for articles with multiple videos
- * Improve AOL embed support
- * Improve format extraction
-* [abcotvs] Relax URL regular expression and improve metadata extraction
- (#18014)
-* [channel9] Reduce response size
-* [adobetv] Improve extraction
- * Use OnDemandPagedList for list extractors
- * Reduce show extraction requests
- * Extract original video format and subtitles
- + Add support for adobe tv embeds
-
-
-version 2019.11.28
-
-Core
-+ [utils] Add generic caesar cipher and rot47
-* [utils] Handle rd-suffixed day parts in unified_strdate (#23199)
-
-Extractors
-* [vimeo] Improve extraction
- * Fix review extraction
- * Fix ondemand extraction
- * Make password protected player case as an expected error (#22896)
- * Simplify channel based extractors code
-- [openload] Remove extractor (#11999)
-- [verystream] Remove extractor
-- [streamango] Remove extractor (#15406)
-* [dailymotion] Improve extraction
- * Extract http formats included in m3u8 manifest
- * Fix user extraction (#3553, #21415)
- + Add support for User Authentication (#11491)
- * Fix password protected videos extraction (#23176)
- * Respect age limit option and family filter cookie value (#18437)
- * Handle video url playlist query param
- * Report allowed countries for geo-restricted videos
-* [corus] Improve extraction
- + Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com
- and disneylachaine.ca (#20861)
- + Add support for self hosted videos (#22075)
- * Detect DRM protection (#14910, #9164)
-* [vivo] Fix extraction (#22328, #22279)
-+ [bitchute] Extract upload date (#22990, #23193)
-* [soundcloud] Update client id (#23214)
-
-
-version 2019.11.22
-
-Core
-+ [extractor/common] Clean jwplayer description HTML tags
-+ [extractor/common] Add data, headers and query to all major extract formats
- methods
-
-Extractors
-* [chaturbate] Fix extraction (#23010, #23012)
-+ [ntvru] Add support for non relative file URLs (#23140)
-* [vk] Fix wall audio thumbnails extraction (#23135)
-* [ivi] Fix format extraction (#21991)
-- [comcarcoff] Remove extractor
-+ [drtv] Add support for new URL schema (#23059)
-+ [nexx] Add support for Multi Player JS Setup (#23052)
-+ [teamcoco] Add support for new videos (#23054)
-* [soundcloud] Check if the soundtrack has downloads left (#23045)
-* [facebook] Fix posts video data extraction (#22473)
-- [addanime] Remove extractor
-- [minhateca] Remove extractor
-- [daisuki] Remove extractor
-* [seeker] Fix extraction
-- [revision3] Remove extractors
-* [twitch] Fix video comments URL (#18593, #15828)
-* [twitter] Improve extraction
- + Add support for generic embeds (#22168)
- * Always extract http formats for native videos (#14934)
- + Add support for Twitter Broadcasts (#21369)
- + Extract more metadata
- * Improve VMap format extraction
- * Unify extraction code for both twitter statuses and cards
-+ [twitch] Add support for Clip embed URLs
-* [lnkgo] Fix extraction (#16834)
-* [mixcloud] Improve extraction
- * Improve metadata extraction (#11721)
- * Fix playlist extraction (#22378)
- * Fix user mixes extraction (#15197, #17865)
-+ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384)
-* [onionstudios] Fix extraction
-+ [hotstar] Pass Referer header to format requests (#22836)
-* [dplay] Minimize response size
-+ [patreon] Extract uploader_id and filesize
-* [patreon] Minimize response size
-* [roosterteeth] Fix login request (#16094, #22689)
-
-
-version 2019.11.05
-
-Extractors
-+ [scte] Add support for learning.scte.org (#22975)
-+ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
-* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
-* [jamendo] Improve extraction
- * Fix album extraction (#18564)
- * Improve metadata extraction (#18565, #21379)
-* [mediaset] Relax URL guid matching (#18352)
-+ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
-* [telegraaf] Fix extraction
-+ [bellmedia] Add support for marilyn.ca videos (#22193)
-* [stv] Fix extraction (#22928)
-- [iconosquare] Remove extractor
-- [keek] Remove extractor
-- [gameone] Remove extractor (#21778)
-- [flipagram] Remove extractor
-- [bambuser] Remove extractor
-* [wistia] Reduce embed extraction false positives
-+ [wistia] Add support for inline embeds (#22931)
-- [go90] Remove extractor
-* [kakao] Remove raw request
-+ [kakao] Extract format total bitrate
-* [daum] Fix VOD and Clip extraction (#15015)
-* [kakao] Improve extraction
- + Add support for embed URLs
- + Add support for Kakao Legacy vid based embed URLs
- * Only extract fields used for extraction
- * Strip description and extract tags
-* [mixcloud] Fix cloudcast data extraction (#22821)
-* [yahoo] Improve extraction
- + Add support for live streams (#3597, #3779, #22178)
- * Bypass cookie consent page for European domains (#16948, #22576)
- + Add generic support for embeds (#20332)
-* [tv2] Fix and improve extraction (#22787)
-+ [tv2dk] Add support for TV2 DK sites
-* [onet] Improve extraction
- + Add support for onet100.vod.pl
- + Extract m3u8 formats
- * Correct audio only format info
-* [fox9] Fix extraction
-
-
-version 2019.10.29
-
-Core
-* [utils] Update major IPv4 address blocks per country
-
-Extractors
-+ [go] Add support for abc.com and freeform.com (#22823, #22864)
-+ [mtv] Add support for mtvjapan.com
-* [mtv] Fix extraction for mtv.de (#22113)
-* [videodetective] Fix extraction
-* [internetvideoarchive] Fix extraction
-* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
-- [hark] Remove extractor
-- [tutv] Remove extractor
-- [learnr] Remove extractor
-- [macgamestore] Remove extractor
-* [la7] Update Kaltura service URL (#22358)
-* [thesun] Fix extraction (#16966)
-- [makertv] Remove extractor
-+ [tenplay] Add support for 10play.com.au (#21446)
-* [soundcloud] Improve extraction
- * Improve format extraction (#22123)
- + Extract uploader_id and uploader_url (#21916)
- + Extract all known thumbnails (#19071, #20659)
- * Fix extraction for private playlists (#20976)
- + Add support for playlist embeds (#20976)
- * Skip preview formats (#22806)
-* [dplay] Improve extraction
- + Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
- * Fix it.dplay.com extraction (#22826)
- + Extract creator, tags and thumbnails
- * Handle playback API call errors
-+ [discoverynetworks] Add support for dplay.co.uk
-* [vk] Improve extraction
- + Add support for Odnoklassniki embeds
- + Extract more videos from user lists (#4470)
- + Fix wall post audio extraction (#18332)
- * Improve error detection (#22568)
-+ [odnoklassniki] Add support for embeds
-* [puhutv] Improve extraction
- * Fix subtitles extraction
- * Transform HLS URLs to HTTP URLs
- * Improve metadata extraction
-* [ceskatelevize] Skip DRM media
-+ [facebook] Extract subtitles (#22777)
-* [globo] Handle alternative hash signing method
-
-
-version 2019.10.22
-
-Core
-* [utils] Improve subtitles_filename (#22753)
-
-Extractors
-* [facebook] Bypass download rate limits (#21018)
-+ [contv] Add support for contv.com
-- [viewster] Remove extractor
-* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
- * Update the list of domains
- + Add support for aa-encoded video data
- * Improve jwplayer format extraction
- + Add support for Clappr sources
-* [mangomolo] Fix video format extraction and add support for player URLs
-* [audioboom] Improve metadata extraction
-* [twitch] Update VOD URL matching (#22395, #22727)
-- [mit] Remove support for video.mit.edu (#22403)
-- [servingsys] Remove extractor (#22639)
-* [dumpert] Fix extraction (#22428, #22564)
-* [atresplayer] Fix extraction (#16277, #16716)
-
-
-version 2019.10.16
-
-Core
-* [extractor/common] Make _is_valid_url more relaxed
-
-Extractors
-* [vimeo] Improve album videos id extraction (#22599)
-+ [globo] Extract subtitles (#22713)
-* [bokecc] Improve player params extraction (#22638)
-* [nexx] Handle result list (#22666)
-* [vimeo] Fix VHX embed extraction
-* [nbc] Switch to graphql API (#18581, #22693, #22701)
-- [vessel] Remove extractor
-- [promptfile] Remove extractor (#6239)
-* [kaltura] Fix service URL extraction (#22658)
-* [kaltura] Fix embed info strip (#22658)
-* [globo] Fix format extraction (#20319)
-* [redtube] Improve metadata extraction (#22492, #22615)
-* [pornhub:uservideos:upload] Fix extraction (#22619)
-+ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
-- [wimp] Remove extractor (#22088, #22091)
-+ [gfycat] Extend URL regular expression (#22225)
-+ [chaturbate] Extend URL regular expression (#22309)
-* [peertube] Update instances (#22414)
-+ [telequebec] Add support for coucou.telequebec.tv (#22482)
-+ [xvideos] Extend URL regular expression (#22471)
-- [youtube] Remove support for invidious.enkirton.net (#22543)
-+ [openload] Add support for oload.monster (#22592)
-* [nrktv:seriebase] Fix extraction (#22596)
-+ [youtube] Add support for yt.lelux.fi (#22597)
-* [orf:tvthek] Make manifest requests non fatal (#22578)
-* [teachable] Skip login when already logged in (#22572)
-* [viewlift] Improve extraction (#22545)
-* [nonktube] Fix extraction (#22544)
-
-
-version 2019.09.28
-
-Core
-* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
-
-Extractors
-* [vk] Fix extraction (#22522)
-* [heise] Fix kaltura embeds extraction (#22514)
-* [ted] Check for resources validity and extract subtitled downloads (#22513)
-+ [youtube] Add support for
- owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
-+ [nhk] Add support for clips
-* [nhk] Fix video extraction (#22249, #22353)
-* [byutv] Fix extraction (#22070)
-+ [openload] Add support for oload.online (#22304)
-+ [youtube] Add support for invidious.drycat.fr (#22451)
-* [jwplatform] Do not match video URLs (#20596, #22148)
-* [youtube:playlist] Unescape playlist uploader (#22483)
-+ [bilibili] Add support for audio albums and songs (#21094)
-+ [instagram] Add support for tv URLs
-+ [mixcloud] Allow uppercase letters in format URLs (#19280)
-* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
- #12842, #13912, #15669, #16303)
-* [hotstar] Use native HLS downloader by default
-+ [hotstar] Extract more formats (#22323)
-* [9now] Fix extraction (#22361)
-* [zdf] Bypass geo restriction
-+ [tv4] Extract series metadata
-* [tv4] Fix extraction (#22443)
-
-
-version 2019.09.12.1
-
-Extractors
-* [youtube] Remove quality and tbr for itag 43 (#22372)
-
-
-version 2019.09.12
-
-Extractors
-* [youtube] Quick extraction tempfix (#22367, #22163)
-
-
-version 2019.09.01
-
-Core
-+ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
- #21859)
-+ [downloader/external] Respect mtime option for aria2c (#22242)
-
-Extractors
-+ [xhamster:user] Add support for user pages (#16330, #18454)
-+ [xhamster] Add support for more domains
-+ [verystream] Add support for woof.tube (#22217)
-+ [dailymotion] Add support for lequipe.fr (#21328, #22152)
-+ [openload] Add support for oload.vip (#22205)
-+ [bbccouk] Extend URL regular expression (#19200)
-+ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
-* [safari] Fix authentication (#22161, #22184)
-* [usanetwork] Fix extraction (#22105)
-+ [einthusan] Add support for einthusan.ca (#22171)
-* [youtube] Improve unavailable message extraction (#22117)
-+ [piksel] Extract subtitles (#20506)
-
-
-version 2019.08.13
-
-Core
-* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
-* [YoutubeDL] Check annotations availability (#18582)
-
-Extractors
-* [youtube:playlist] Improve flat extraction (#21927)
-* [youtube] Fix annotations extraction (#22045)
-+ [discovery] Extract series meta field (#21808)
-* [youtube] Improve error detection (#16445)
-* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
-+ [roosterteeth] Add support for watch URLs
-* [discovery] Limit video data by show slug (#21980)
-
-
-version 2019.08.02
-
-Extractors
-+ [tvigle] Add support for HLS and DASH formats (#21967)
-* [tvigle] Fix extraction (#21967)
-+ [yandexvideo] Add support for DASH formats (#21971)
-* [discovery] Use API call for video data extraction (#21808)
-+ [mgtv] Extract format_note (#21881)
-* [tvn24] Fix metadata extraction (#21833, #21834)
-* [dlive] Relax URL regular expression (#21909)
-+ [openload] Add support for oload.best (#21913)
-* [youtube] Improve metadata extraction for age gate content (#21943)
-
-
-version 2019.07.30
-
-Extractors
-* [youtube] Fix and improve title and description extraction (#21934)
-
-
-version 2019.07.27
-
-Extractors
-+ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265)
-+ [discovery] Add support for go.discovery.com URLs
-* [youtube:playlist] Relax video regular expression (#21844)
-* [generic] Restrict --default-search schemeless URLs detection pattern
- (#21842)
-* [vrv] Fix CMS signing query extraction (#21809)
-
-
-version 2019.07.16
-
-Extractors
-+ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv
- (#21281, #21290)
-* [kaltura] Check source format URL (#21290)
-* [ctsnews] Fix YouTube embeds extraction (#21678)
-+ [einthusan] Add support for einthusan.com (#21748, #21775)
-+ [youtube] Add support for invidious.mastodon.host (#21777)
-+ [gfycat] Extend URL regular expression (#21779, #21780)
-* [youtube] Restrict is_live extraction (#21782)
-
-
-version 2019.07.14
-
-Extractors
-* [porn91] Fix extraction (#21312)
-+ [yandexmusic] Extract track number and disk number (#21421)
-+ [yandexmusic] Add support for multi disk albums (#21420, #21421)
-* [lynda] Handle missing subtitles (#20490, #20513)
-+ [youtube] Add more invidious instances to URL regular expression (#21694)
-* [twitter] Improve uploader id extraction (#21705)
-* [spankbang] Fix and improve metadata extraction
-* [spankbang] Fix extraction (#21763, #21764)
-+ [dlive] Add support for dlive.tv (#18080)
-+ [livejournal] Add support for livejournal.com (#21526)
-* [roosterteeth] Fix free episode extraction (#16094)
-* [dbtv] Fix extraction
-* [bellator] Fix extraction
-- [rudo] Remove extractor (#18430, #18474)
-* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224)
-* [bleacherreport] Fix Bleacher Report CMS extraction
-* [espn] Fix fivethirtyeight.com extraction
-* [5tv] Relax video URL regular expression and support https URLs
-* [youtube] Fix is_live extraction (#21734)
-* [youtube] Fix authentication (#11270)
-
-
-version 2019.07.12
-
-Core
-+ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016)
-
-Extractors
-+ [mgtv] Pass Referer HTTP header for format URLs (#21726)
-+ [beeg] Add support for api/v6 v2 URLs without t argument (#21701)
-* [voxmedia:volume] Improve vox embed extraction (#16846)
-* [funnyordie] Move extraction to VoxMedia extractor (#16846)
-* [gameinformer] Fix extraction (#8895, #15363, #17206)
-* [funk] Fix extraction (#17915)
-* [packtpub] Relax lesson URL regular expression (#21695)
-* [packtpub] Fix extraction (#21268)
-* [philharmoniedeparis] Relax URL regular expression (#21672)
-* [peertube] Detect embed URLs in generic extraction (#21666)
-* [mixer:vod] Relax URL regular expression (#21657, #21658)
-+ [lecturio] Add support for id based URLs (#21630)
-+ [go] Add site info for disneynow (#21613)
-* [ted] Restrict info regular expression (#21631)
-* [twitch:vod] Update m3u8 URL (#21538, #21607)
-* [vzaar] Fix videos with empty title (#21606)
-* [tvland] Fix extraction (#21384)
-* [arte] Clean extractor (#15583, #21614)
-
-
-version 2019.07.02
-
-Core
-+ [utils] Introduce random_user_agent and use as default User-Agent (#21546)
-
-Extractors
-+ [vevo] Add support for embed.vevo.com URLs (#21565)
-+ [openload] Add support for oload.biz (#21574)
-* [xiami] Update API base URL (#21575)
-* [yourporn] Fix extraction (#21585)
-+ [acast] Add support for URLs with episode id (#21444)
-+ [dailymotion] Add support for DM.player embeds
-* [soundcloud] Update client id
-
-
-version 2019.06.27
-
-Extractors
-+ [go] Add support for disneynow.com (#21528)
-* [mixer:vod] Relax URL regular expression (#21531, #21536)
-* [drtv] Relax URL regular expression
-* [fusion] Fix extraction (#17775, #21269)
-- [nfb] Remove extractor (#21518)
-+ [beeg] Add support for api/v6 v2 URLs (#21511)
-+ [brightcove:new] Add support for playlists (#21331)
-+ [openload] Add support for oload.life (#21495)
-* [vimeo:channel,group] Make title extraction non fatal
-* [vimeo:likes] Implement extractor in terms of channel extractor (#21493)
-+ [pornhub] Add support for more paged video sources
-+ [pornhub] Add support for downloading single pages and search pages (#15570)
-* [pornhub] Rework extractors (#11922, #16078, #17454, #17936)
-+ [youtube] Add another signature function pattern
-* [tf1] Fix extraction (#21365, #21372)
-* [crunchyroll] Move Accept-Language workaround to video extractor since
- it causes playlists not to list any videos
-* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443)
-
-
-version 2019.06.21
-
-Core
-* [utils] Restrict parse_codecs and add theora as known vcodec (#21381)
-
-Extractors
-* [youtube] Update signature function patterns (#21469, #21476)
-* [youtube] Make --write-annotations non fatal (#21452)
-+ [sixplay] Add support for rtlmost.hu (#21405)
-* [youtube] Hardcode codec metadata for av01 video only formats (#21381)
-* [toutv] Update client key (#21370)
-+ [biqle] Add support for new embed domain
-* [cbs] Improve DRM protected videos detection (#21339)
-
-
-version 2019.06.08
-
-Core
-* [downloader/common] Improve rate limit (#21301)
-* [utils] Improve strip_or_none
-* [extractor/common] Strip src attribute for HTML5 entries code (#18485,
- #21169)
-
-Extractors
-* [ted] Fix playlist extraction (#20844, #21032)
-* [vlive:playlist] Fix video extraction when no playlist is found (#20590)
-+ [vlive] Add CH+ support (#16887, #21209)
-+ [openload] Add support for oload.website (#21329)
-+ [tvnow] Extract HD formats (#21201)
-+ [redbulltv] Add support for rrn:content URLs (#21297)
-* [youtube] Fix average rating extraction (#21304)
-+ [bitchute] Extract HTML5 formats (#21306)
-* [cbsnews] Fix extraction (#9659, #15397)
-* [vvvvid] Relax URL regular expression (#21299)
-+ [prosiebensat1] Add support for new API (#21272)
-+ [vrv] Extract adaptive_hls formats (#21243)
-* [viki] Switch to HTTPS (#21001)
-* [LiveLeak] Check if the original videos exist (#21206, #21208)
-* [rtp] Fix extraction (#15099)
-* [youtube] Improve DRM protected videos detection (#1774)
-+ [srgssrplay] Add support for popupvideoplayer URLs (#21155)
-+ [24video] Add support for porno.24video.net (#21194)
-+ [24video] Add support for 24video.site (#21193)
-- [pornflip] Remove extractor
-- [criterion] Remove extractor (#21195)
-* [pornhub] Use HTTPS (#21061)
-* [bitchute] Fix uploader extraction (#21076)
-* [streamcloud] Reduce waiting time to 6 seconds (#21092)
-- [novamov] Remove extractors (#21077)
-+ [openload] Add support for oload.press (#21135)
-* [vivo] Fix extraction (#18906, #19217)
-
-
-version 2019.05.20
-
-Core
-+ [extractor/common] Move workaround for applying first Set-Cookie header
- into a separate _apply_first_set_cookie_header method
-
-Extractors
-* [safari] Fix authentication (#21090)
-* [vk] Use _apply_first_set_cookie_header
-* [vrt] Fix extraction (#20527)
-+ [canvas] Add support for vrtnieuws and sporza site ids and extract
- AES HLS formats
-+ [vrv] Extract captions (#19238)
-* [tele5] Improve video id extraction
-* [tele5] Relax URL regular expression (#21020, #21063)
-* [svtplay] Update API URL (#21075)
-+ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
-
-
-version 2019.05.11
-
-Core
-* [utils] Transliterate "þ" as "th" (#20897)
-
-Extractors
-+ [cloudflarestream] Add support for videodelivery.net (#21049)
-+ [byutv] Add support for DVR videos (#20574, #20676)
-+ [gfycat] Add support for URLs with tags (#20696, #20731)
-+ [openload] Add support for verystream.com (#20701, #20967)
-* [youtube] Use sp field value for signature field name (#18841, #18927,
- #21028)
-+ [yahoo:gyao] Extend URL regular expression (#21008)
-* [youtube] Fix channel id extraction (#20982, #21003)
-+ [sky] Add support for news.sky.com (#13055)
-+ [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965)
-+ [francetvinfo] Extend video id extraction (#20619, #20740)
-* [4tube] Update token hosts (#20918)
-* [hotstar] Move to API v2 (#20931)
-* [fox] Fix API error handling under python 2 (#20925)
-+ [redbulltv] Extend URL regular expression (#20922)
-
-
-version 2019.04.30
-
-Extractors
-* [openload] Use real Chrome versions (#20902)
-- [youtube] Remove info el for get_video_info request
-* [youtube] Improve extraction robustness
-- [dramafever] Remove extractor (#20868)
-* [adn] Fix subtitle extraction (#12724)
-+ [ccc] Extract creator (#20355)
-+ [ccc:playlist] Add support for media.ccc.de playlists (#14601, #20355)
-+ [sverigesradio] Add support for sverigesradio.se (#18635)
-+ [cinemax] Add support for cinemax.com
-* [sixplay] Try extracting non-DRM protected manifests (#20849)
-+ [youtube] Extract Youtube Music Auto-generated metadata (#20599, #20742)
-- [wrzuta] Remove extractor (#20684, #20801)
-* [twitch] Prefer source format (#20850)
-+ [twitcasting] Add support for private videos (#20843)
-* [reddit] Validate thumbnail URL (#20030)
-* [yandexmusic] Fix track URL extraction (#20820)
-
-
-version 2019.04.24
-
-Extractors
-* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766,
- #20767, #20769, #20771, #20768, #20770)
-* [toutv] Fix extraction and extract series info (#20757)
-+ [vrv] Add support for movie listings (#19229)
-+ [youtube] Print error when no data is available (#20737)
-+ [soundcloud] Add support for new rendition and improve extraction (#20699)
-+ [ooyala] Add support for geo verification proxy
-+ [nrl] Add support for nrl.com (#15991)
-+ [vimeo] Extract live archive source format (#19144)
-+ [vimeo] Add support for live streams and improve info extraction (#19144)
-+ [ntvcojp] Add support for cu.ntv.co.jp
-+ [nhk] Extract RTMPT format
-+ [nhk] Add support for audio URLs
-+ [udemy] Add another course id extraction pattern (#20491)
-+ [openload] Add support for oload.services (#20691)
-+ [openload] Add support for openloed.co (#20691, #20693)
-* [bravotv] Fix extraction (#19213)
-
-
-version 2019.04.17
-
-Extractors
-* [openload] Randomize User-Agent (#20688)
-+ [openload] Add support for oladblock domains (#20471)
-* [adn] Fix subtitle extraction (#12724)
-+ [aol] Add support for localized websites
-+ [yahoo] Add support for GYAO episode URLs
-+ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098)
-+ [yahoo] Add support for gyao.yahoo.co.jp
-* [aenetworks] Fix history topic extraction and extract more formats
-+ [cbs] Extract smpte and vtt subtitles
-+ [streamango] Add support for streamcherry.com (#20592)
-+ [yourporn] Add support for sxyprn.com (#20646)
-* [mgtv] Fix extraction (#20650)
-* [linkedin:learning] Use urljoin for form action URL (#20431)
-+ [gdc] Add support for kaltura embeds (#20575)
-* [dispeak] Improve mp4 bitrate extraction
-* [kaltura] Sanitize embed URLs
-* [jwplatform] Do not match manifest URLs (#20596)
-* [aol] Restrict URL regular expression and improve format extraction
-+ [tiktok] Add support for new URL schema (#20573)
-+ [stv:player] Add support for player.stv.tv (#20586)
-
-
-version 2019.04.07
-
-Core
-+ [downloader/external] Pass rtmp_conn to ffmpeg
-
-Extractors
-+ [ruutu] Add support for audio podcasts (#20473, #20545)
-+ [xvideos] Extract all thumbnails (#20432)
-+ [platzi] Add support for platzi.com (#20562)
-* [dvtv] Fix extraction (#18514, #19174)
-+ [vrv] Add basic support for individual movie links (#19229)
-+ [bfi:player] Add support for player.bfi.org.uk (#19235)
-* [hbo] Fix extraction and extract subtitles (#14629, #13709)
-* [youtube] Extract srv[1-3] subtitle formats (#20566)
-* [adultswim] Fix extraction (#18025)
-* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
-* [adn] Fix subtitle compatibility with ffmpeg
-* [adn] Fix extraction and add support for positioning styles (#20549)
-* [vk] Use unique video id (#17848)
-* [newstube] Fix extraction
-* [rtl2] Update extraction
-+ [adobeconnect] Add support for adobeconnect.com (#20283)
-+ [gaia] Add support for authentication (#14605)
-+ [mediasite] Add support for dashed ids and named catalogs (#20531)
-
-
-version 2019.04.01
-
-Core
-* [utils] Improve int_or_none and float_or_none (#20403)
-* Check for valid --min-sleep-interval when --max-sleep-interval is specified
- (#20435)
-
-Extractors
-+ [weibo] Extend URL regular expression (#20496)
-+ [xhamster] Add support for xhamster.one (#20508)
-+ [mediasite] Add support for catalogs (#20507)
-+ [teamtreehouse] Add support for teamtreehouse.com (#9836)
-+ [ina] Add support for audio URLs
-* [ina] Improve extraction
-* [cwtv] Fix episode number extraction (#20461)
-* [npo] Improve DRM detection
-+ [pornhub] Add support for DASH formats (#20403)
-* [svtplay] Update API endpoint (#20430)
-
-
-version 2019.03.18
-
-Core
-* [extractor/common] Improve HTML5 entries extraction
-+ [utils] Introduce parse_bitrate
-* [update] Hide update URLs behind redirect
-* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346)
-
-Extractors
-+ [yandexvideo] Add extractor
-* [openload] Improve embed detection
-+ [corus] Add support for bigbrothercanada.ca (#20357)
-+ [orf:radio] Extract series (#20012)
-+ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359)
-- [anysex] Remove extractor (#19279)
-+ [ciscolive] Add support for new URL schema (#20320, #20351)
-+ [youtube] Add support for invidiou.sh (#20309)
-- [anitube] Remove extractor (#20334)
-- [ruleporn] Remove extractor (#15344, #20324)
-* [npr] Fix extraction (#10793, #13440)
-* [biqle] Fix extraction (#11471, #15313)
-* [viddler] Modernize
-* [moevideo] Fix extraction
-- [primesharetv] Remove extractor
-* [hypem] Modernize and extract more metadata (#15320)
-* [veoh] Fix extraction
-* [escapist] Modernize
-- [videomega] Remove extractor (#10108)
-+ [beeg] Add support for beeg.porn (#20306)
-* [vimeo:review] Improve config url extraction and extract original format
- (#20305)
-* [fox] Detect geo restriction and authentication errors (#20208)
-
-
-version 2019.03.09
-
-Core
-* [extractor/common] Use compat_etree_Element
-+ [compat] Introduce compat_etree_Element
-* [extractor/common] Fallback url to base URL for DASH formats
-* [extractor/common] Do not fail on invalid data while parsing F4M manifest
- in non fatal mode
-* [extractor/common] Return MPD manifest as format's url meta field (#20242)
-* [utils] Strip #HttpOnly_ prefix from cookies files (#20219)
-
-Extractors
-* [francetv:site] Relax video id regular expression (#20268)
-* [toutv] Detect invalid login error
-* [toutv] Fix authentication (#20261)
-+ [urplay] Extract timestamp (#20235)
-+ [openload] Add support for oload.space (#20246)
-* [facebook] Improve uploader extraction (#20250)
-* [bbc] Use compat_etree_Element
-* [crunchyroll] Use compat_etree_Element
-* [npo] Improve ISM extraction
-* [rai] Improve extraction (#20253)
-* [paramountnetwork] Fix mgid extraction (#20241)
-* [libsyn] Improve extraction (#20229)
-+ [youtube] Add more invidious instances to URL regular expression (#20228)
-* [spankbang] Fix extraction (#20023)
-* [espn] Extend URL regular expression (#20013)
-* [sixplay] Handle videos with empty assets (#20016)
-+ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070)
-
-
-version 2019.03.01
-
-Core
-+ [downloader/external] Add support for rate limit and retries for wget
-* [downloader/external] Fix infinite retries for curl (#19303)
-
-Extractors
-* [npo] Fix extraction (#20084)
-* [francetv:site] Extend video id regex (#20029, #20071)
-+ [periscope] Extract width and height (#20015)
-* [servus] Fix extraction (#19297)
-* [bbccouk] Make subtitles non fatal (#19651)
-* [metacafe] Fix family filter bypass (#19287)
-
-
-version 2019.02.18
-
-Extractors
-* [tvp:website] Fix and improve extraction
-+ [tvp] Detect unavailable videos
-* [tvp] Fix description extraction and make thumbnail optional
-+ [linuxacademy] Add support for linuxacademy.com (#12207)
-* [bilibili] Update keys (#19233)
-* [udemy] Extend URL regular expressions (#14330, #15883)
-* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
-* [noovo] Fix extraction (#19230)
-* [rai] Relax URL regular expression (#19232)
-+ [vshare] Pass Referer to download request (#19205, #19221)
-+ [openload] Add support for oload.live (#19222)
-* [imgur] Use video id as title fallback (#18590)
-+ [twitch] Add new source format detection approach (#19193)
-* [tvplayhome] Fix video id extraction (#19190)
-* [tvplayhome] Fix episode metadata extraction (#19190)
-* [rutube:embed] Fix extraction (#19163)
-+ [rutube:embed] Add support for private videos (#19163)
-+ [soundcloud] Extract more metadata
-+ [trunews] Add support for trunews.com (#19153)
-+ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
-
-
-version 2019.02.08
-
-Core
-* [utils] Improve JSON-LD regular expression (#18058)
-* [YoutubeDL] Fallback to ie_key of matching extractor while making
- download archive id when no explicit ie_key is provided (#19022)
-
-Extractors
-+ [malltv] Add support for mall.tv (#18058, #17856)
-+ [spankbang:playlist] Add support for playlists (#19145)
-* [spankbang] Extend URL regular expression
-* [trutv] Fix extraction (#17336)
-* [toutv] Fix authentication (#16398, #18700)
-* [pornhub] Fix tags and categories extraction (#13720, #19135)
-* [pornhd] Fix formats extraction
-+ [pornhd] Extract like count (#19123, #19125)
-* [radiocanada] Switch to the new media requests (#19115)
-+ [teachable] Add support for courses.workitdaily.com (#18871)
-- [vporn] Remove extractor (#16276)
-+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
-+ [drtuber] Extract duration (#19078)
-* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
-* [soundcloud] Update client id
-* [drtv] Improve preference (#19079)
-+ [openload] Add support for openload.pw and oload.pw (#18930)
-+ [openload] Add support for oload.info (#19073)
-* [crackle] Authorize media detail request (#16931)
-
-
-version 2019.01.30.1
-
-Core
-* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
-
-
-version 2019.01.30
-
-Core
-* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
- subtitles (#19024, #19042)
-* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
-
-Extractors
-* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
-* [drtv] Improve extraction (#19039)
- + Add support for EncryptedUri videos
- + Extract more metadata
- * Fix subtitles extraction
-+ [fox] Add support for locked videos using cookies (#19060)
-* [fox] Fix extraction for free videos (#19060)
-+ [zattoo] Add support for tv.salt.ch (#19059)
-
-
-version 2019.01.27
-
-Core
-+ [extractor/common] Extract season in _json_ld
-* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
- (#681)
-
-Extractors
-* [vice] Fix extraction for locked videos (#16248)
-+ [wakanim] Detect DRM protected videos
-+ [wakanim] Add support for wakanim.tv (#14374)
-* [usatoday] Fix extraction for videos with custom brightcove partner id
- (#18990)
-* [drtv] Fix extraction (#18989)
-* [nhk] Extend URL regular expression (#18968)
-* [go] Fix Adobe Pass requests for Disney Now (#18901)
-+ [openload] Add support for oload.club (#18969)
-
-
-version 2019.01.24
-
-Core
-* [YoutubeDL] Fix negation for string operators in format selection (#18961)
-
-
-version 2019.01.23
-
-Core
-* [utils] Fix urljoin for paths with non-http(s) schemes
-* [extractor/common] Improve jwplayer relative URL handling (#18892)
-+ [YoutubeDL] Add negation support for string comparisons in format selection
- expressions (#18600, #18805)
-* [extractor/common] Improve HLS video-only format detection (#18923)
-
-Extractors
-* [crunchyroll] Extend URL regular expression (#18955)
-* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
- #17197, #18338, #18842, #18899)
-+ [vrv] Add support for authentication (#14307)
-* [videomore:season] Fix extraction
-* [videomore] Improve extraction (#18908)
-+ [tnaflix] Pass Referer in metadata request (#18925)
-* [radiocanada] Relax DRM check (#18608, #18609)
-* [vimeo] Fix video password verification for videos protected by
- Referer HTTP header
-+ [hketv] Add support for hkedcity.net (#18696)
-+ [streamango] Add support for fruithosts.net (#18710)
-+ [instagram] Add support for tags (#18757)
-+ [odnoklassniki] Detect paid videos (#18876)
-* [ted] Correct acodec for HTTP formats (#18923)
-* [cartoonnetwork] Fix extraction (#15664, #17224)
-* [vimeo] Fix extraction for password protected player URLs (#18889)
-
-
-version 2019.01.17
-
-Extractors
-* [youtube] Extend JS player signature function name regular expressions
- (#18890, #18891, #18893)
-
-
-version 2019.01.16
-
-Core
-+ [test/helper] Add support for maxcount and count collection len checkers
-* [downloader/hls] Fix uplynk ad skipping (#18824)
-* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
-
-Extractors
-* [youtube] Skip unsupported adaptive stream type (#18804)
-+ [youtube] Extract DASH formats from player response (#18804)
-* [funimation] Fix extraction (#14089)
-* [skylinewebcams] Fix extraction (#18853)
-+ [curiositystream] Add support for non app URLs
-+ [bitchute] Check formats (#18833)
-* [wistia] Extend URL regular expression (#18823)
-+ [playplustv] Add support for playplus.com (#18789)
-
-
-version 2019.01.10
-
-Core
-* [extractor/common] Use episode name as title in _json_ld
-+ [extractor/common] Add support for movies in _json_ld
-* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
- (#18765)
-+ [utils] Add language codes replaced in 1989 revision of ISO 639
- to ISO639Utils (#18765)
-
-Extractors
-* [youtube] Extract live HLS URL from player response (#18799)
-+ [outsidetv] Add support for outsidetv.com (#18774)
-* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
-+ [fox] Add support for National Geographic (#17985, #15333, #14698)
-+ [playplustv] Add support for playplus.tv (#18789)
-* [globo] Set GLBID cookie manually (#17346)
-+ [gaia] Add support for gaia.com (#14605)
-* [youporn] Fix title and description extraction (#18748)
-+ [hungama] Add support for hungama.com (#17402, #18771)
-* [dtube] Fix extraction (#18741)
-* [tvnow] Fix and rework extractors and prepare for a switch to the new API
- (#17245, #18499)
-* [carambatv:page] Fix extraction (#18739)
-
-
-version 2019.01.02
-
-Extractors
-* [discovery] Use geo verification headers (#17838)
-+ [packtpub] Add support for subscription.packtpub.com (#18718)
-* [yourporn] Fix extraction (#18583)
-+ [acast:channel] Add support for play.acast.com (#18587)
-+ [extractors] Add missing age limits (#18621)
-+ [rmcdecouverte] Add support for live stream
-* [rmcdecouverte] Bypass geo restriction
-* [rmcdecouverte] Update URL regular expression (#18595, #18697)
-* [manyvids] Fix extraction (#18604, #18614)
-* [bitchute] Fix extraction (#18567)
-
-
-version 2018.12.31
-
-Extractors
-+ [bbc] Add support for another embed pattern (#18643)
-+ [npo:live] Add support for npostart.nl (#18644)
-* [beeg] Fix extraction (#18610, #18626)
-* [youtube] Unescape HTML for series (#18641)
-+ [youtube] Extract more format metadata
-* [youtube] Detect DRM protected videos (#1774)
-* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
-* [youtube] Extend HTML5 player regular expression (#17516)
-+ [liveleak] Add support for another embed type and restore original
- format extraction
-+ [crackle] Extract ISM and HTTP formats
-+ [twitter] Pass Referer with card request (#18579)
-* [mediasite] Extend URL regular expression (#18558)
-+ [lecturio] Add support for lecturio.de (#18562)
-+ [discovery] Add support for Scripps Networks watch domains (#17947)
-
-
-version 2018.12.17
-
-Extractors
-* [ard:beta] Improve geo restricted videos extraction
-* [ard:beta] Fix subtitles extraction
-* [ard:beta] Improve extraction robustness
-* [ard:beta] Relax URL regular expression (#18441)
-* [acast] Add support for embed.acast.com and play.acast.com (#18483)
-* [iprima] Relax URL regular expression (#18515, #18540)
-* [vrv] Fix initial state extraction (#18553)
-* [youtube] Fix mark watched (#18546)
-+ [safari] Add support for learning.oreilly.com (#18510)
-* [youtube] Fix multifeed extraction (#18531)
-* [lecturio] Improve subtitles extraction (#18488)
-* [uol] Fix format URL extraction (#18480)
-+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
-
-
-version 2018.12.09
-
-Core
-* [YoutubeDL] Keep session cookies in cookie file between runs
-* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
-
-Extractors
-+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
-+ [aenetworks] Add support for historyvault.com (#18460)
-* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
- #17223, #18404)
-* [iprima] Relax URL regular expression (#18453)
-* [hotstar] Fix video data extraction (#18386)
-* [ard:mediathek] Fix title and description extraction (#18349, #18371)
-* [xvideos] Switch to HTTPS (#18422, #18427)
-+ [lecturio] Add support for lecturio.com (#18405)
-+ [nrktv:series] Add support for extra materials
-* [nrktv:season,series] Fix extraction (#17159, #17258)
-* [nrktv] Relax URL regular expression (#18304, #18387)
-* [yourporn] Fix extraction (#18424, #18425)
-* [tbs] Fix info extraction (#18403)
-+ [gamespot] Add support for review URLs
-
-
-version 2018.12.03
-
-Core
-* [utils] Fix random_birthday to generate existing dates only (#18284)
-
-Extractors
-+ [tiktok] Add support for tiktok.com (#18108, #18135)
-* [pornhub] Use actual URL host for requests (#18359)
-* [lynda] Fix authentication (#18158, #18217)
-* [gfycat] Update API endpoint (#18333, #18343)
-+ [hotstar] Add support for alternative app state layout (#18320)
-* [azmedien] Fix extraction (#18334, #18336)
-+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
-* [joj] Fix extraction (#18280, #18281)
-+ [wistia] Add support for fast.wistia.com (#18287)
-
-
-version 2018.11.23
-
-Core
-+ [setup.py] Add more relevant classifiers
-
-Extractors
-* [mixcloud] Fallback to hardcoded decryption key (#18016)
-* [nbc:news] Fix article extraction (#16194)
-* [foxsports] Fix extraction (#17543)
-* [loc] Relax regular expression and improve formats extraction
-+ [ciscolive] Add support for ciscolive.cisco.com (#17984)
-* [nzz] Relax kaltura regex (#18228)
-* [sixplay] Fix formats extraction
-* [bitchute] Improve title extraction
-* [kaltura] Limit requested MediaEntry fields
-+ [americastestkitchen] Add support for zype embeds (#18225)
-+ [pornhub] Add pornhub.net alias
-* [nova:embed] Fix extraction (#18222)
-
-
-version 2018.11.18
-
-Extractors
-+ [wwe] Extract subtitles
-+ [wwe] Add support for playlists (#14781)
-+ [wwe] Add support for wwe.com (#14781, #17450)
-* [vk] Detect geo restriction (#17767)
-* [openload] Use original host during extraction (#18211)
-* [atvat] Fix extraction (#18041)
-+ [rte] Add support for new API endpoint (#18206)
-* [tnaflixnetwork:embed] Fix extraction (#18205)
-* [picarto] Use API and add token support (#16518)
-+ [zype] Add support for player.zype.com (#18143)
-* [vivo] Fix extraction (#18139)
-* [ruutu] Update API endpoint (#18138)
-
-
-version 2018.11.07
-
-Extractors
-+ [youtube] Add another JS signature function name regex (#18091, #18093,
- #18094)
-* [facebook] Fix tahoe request (#17171)
-* [cliphunter] Fix extraction (#18083)
-+ [youtube:playlist] Add support for invidio.us (#18077)
-* [zattoo] Arrange API hosts for derived extractors (#18035)
-+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
-
-
-version 2018.11.03
-
-Core
-* [extractor/common] Ensure response handle is not prematurely closed before
- it can be read if it matches expected_status (#17195, #17846, #17447)
-
-Extractors
-* [laola1tv:embed] Set correct stream access URL scheme (#16341)
-+ [ehftv] Add support for ehftv.com (#15408)
-* [azmedien] Adapt to major site redesign (#17745, #17746)
-+ [twitcasting] Add support for twitcasting.tv (#17981)
-* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
-+ [openload] Add support for oload.fun (#18045)
-* [njpwworld] Fix authentication (#17427)
-+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
-* [theplatform] Improve error detection (#13222)
-* [cnbc] Simplify extraction (#14280, #17110)
-+ [cnbc] Add support for new URL schema (#14193)
-* [aparat] Improve extraction and extract more metadata (#17445, #18008)
-* [aparat] Fix extraction
-
-
-version 2018.10.29
-
-Core
-+ [extractor/common] Add validation for JSON-LD URLs
-
-Extractors
-+ [sportbox] Add support for matchtv.ru
-* [sportbox] Fix extraction (#17978)
-* [screencast] Fix extraction (#14590, #14617, #17990)
-+ [openload] Add support for oload.icu
-+ [ivi] Add support for ivi.tv
-* [crunchyroll] Improve extraction robustness (#17991)
-* [dailymail] Fix formats extraction (#17976)
-* [viewster] Reduce format requests
-* [cwtv] Handle API errors (#17905)
-+ [rutube] Use geo verification headers (#17897)
-+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
-- [tv3] Remove extractor (#10461, #15339)
-* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
-+ [openload] Add support for oload.cc (#17823)
-+ [patreon] Extract post_file URL (#17792)
-* [patreon] Fix extraction (#14502, #10471)
-
-
-version 2018.10.05
-
-Extractors
-* [pluralsight] Improve authentication (#17762)
-* [dailymotion] Fix extraction (#17699)
-* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
-+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
-* [philharmoniedeparis] Fix extraction (#17705)
-+ [jamendo] Add support for licensing.jamendo.com (#17724)
-+ [openload] Add support for oload.cloud (#17710)
-* [pluralsight] Fix subtitles extraction (#17726, #17728)
-+ [vimeo] Add another config regular expression (#17690)
-* [spike] Fix Paramount Network extraction (#17677)
-* [hotstar] Fix extraction (#14694, #14931, #17637)
-
-
-version 2018.09.26
-
-Extractors
-* [pluralsight] Fix subtitles extraction (#17671)
-* [mediaset] Improve embed support (#17668)
-+ [youtube] Add support for invidio.us (#17613)
-+ [zattoo] Add support for more zattoo platform sites
-* [zattoo] Fix extraction (#17175, #17542)
-
-
-version 2018.09.18
-
-Core
-+ [extractor/common] Introduce channel meta fields
-
-Extractors
-* [adobepass] Don't pollute default headers dict
-* [udemy] Don't pollute default headers dict
-* [twitch] Don't pollute default headers dict
-* [youtube] Don't pollute default query dict (#17593)
-* [crunchyroll] Prefer hardsubless formats and formats in locale language
-* [vrv] Make format ids deterministic
-* [vimeo] Fix ondemand playlist extraction (#14591)
-+ [pornhub] Extract upload date (#17574)
-+ [porntube] Extract channel meta fields
-+ [vimeo] Extract channel meta fields
-+ [youtube] Extract channel meta fields (#9676, #12939)
-* [porntube] Fix extraction (#17541)
-* [asiancrush] Fix extraction (#15630)
-+ [twitch:clips] Extend URL regular expression (#17559)
-+ [vzaar] Add support for HLS
-* [tube8] Fix metadata extraction (#17520)
-* [eporner] Extract JSON-LD (#17519)
-
-
-version 2018.09.10
-
-Core
-+ [utils] Properly recognize AV1 codec (#17506)
-
-Extractors
-+ [iprima] Add support for prima.iprima.cz (#17514)
-+ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
-* [nbc] Fix extraction of percent encoded URLs (#17374)
-
-
-version 2018.09.08
-
-Extractors
-* [youtube] Fix extraction (#17457, #17464)
-+ [pornhub:uservideos] Add support for new URLs (#17388)
-* [iprima] Confirm adult check (#17437)
-* [slideslive] Make check for video service name case-insensitive (#17429)
-* [radiojavan] Fix extraction (#17151)
-* [generic] Skip unsuccessful jwplayer extraction (#16735)
-
-
-version 2018.09.01
-
-Core
-* [utils] Skip remote IP addresses that do not match the source address's
- IP version when creating a connection (#13422, #17362)
-
-Extractors
-+ [ard] Add support for one.ard.de (#17397)
-* [niconico] Fix extraction on python3 (#17393, #17407)
-* [ard] Extract f4m formats
-* [crunchyroll] Parse vilos media data (#17343)
-+ [ard] Add support for Beta ARD Mediathek
-+ [bandcamp] Extract more metadata (#13197)
-* [internazionale] Fix extraction of non-available-abroad videos (#17386)
-
-
-version 2018.08.28
-
-Extractors
-+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
- (#17361)
-* [bitchute] Fix extraction by passing custom User-Agent (#17360)
-* [webofstories:playlist] Fix extraction (#16914)
-+ [tvplayhome] Add support for new tvplay URLs (#17344)
-+ [generic] Allow relative src for videojs embeds (#17324)
-+ [xfileshare] Add support for vidto.se (#17317)
-+ [vidzi] Add support for vidzi.nu (#17316)
-+ [nova:embed] Add support for media.cms.nova.cz (#17282)
-
-
-version 2018.08.22
-
-Core
-* [utils] Use pure browser header for User-Agent (#17236)
-
-Extractors
-+ [kinopoisk] Add support for kinopoisk.ru (#17283)
-+ [yourporn] Add support for yourporn.sexy (#17298)
-+ [go] Add support for disneynow.go.com (#16299, #17264)
-+ [6play] Add support for play.rtl.hr (#17249)
-* [anvato] Fallback to generic API key for access-key-to-API-key lookup
- (#16788, #17254)
-* [lci] Fix extraction (#17274)
-* [bbccouk] Extend id URL regular expression (#17270)
-* [cwtv] Fix extraction (#17256)
-* [nova] Fix extraction (#17241)
-+ [generic] Add support for expressen embeds
-* [raywenderlich] Adapt to site redesign (#17225)
-+ [redbulltv] Add support for redbull.com tv URLs (#17218)
-+ [bitchute] Add support for bitchute.com (#14052)
-+ [clyp] Add support for token protected media (#17184)
-* [imdb] Fix extension extraction (#17167)
-
-
-version 2018.08.04
-
-Extractors
-* [funk:channel] Improve byChannelAlias extraction (#17142)
-* [twitch] Fix authentication (#17024, #17126)
-* [twitch:vod] Improve URL regular expression (#17135)
-* [watchbox] Fix extraction (#17107)
-* [pbs] Fix extraction (#17109)
-* [theplatform] Relax URL regular expression (#16181, #17097)
-+ [viqeo] Add support for viqeo.tv (#17066)
-
-
-version 2018.07.29
-
-Extractors
-* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
-+ [pornhub] Add support for subtitles (#16924, #17088)
-* [ceskatelevize] Use https for API call (#16997, #16999)
-* [dailymotion:playlist] Fix extraction (#16894)
-* [ted] Improve extraction
-* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
-* [telecinco] Fix extraction (#17080)
-* [mitele] Reduce number of requests
-* [rai] Return non-HTTP relinker URL intact (#17055)
-* [vk] Fix extraction for inline only videos (#16923)
-* [streamcloud] Fix extraction (#17054)
-* [facebook] Fix tahoe player extraction with authentication (#16655)
-+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
-
-
-version 2018.07.21
-
-Core
-+ [utils] Introduce url_or_none
-* [utils] Allow JSONP without function name (#17028)
-+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
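-
-A minimal sketch, not the actual implementation, of what a url_or_none
-helper might look like: return the URL only when it is a non-empty string
-with a plausible scheme, otherwise None.
-
-    import re
-
-    def url_or_none(url):
-        # Hypothetical sketch: accept scheme-prefixed and protocol-relative URLs
-        if not url or not isinstance(url, str):
-            return None
-        url = url.strip()
-        return url if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', url) else None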
-
-Extractors
-+ [bbc] Add support for BBC Radio Play pages (#17022)
-* [iwara] Fix download URLs (#17026)
-* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
-+ [viu] Pass Referer and Origin headers and area id (#16992)
-+ [vimeo] Add another config regular expression (#17013)
-+ [facebook] Extract view count (#16942)
-* [dailymotion] Improve description extraction (#16984)
-* [slutload] Fix and improve extraction (#17001)
-* [mediaset] Fix extraction (#16977)
-+ [theplatform] Add support for theplatform TLD customization (#16977)
-* [imgur] Relax URL regular expression (#16987)
-* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
- #16959)
-
-
-version 2018.07.10
-
-Core
-* [utils] Share JSON-LD regular expression
-* [downloader/dash] Improve error handling (#16927)
-
-Extractors
-+ [nrktv] Add support for new season and serie URL schema
-+ [nrktv] Add support for new episode URL schema (#16909)
-+ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
-* [funk] Fix extraction (#16918)
-* [watchbox] Fix extraction (#16904)
-* [dplayit] Sort formats
-* [dplayit] Fix extraction (#16901)
-* [youtube] Improve login error handling (#13822)
-
-
-version 2018.07.04
-
-Core
-* [extractor/common] Properly escape % in MPD templates (#16867)
-* [extractor/common] Use source URL as Referer for HTML5 entries (#16849)
-* Prefer ffmpeg over avconv by default (#8622)
-
-Extractors
-* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
-* [lynda] Simplify login and improve error capturing (#16891)
-+ [go90] Add support for embed URLs (#16873)
-* [go90] Detect geo restriction error and pass geo verification headers
- (#16874)
-* [vlive] Fix live streams extraction (#16871)
-* [npo] Fix typo (#16872)
-+ [mediaset] Add support for new videos and extract all formats (#16568)
-* [dctptv] Restore extraction based on REST API (#16850)
-* [svt] Improve extraction and add support for pages (#16802)
-* [porncom] Fix extraction (#16808)
-
-
-version 2018.06.25
-
-Extractors
-* [joj] Relax URL regular expression (#16771)
-* [brightcove] Workaround sonyliv DRM protected videos (#16807)
-* [motherless] Fix extraction (#16786)
-* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
-- [foxnews:insider] Remove extractor (#15810)
-+ [foxnews] Add support for iframe embeds (#15810, #16711)
-
-
-version 2018.06.19
-
-Core
-+ [extractor/common] Introduce expected_status in _download_* methods
- for convenient accept of HTTP requests failed with non 2xx status codes
-+ [compat] Introduce compat_integer_types
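-
-A hedged usage sketch for the expected_status parameter introduced above;
-the endpoint and extractor context here are made up for illustration.
-
-    # Inside a hypothetical extractor: accept an HTTP 404 response that
-    # carries a JSON error payload instead of aborting extraction.
-    data = self._download_json(
-        'https://example.com/api/video/%s' % video_id, video_id,
-        expected_status=404)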
-
-Extractors
-* [peertube] Improve generic support (#16733)
-+ [6play] Use geo verification headers
-* [rtbf] Fix extraction for python 3.2
-* [vgtv] Improve HLS formats extraction
-+ [vgtv] Add support for www.aftonbladet.se/tv URLs
-* [bbccouk] Use expected_status
-* [markiza] Expect 500 HTTP status code
-* [tvnow] Try all clear manifest URLs (#15361)
-
-
-version 2018.06.18
-
-Core
-* [downloader/rtmp] Fix downloading in verbose mode (#16736)
-
-Extractors
-+ [markiza] Add support for markiza.sk (#16750)
-* [wat] Try all supported adaptive URLs
-+ [6play] Add support for rtlplay.be and extract hd usp formats
-+ [rtbf] Add support for audio and live streams (#9638, #11923)
-+ [rtbf] Extract HLS, DASH and all HTTP formats
-+ [rtbf] Extract subtitles
-+ [rtbf] Fixup specific HTTP URLs (#16101)
-+ [expressen] Add support for expressen.se
-* [vidzi] Fix extraction (#16678)
-* [pbs] Improve extraction (#16623, #16684)
-* [bilibili] Restrict cid regular expression (#16638, #16734)
-
-
-version 2018.06.14
-
-Core
-* [downloader/http] Fix retry on error when streaming to stdout (#16699)
-
-Extractors
-+ [discoverynetworks] Add support for disco-api videos (#16724)
-+ [dailymotion] Add support for password protected videos (#9789)
-+ [abc:iview] Add support for livestreams (#12354)
-* [abc:iview] Fix extraction (#16704)
-+ [crackle] Add support for sonycrackle.com (#16698)
-+ [tvnet] Add support for tvnet.gov.vn (#15462)
-* [nrk] Update API hosts and try all previously known ones (#16690)
-* [wimp] Fix YouTube embeds extraction
-
-
-version 2018.06.11
-
-Extractors
-* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
-+ [inc] Add support for another embed schema (#16666)
-* [tv4] Fix format extraction (#16650)
-+ [nexx] Add support for free cdn (#16538)
-+ [pbs] Add another cove id pattern (#15373)
-+ [rbmaradio] Add support for 192k format (#16631)
-
-
-version 2018.06.04
-
-Extractors
-+ [camtube] Add support for camtube.co
-+ [twitter:card] Extract guest token (#16609)
-+ [chaturbate] Use geo verification headers
-+ [bbc] Add support for bbcthree (#16612)
-* [youtube] Move metadata extraction after video availability check
-+ [youtube] Extract track and artist
-+ [safari] Add support for new URL schema (#16614)
-* [adn] Fix extraction
-
-
-version 2018.06.02
-
-Core
-* [utils] Improve determine_ext
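-
-For context, a hedged example of how determine_ext is typically used
-(assuming the usual utils module path; edge-case behavior may differ):
-
-    from hypervideo_dl.utils import determine_ext
-
-    determine_ext('https://example.com/video.mp4?token=abc')  # -> 'mp4'
-    determine_ext('https://example.com/stream')  # -> default 'unknown_video'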
-
-Extractors
-+ [facebook] Add support for tahoe player videos (#15441, #16554)
-* [cbc] Improve extraction (#16583, #16593)
-* [openload] Improve ext extraction (#16595)
-+ [twitter:card] Add support for another endpoint (#16586)
-+ [openload] Add support for oload.win and oload.download (#16592)
-* [audimedia] Fix extraction (#15309)
-+ [francetv] Add support for sport.francetvinfo.fr (#15645)
-* [mlb] Improve extraction (#16587)
-- [nhl] Remove old extractors
-* [rbmaradio] Check formats availability (#16585)
-
-
-version 2018.05.30
-
-Core
-* [downloader/rtmp] Generalize download messages and report time elapsed
- on finish
-* [downloader/rtmp] Gracefully handle live streams interrupted by user
-
-Extractors
-* [teamcoco] Fix extraction for full episodes (#16573)
-* [spiegel] Fix info extraction (#16538)
-+ [apa] Add support for apa.at (#15041, #15672)
-+ [bellmedia] Add support for bnnbloomberg.ca (#16560)
-+ [9c9media] Extract MPD formats and subtitles
-* [cammodels] Use geo verification headers
-+ [ufctv] Add support for authentication (#16542)
-+ [cammodels] Add support for cammodels.com (#14499)
-* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
- (#16551)
-* [soundcloud] Detect format extension (#16549)
-* [cbc] Fix playlist title extraction (#16502)
-+ [tumblr] Detect and report sensitive media (#13829)
-+ [tumblr] Add support for authentication (#15133)
-
-
-version 2018.05.26
-
-Core
-* [utils] Improve parse_age_limit
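-
-A hedged illustration of parse_age_limit (assuming the usual utils module
-path; the set of recognized rating strings may vary by version):
-
-    from hypervideo_dl.utils import parse_age_limit
-
-    parse_age_limit(18)       # -> 18
-    parse_age_limit('18+')    # likely -> 18
-    parse_age_limit('TV-MA')  # likely -> 17 (US TV rating mapping)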
-
-Extractors
-* [audiomack] Stringify video id (#15310)
-* [izlesene] Fix extraction (#16233, #16271, #16407)
-+ [indavideo] Add support for generic embeds (#11989)
-* [indavideo] Fix extraction (#11221)
-* [indavideo] Sign download URLs (#16174)
-+ [peertube] Add support for PeerTube based sites (#16301, #16329)
-* [imgur] Fix extraction (#16537)
-+ [hidive] Add support for authentication (#16534)
-+ [nbc] Add support for stream.nbcsports.com (#13911)
-+ [viewlift] Add support for hoichoi.tv (#16536)
-* [go90] Extract age limit and detect DRM protection (#10127)
-* [viewlift] Fix extraction for snagfilms.com (#15766)
-* [globo] Improve extraction (#4189)
- * Add support for authentication
- * Simplify URL signing
- * Extract DASH and MSS formats
-* [leeco] Fix extraction (#16464)
-* [teamcoco] Add fallback for format extraction (#16484)
-* [teamcoco] Improve URL regular expression (#16484)
-* [imdb] Improve extraction (#4085, #14557)
-
-
-version 2018.05.18
-
-Extractors
-* [vimeo:likes] Relax URL regular expression and fix single page likes
- extraction (#16475)
-* [pluralsight] Fix clip id extraction (#16460)
-+ [mychannels] Add support for mychannels.com (#15334)
-- [moniker] Remove extractor (#15336)
-* [pbs] Fix embed data extraction (#16474)
-+ [mtv] Add support for paramountnetwork.com and bellator.com (#15418)
-* [youtube] Fix hd720 format position
-* [dailymotion] Remove fragment part from m3u8 URLs (#8915)
-* [3sat] Improve extraction (#15350)
- * Extract all formats
- * Extract more format metadata
- * Improve format sorting
- * Use hls native downloader
- * Detect and bypass geo-restriction
-+ [dtube] Add support for d.tube (#15201)
-* [options] Fix typo (#16450)
-* [youtube] Improve format filesize extraction (#16453)
-* [youtube] Make uploader extraction non fatal (#16444)
-* [youtube] Fix extraction for embed restricted live streams (#16433)
-* [nbc] Improve info extraction (#16440)
-* [twitch:clips] Fix extraction (#16429)
-* [redditr] Relax URL regular expression (#16426, #16427)
-* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424)
-+ [nick] Add support for nickjr.de (#13230)
-* [teamcoco] Fix extraction (#16374)
-
-
-version 2018.05.09
-
-Core
-* [YoutubeDL] Ensure ext exists for automatic captions
-* Introduce --geo-bypass-ip-block
-
-Extractors
-+ [udemy] Extract asset captions
-+ [udemy] Extract stream URLs (#16372)
-+ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
-+ [cloudflarestream] Add support for cloudflarestream.com (#16375)
-* [watchbox] Fix extraction (#16356)
-* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
-+ [itv:btcc] Add support for itv.com/btcc (#16139)
-* [tunein] Use live title for live streams (#16347)
-* [itv] Improve extraction (#16253)
-
-
-version 2018.05.01
-
-Core
-* [downloader/fragment] Restart download if .ytdl file is corrupt (#16312)
-+ [extractor/common] Extract interaction statistic
-+ [utils] Add merge_dicts
-+ [extractor/common] Add _download_json_handle
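-
-A minimal sketch, not the actual implementation, of the merge_dicts helper
-added above: earlier dicts take priority and empty values do not shadow
-real ones.
-
-    def merge_dicts(*dicts):
-        merged = {}
-        for d in dicts:
-            for k, v in d.items():
-                # Hypothetical rule: the first non-empty value wins
-                if v not in (None, '') and merged.get(k) in (None, ''):
-                    merged[k] = v
-        return merged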
-
-Extractors
-* [kaltura] Improve iframe embeds detection (#16337)
-+ [udemy] Extract outputs renditions (#16289, #16291, #16320, #16321, #16334,
- #16335)
-+ [zattoo] Add support for zattoo.com and mobiltv.quickline.com (#14668, #14676)
-* [yandexmusic] Convert release_year to int
-* [udemy] Override _download_webpage_handle instead of _download_webpage
-* [xiami] Override _download_webpage_handle instead of _download_webpage
-* [yandexmusic] Override _download_webpage_handle instead of _download_webpage
-* [youtube] Correctly disable polymer on all requests (#16323, #16326)
-* [generic] Prefer enclosures over links in RSS feeds (#16189)
-+ [redditr] Add support for old.reddit.com URLs (#16274)
-* [nrktv] Update API host (#16324)
-+ [imdb] Extract all formats (#16249)
-+ [vimeo] Extract JSON-LD (#16295)
-* [funk:channel] Improve extraction (#16285)
-
-
-version 2018.04.25
-
-Core
-* [utils] Fix match_str for boolean meta fields
-+ [Makefile] Add support for pandoc 2 and disable smart extension (#16251)
-* [YoutubeDL] Fix typo in media extension compatibility checker (#16215)
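-
-A hedged example of the boolean handling in match_str referenced above
-(illustrative calls; assuming the usual utils module path):
-
-    from hypervideo_dl.utils import match_str
-
-    # A bare field name should match on the truthiness of a boolean field
-    match_str('is_live', {'is_live': True})    # -> True
-    match_str('!is_live', {'is_live': False})  # -> True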
-
-Extractors
-+ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246,
- #16250)
-+ [twitch] Extract is_live according to status (#16259)
-* [pornflip] Relax URL regular expression (#16258)
-- [etonline] Remove extractor (#16256)
-* [breakcom] Fix extraction (#16254)
-+ [youtube] Add ability to authenticate with cookies
-* [youtube:feed] Implement lazy playlist extraction (#10184)
-+ [svt] Add support for TV channel live streams (#15279, #15809)
-* [ccma] Fix video extraction (#15931)
-* [rentv] Fix extraction (#15227)
-+ [nick] Add support for nickjr.nl (#16230)
-* [extremetube] Fix metadata extraction
-+ [keezmovies] Add support for generic embeds (#16134, #16154)
-* [nexx] Extract new azure URLs (#16223)
-* [cbssports] Fix extraction (#16217)
-* [kaltura] Improve embeds detection (#16201)
-* [instagram:user] Fix extraction (#16119)
-* [cbs] Skip DRM asset types (#16104)
-
-
-version 2018.04.16
-
-Extractors
-* [smotri:broadcast] Fix extraction (#16180)
-+ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551)
-* [vine:user] Fix extraction (#15514, #16190)
-* [pornhub] Relax URL regular expression (#16165)
-* [cbc:watch] Re-acquire device token when expired (#16160)
-+ [fxnetworks] Add support for https theplatform URLs (#16125, #16157)
-+ [instagram:user] Add request signing (#16119)
-+ [twitch] Add support for mobile URLs (#16146)
-
-
-version 2018.04.09
-
-Core
-* [YoutubeDL] Do not save/restore console title while simulating (#16103)
-* [extractor/common] Relax JSON-LD context check (#16006)
-
-Extractors
-+ [generic] Add support for tube8 embeds
-+ [generic] Add support for share-videos.se embeds (#16089, #16115)
-* [odnoklassniki] Extend URL regular expression (#16081)
-* [steam] Bypass mature content check (#16113)
-+ [acast] Extract more metadata
-* [acast] Fix extraction (#16118)
-* [instagram:user] Fix extraction (#16119)
-* [drtuber] Fix title extraction (#16107, #16108)
-* [liveleak] Extend URL regular expression (#16117)
-+ [openload] Add support for oload.xyz
-* [openload] Relax stream URL regular expression
-* [openload] Fix extraction (#16099)
-+ [svtplay:series] Add support for season URLs
-+ [svtplay:series] Add support for series (#11130, #16059)
-
-
-version 2018.04.03
-
-Extractors
-+ [tvnow] Add support for shows (#15837)
-* [dramafever] Fix authentication (#16067)
-* [afreecatv] Use partial view only when necessary (#14450)
-+ [afreecatv] Add support for authentication (#14450)
-+ [nationalgeographic] Add support for new URL schema (#16001, #16054)
-* [xvideos] Fix thumbnail extraction (#15978, #15979)
-* [medialaan] Fix vod id (#16038)
-+ [openload] Add support for oload.site (#16039)
-* [naver] Fix extraction (#16029)
-* [dramafever] Partially switch to API v5 (#16026)
-* [abc:iview] Unescape title and series meta fields (#15994)
-* [videa] Extend URL regular expression (#16003)
-
-
-version 2018.03.26.1
-
-Core
-+ [downloader/external] Add elapsed time to progress hook (#10876)
-* [downloader/external,fragment] Fix download finalization when writing file
- to stdout (#10809, #10876, #15799)
-
-Extractors
-* [vrv] Fix extraction on python2 (#15928)
-* [afreecatv] Update referrer (#15947)
-+ [24video] Add support for 24video.sexy (#15973)
-* [crackle] Bypass geo restriction
-* [crackle] Fix extraction (#15969)
-+ [lenta] Add support for lenta.ru (#15953)
-+ [instagram:user] Add pagination (#15934)
-* [youku] Update ccode (#15939)
-* [libsyn] Adapt to new page structure
-
-
-version 2018.03.20
-
-Core
-* [extractor/common] Improve thumbnail extraction for HTML5 entries
-* Generalize XML manifest processing code and improve XSPF parsing
-+ [extractor/common] Add _download_xml_handle
-+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
-
-Extractors
-+ [7plus] Extract series metadata (#15862, #15906)
-* [9now] Bypass geo restriction (#15920)
-* [cbs] Skip unavailable assets (#13490, #13506, #15776)
-+ [canalc2] Add support for HTML5 videos (#15916, #15919)
-+ [ceskatelevize] Add support for iframe embeds (#15918)
-+ [prosiebensat1] Add support for galileo.tv (#15894)
-+ [generic] Add support for xfileshare embeds (#15879)
-* [bilibili] Switch to v2 playurl API
-* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
-* [heise] Improve extraction (#15496, #15784, #15026)
-* [instagram] Fix user videos extraction (#15858)
-
-
-version 2018.03.14
-
-Extractors
-* [soundcloud] Update client id (#15866)
-+ [tennistv] Add support for tennistv.com
-+ [line] Add support for tv.line.me (#9427)
-* [xnxx] Fix extraction (#15817)
-* [njpwworld] Fix authentication (#15815)
-
-
-version 2018.03.10
-
-Core
-* [downloader/hls] Skip uplynk ad fragments (#15748)
-
-Extractors
-* [pornhub] Don't override session cookies (#15697)
-+ [raywenderlich] Add support for videos.raywenderlich.com (#15251)
-* [funk] Fix extraction and rework extractors (#15792)
-* [nexx] Restore reverse engineered approach
-+ [heise] Add support for kaltura embeds (#14961, #15728)
-+ [tvnow] Extract series metadata (#15774)
-* [ruutu] Continue formats extraction on NOT-USED URLs (#15775)
-* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769)
-* [vimeo] Modernize login code and improve error messaging
-* [archiveorg] Fix extraction (#15770, #15772)
-+ [hidive] Add support for hidive.com (#15494)
-* [afreecatv] Detect deleted videos
-* [afreecatv] Fix extraction (#15755)
-* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778)
-+ [vidzi] Add support for vidzi.si (#15751)
-* [npo] Fix typo
-
-
-version 2018.03.03
-
-Core
-+ [utils] Add parse_resolution
-* Revert respect --prefer-insecure while updating
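-
-A minimal sketch, not the actual implementation, of what a parse_resolution
-helper might look like, mapping a string to width/height meta fields:
-
-    import re
-
-    def parse_resolution(s):
-        if s is None:
-            return {}
-        m = re.search(r'(?P<w>\d+)\s*[xX]\s*(?P<h>\d+)', s)
-        if m:
-            return {'width': int(m.group('w')), 'height': int(m.group('h'))}
-        m = re.search(r'(?P<h>\d+)[pPiI]\b', s)
-        if m:
-            return {'height': int(m.group('h'))}
-        return {}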
-
-Extractors
-+ [yapfiles] Add support for yapfiles.ru (#15726, #11085)
-* [spankbang] Fix formats extraction (#15727)
-* [adn] Fix extraction (#15716)
-+ [toggle] Extract DASH and ISM formats (#15721)
-+ [nickelodeon] Add support for nickelodeon.com.tr (#15706)
-* [npo] Validate and filter format URLs (#15709)
-
-
-version 2018.02.26
-
-Extractors
-* [udemy] Use custom User-Agent (#15571)
-
-
-version 2018.02.25
-
-Core
-* [postprocessor/embedthumbnail] Skip embedding when there aren't any
- thumbnails (#12573)
-* [extractor/common] Improve jwplayer subtitles extraction (#15695)
-
-Extractors
-+ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
-+ [streamango] Capture and output error messages
-* [streamango] Fix extraction (#14160, #14256)
-+ [telequebec] Add support for emissions (#14649, #14655)
-+ [telequebec:live] Add support for live streams (#15688)
-+ [mailru:music] Add support for mail.ru/music (#15618)
-* [aenetworks] Switch to akamai HLS formats (#15612)
-* [ytsearch] Fix flat title extraction (#11260, #15681)
-
-
-version 2018.02.22
-
-Core
-+ [utils] Fixup some common URL typos in sanitize_url (#15649)
-* Respect --prefer-insecure while updating (#15497)
-
-Extractors
-* [vidio] Fix HLS URL extraction (#15675)
-+ [nexx] Add support for arc.nexx.cloud URLs
-* [nexx] Switch to arc API (#15652)
-* [redtube] Fix duration extraction (#15659)
-+ [sonyliv] Respect referrer (#15648)
-+ [brightcove:new] Use referrer for formats' HTTP headers
-+ [cbc] Add support for olympics.cbc.ca (#15535)
-+ [fusion] Add support for fusion.tv (#15628)
-* [npo] Improve quality metadata extraction
-* [npo] Relax URL regular expression (#14987, #14994)
-+ [npo] Capture and output error message
-+ [pornhub] Add support for channels (#15613)
-* [youtube] Handle shared URLs with generic extractor (#14303)
-
-
-version 2018.02.11
-
-Core
-+ [YoutubeDL] Add support for filesize_approx in format selector (#15550)
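-
-A hedged command-line illustration of the filesize_approx filter mentioned
-above; the URL and size threshold are placeholders.
-
-    # Prefer the best format whose estimated size stays under ~300 MiB
-    hypervideo -f "best[filesize_approx<300M]" "https://example.com/watch?v=xyz"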
-
-Extractors
-+ [francetv] Add support for live streams (#13689)
-+ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103,
- #15012)
-* [francetv] Separate main extractor and rework others to delegate to it
-* [francetv] Improve manifest URL signing (#15536)
-+ [francetv] Sign m3u8 manifest URLs (#15565)
-+ [veoh] Add support for embed URLs (#15561)
-* [afreecatv] Fix extraction (#15556)
-* [periscope] Use accessVideoPublic endpoint (#15554)
-* [discovery] Fix auth request (#15542)
-+ [6play] Extract subtitles (#15541)
-* [newgrounds] Fix metadata extraction (#15531)
-+ [nbc] Add support for stream.nbcolympics.com (#10295)
-* [dvtv] Fix live streams extraction (#15442)
-
-
-version 2018.02.08
-
-Extractors
-+ [myvi] Extend URL regular expression
-+ [myvi:embed] Add support for myvi.tv embeds (#15521)
-+ [prosiebensat1] Extend URL regular expression (#15520)
-* [pokemon] Relax URL regular expression and extend title extraction (#15518)
-+ [gameinformer] Use geo verification headers
-* [la7] Fix extraction (#15501, #15502)
-* [gameinformer] Fix brightcove id extraction (#15416)
-+ [afreecatv] Pass referrer to video info request (#15507)
-+ [telebruxelles] Add support for live streams
-* [telebruxelles] Relax URL regular expression
-* [telebruxelles] Fix extraction (#15504)
-* [extractor/common] Respect secure schemes in _extract_wowza_formats
-
-
-version 2018.02.04
-
-Core
-* [downloader/http] Randomize HTTP chunk size
-+ [downloader/http] Add ability to pass downloader options via info dict
-* [downloader/http] Fix 302 infinite loops by not reusing requests
-+ Document http_chunk_size
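-
-A self-contained sketch of the chunk size randomization idea noted above;
-illustrative only, the real downloader's strategy may differ.
-
-    import random
-
-    def randomized_chunk_size(base=10485760, jitter=0.2):
-        # Hypothetical helper: vary the requested Range length around a
-        # base value so consecutive chunked requests are not identical
-        delta = int(base * jitter)
-        return base + random.randint(-delta, delta)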
-
-Extractors
-+ [brightcove] Pass embed page URL as referrer (#15486)
-+ [youtube] Enforce using chunked HTTP downloading for DASH formats
-
-
-version 2018.02.03
-
-Core
-+ Introduce --http-chunk-size for chunk-based HTTP downloading
-+ Add support for IronPython
-* [downloader/ism] Fix Python 3.2 support
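-
-A hedged usage example for the --http-chunk-size option introduced above;
-the URL is a placeholder, and suffixed sizes such as 10M are typically
-accepted alongside plain byte counts.
-
-    # Download over HTTP in ~10 MiB ranged chunks
-    hypervideo --http-chunk-size 10M "https://example.com/watch?v=xyz"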
-
-Extractors
-* [redbulltv] Fix extraction (#15481)
-* [redtube] Fix metadata extraction (#15472)
-* [pladform] Respect platform id and extract HLS formats (#15468)
-- [rtlnl] Remove progressive formats (#15459)
-* [6play] Do not modify asset URLs with a token (#15248)
-* [nationalgeographic] Relax URL regular expression
-* [dplay] Relax URL regular expression (#15458)
-* [cbsinteractive] Fix data extraction (#15451)
-+ [amcnetworks] Add support for sundancetv.com (#9260)
-
-
-version 2018.01.27
-
-Core
-* [extractor/common] Improve _json_ld for articles
-* Switch codebase to use compat_b64decode
-+ [compat] Add compat_b64decode
-
-Extractors
-+ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616)
-* [dplay] Bypass geo restriction
-+ [dplay] Add support for disco-api videos (#15396)
-* [youtube] Extract precise error messages (#15284)
-* [teachertube] Capture and output error message
-* [teachertube] Fix and relax thumbnail extraction (#15403)
-+ [prosiebensat1] Add another clip id regular expression (#15378)
-* [tbs] Update tokenizer url (#15395)
-* [mixcloud] Use compat_b64decode (#15394)
-- [thesixtyone] Remove extractor (#15341)
-
-
-version 2018.01.21
-
-Core
-* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187)
-* [utils] Improve scientific notation handling in js_to_json (#14789)
-
-Extractors
-+ [southparkdk] Add support for southparkstudios.nu
-+ [southpark] Add support for collections (#14803)
-* [franceinter] Fix upload date extraction (#14996)
-+ [rtvs] Add support for rtvs.sk (#9242, #15187)
-* [restudy] Fix extraction and extend URL regular expression (#15347)
-* [youtube:live] Improve live detection (#15365)
-+ [springboardplatform] Add support for springboardplatform.com
-* [prosiebensat1] Add another clip id regular expression (#15290)
-- [ringtv] Remove extractor (#15345)
-
-
-version 2018.01.18
-
-Extractors
-* [soundcloud] Update client id (#15306)
-- [kamcord] Remove extractor (#15322)
-+ [spiegel] Add support for nexx videos (#15285)
-* [twitch] Fix authentication and error capture (#14090, #15264)
-* [vk] Detect more errors due to copyright complaints (#15259)
-
-
-version 2018.01.14
-
-Extractors
-* [youtube] Fix live streams extraction (#15202)
-* [wdr] Bypass geo restriction
-* [wdr] Rework extractors (#14598)
-+ [wdr] Add support for wdrmaus.de/elefantenseite (#14598)
-+ [gamestar] Add support for gamepro.de (#3384)
-* [viafree] Skip rtmp formats (#15232)
-+ [pandoratv] Add support for mobile URLs (#12441)
-+ [pandoratv] Add support for new URL format (#15131)
-+ [ximalaya] Add support for ximalaya.com (#14687)
-+ [digg] Add support for digg.com (#15214)
-* [limelight] Tolerate empty pc formats (#15150, #15151, #15207)
-* [ndr:embed:base] Make separate formats extraction non fatal (#15203)
-+ [weibo] Add extractor (#15079)
-+ [ok] Add support for live streams
-* [canalplus] Fix extraction (#15072)
-* [bilibili] Fix extraction (#15188)
-
-
-version 2018.01.07
-
-Core
-* [utils] Fix youtube-dl under PyPy3 on Windows
-* [YoutubeDL] Output python implementation in debug header
-
-Extractors
-+ [jwplatform] Add support for multiple embeds (#15192)
-* [mitele] Fix extraction (#15186)
-+ [motherless] Add support for groups (#15124)
-* [lynda] Relax URL regular expression (#15185)
-* [soundcloud] Fallback to avatar picture for thumbnail (#12878)
-* [youku] Fix list extraction (#15135)
-* [openload] Fix extraction (#15166)
-* [lynda] Skip invalid subtitles (#15159)
-* [twitch] Pass video id to url_result when extracting playlist (#15139)
-* [rtve.es:alacarta] Fix extraction of some new URLs
-* [acast] Fix extraction (#15147)
-
-
-version 2017.12.31
-
-Core
-+ [extractor/common] Add container meta field for formats extracted
- in _parse_mpd_formats (#13616)
-+ [downloader/hls] Use HTTP headers for key request
-* [common] Use AACL as the default fourcc when AudioTag is 255
-* [extractor/common] Fix extraction of DASH formats with the same
- representation id (#15111)
-
-Extractors
-+ [slutload] Add support for mobile URLs (#14806)
-* [abc:iview] Bypass geo restriction
-* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
- #15035, #15057, #15061, #15071, #15095, #15106)
-* [openload] Fix extraction (#15118)
-- [sandia] Remove extractor
-- [collegerama] Remove extractor
-+ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
- #11185, #14343)
-+ [ufctv] Add support for ufc.tv (#14520)
-* [pluralsight] Fix missing first line of subtitles (#11118)
-* [openload] Fallback on f-page extraction (#14665, #14879)
-* [vimeo] Improve password protected videos extraction (#15114)
-* [aws] Fix canonical/signed headers generation on python 2 (#15102)
-
-
-version 2017.12.28
-
-Extractors
-+ [internazionale] Add support for internazionale.it (#14973)
-* [playtvak] Relax video regular expression and make description optional
- (#15037)
-+ [filmweb] Add support for filmweb.no (#8773, #10368)
-+ [23video] Add support for 23video.com
-+ [espn] Add support for fivethirtyeight.com (#6864)
-+ [umg:de] Add support for universal-music.de (#11582, #11584)
-+ [espn] Add support for espnfc and extract more formats (#8053)
-* [youku] Update ccode (#14880)
-+ [openload] Add support for oload.stream (#15070)
-* [youku] Fix list extraction (#15065)
-
-
-version 2017.12.23
-
-Core
-* [extractor/common] Move X-Forwarded-For setup code into _request_webpage
-+ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in
- output template (#11427, #15018)
-+ [extractor/common] Introduce uploader, uploader_id and uploader_url
- meta fields for playlists (#11427, #15018)
-* [downloader/fragment] Encode filename of fragment being removed (#15020)
-+ [utils] Add another date format pattern (#14999)
-
-Extractors
-+ [kaltura] Add another embed pattern for entry_id
-+ [7plus] Add support for 7plus.com.au (#15043)
-* [animeondemand] Relax login error regular expression
-+ [shahid] Add support for show pages (#7401)
-+ [youtube] Extract uploader, uploader_id and uploader_url for playlists
- (#11427, #15018)
-* [afreecatv] Improve format extraction (#15019)
-+ [cspan] Add support for audio only pages and catch page errors (#14995)
-+ [mailru] Add support for embed URLs (#14904)
-* [crunchyroll] Future-proof XML element checks (#15013)
-* [cbslocal] Fix timestamp extraction (#14999, #15000)
-* [discoverygo] Correct TTML subtitle extension
-* [vk] Make view count optional (#14979)
-* [disney] Skip Apple FairPlay formats (#14982)
-* [voot] Fix format extraction (#14758)
-
-
-version 2017.12.14
-
-Core
-* [postprocessor/xattr] Clarify NO_SPACE message (#14970)
-* [downloader/http] Return actual download result from real_download (#14971)
-
-Extractors
-+ [itv] Extract more subtitles and duration
-* [itv] Improve extraction (#14944)
-+ [byutv] Add support for geo restricted videos
-* [byutv] Fix extraction (#14966, #14967)
-+ [bbccouk] Fix extraction for 320k HLS streams
-+ [toutv] Add support for special video URLs (#14179)
-* [discovery] Fix free videos extraction (#14157, #14954)
-* [tvnow] Fix extraction (#7831)
-+ [nickelodeon:br] Add support for nickelodeon brazil websites (#14893)
-* [nick] Improve extraction (#14876)
-* [tbs] Fix extraction (#13658)
-
-
-version 2017.12.10
-
-Core
-+ [utils] Add sami mimetype to mimetype2ext
-
-Extractors
-* [culturebox] Improve video id extraction (#14947)
-* [twitter] Improve extraction (#14197)
-+ [udemy] Extract more HLS formats
-* [udemy] Improve course id extraction (#14938)
-+ [stretchinternet] Add support for portal.stretchinternet.com (#14576)
-* [ellentube] Fix extraction (#14407, #14570)
-+ [raiplay:playlist] Add support for playlists (#14563)
-* [sonyliv] Bypass geo restriction
-* [sonyliv] Extract higher quality formats (#14922)
-* [fox] Extract subtitles
-+ [fox] Add support for Adobe Pass authentication (#14205, #14489)
-- [dailymotion:cloud] Remove extractor (#6794)
-* [xhamster] Fix thumbnail extraction (#14780)
-+ [xhamster] Add support for mobile URLs (#14780)
-* [generic] Don't pass video id as mpd id while extracting DASH (#14902)
-* [ard] Skip invalid stream URLs (#14906)
-* [porncom] Fix metadata extraction (#14911)
-* [pluralsight] Detect agreement request (#14913)
-* [toutv] Fix login (#14614)
-
-
-version 2017.12.02
-
-Core
-+ [downloader/fragment] Commit part file after each fragment
-+ [extractor/common] Add durations for DASH fragments with bare SegmentURLs
-+ [extractor/common] Add support for DASH manifests with SegmentLists with
- bare SegmentURLs (#14844)
-+ [utils] Add hvc1 codec code to parse_codecs
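-
-A hedged illustration of parse_codecs with the hvc1 addition noted above
-(expected shape of the result; extra fields may vary by version):
-
-    from hypervideo_dl.utils import parse_codecs
-
-    parse_codecs('hvc1.1.6.L93.B0, mp4a.40.2')
-    # likely -> {'vcodec': 'hvc1.1.6.L93.B0', 'acodec': 'mp4a.40.2'}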
-
-Extractors
-* [xhamster] Fix extraction (#14884)
-* [youku] Update ccode (#14872)
-* [mnet] Fix format extraction (#14883)
-+ [xiami] Add Referer header to API request
-* [mtv] Correct scc extension in extracted subtitles (#13730)
-* [vvvvid] Fix extraction for kenc videos (#13406)
-+ [br] Add support for BR Mediathek videos (#14560, #14788)
-+ [daisuki] Add support for motto.daisuki.com (#14681)
-* [odnoklassniki] Fix API metadata request (#14862)
-* [itv] Fix HLS formats extraction
-+ [pbs] Add another media id regular expression
-
-
-version 2017.11.26
-
-Core
-* [extractor/common] Use final URL when dumping request (#14769)
-
-Extractors
-* [fczenit] Fix extraction
-- [firstpost] Remove extractor
-* [freespeech] Fix extraction
-* [nexx] Extract more formats
-+ [openload] Add support for openload.link (#14763)
-* [empflix] Relax URL regular expression
-* [empflix] Fix extraction
-* [tnaflix] Don't modify download URLs (#14811)
-- [gamersyde] Remove extractor
-* [francetv:generationwhat] Fix extraction
-+ [massengeschmacktv] Add support for Massengeschmack TV
-* [fox9] Fix extraction
-* [faz] Fix extraction and add support for Perform Group embeds (#14714)
-+ [performgroup] Add support for performgroup.com
-+ [jwplatform] Add support for iframes (#14828)
-* [culturebox] Fix extraction (#14827)
-* [youku] Fix extraction; update ccode (#14815)
-* [livestream] Make SMIL extraction non fatal (#14792)
-+ [drtuber] Add support for mobile URLs (#14772)
-+ [spankbang] Add support for mobile URLs (#14771)
-* [instagram] Fix description, timestamp and counters extraction (#14755)
-
-
-version 2017.11.15
-
-Core
-* [common] Skip Apple FairPlay m3u8 manifests (#14741)
-* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)
-
-Extractors
-* [vshare] Capture and output error message
-* [vshare] Fix extraction (#14473)
-* [crunchyroll] Extract old RTMP formats
-* [tva] Fix extraction (#14736)
-* [gamespot] Lower preference of HTTP formats (#14652)
-* [instagram:user] Fix extraction (#14699)
-* [ccma] Fix typo (#14730)
-- Remove sensitive data from logging in messages
-+ [gamespot] Add support for article URLs (#14652)
-* [gamespot] Skip Brightcove Once HTTP formats (#14652)
-* [cartoonnetwork] Update tokenizer_src (#14666)
-+ [wsj] Recognize another URL pattern (#14704)
-* [pandatv] Update API URL and sign format URLs (#14693)
-* [crunchyroll] Use old login method (#11572)
-
-
-version 2017.11.06
-
-Core
-+ [extractor/common] Add protocol for f4m formats
-* [f4m] Prefer baseURL for relative URLs (#14660)
-* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
-
-Extractors
-+ [hotstar:playlist] Add support for playlists (#12465)
-* [hotstar] Bypass geo restriction (#14672)
-- [22tracks] Remove extractor (#11024, #14628)
-+ [skysport] Add support for ooyala videos protected with embed_token (#14641)
-* [gamespot] Extract formats referenced with new data fields (#14652)
-* [spankbang] Detect unavailable videos (#14644)
-
-
-version 2017.10.29
-
-Core
-* [extractor/common] Prefix format id for audio only HLS formats
-+ [utils] Add support for zero years and months in parse_duration
-
-Extractors
-* [egghead] Fix extraction (#14388)
-+ [fxnetworks] Extract series metadata (#14603)
-+ [younow] Add support for younow.com (#9255, #9432, #12436)
-* [dctptv] Fix extraction (#14599)
-* [youtube] Restrict embed regular expression (#14600)
-* [vimeo] Restrict iframe embed regular expression (#14600)
-* [soundgasm] Improve extraction (#14588)
-- [myvideo] Remove extractor (#8557)
-+ [nbc] Add support for classic-tv videos (#14575)
-+ [vrtnu] Add support for cookies authentication and simplify (#11873)
-+ [canvas] Add support for vrt.be/vrtnu (#11873)
-* [twitch:clips] Fix title extraction (#14566)
-+ [ndtv] Add support for sub-sites (#14534)
-* [dramafever] Fix login error message extraction
-+ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
- ro, hu) (#14553)
-
-
-version 2017.10.20
-
-Core
-* [downloader/fragment] Report warning instead of error on inconsistent
- download state
-* [downloader/hls] Fix total fragments count when ad fragments exist
-
-Extractors
-* [parliamentliveuk] Fix extraction (#14524)
-* [soundcloud] Update client id (#14546)
-+ [servus] Add support for servus.com (#14362)
-+ [unity] Add support for unity3d.com (#14528)
-* [youtube] Replace youtube redirect URLs in description (#14517)
-* [pbs] Restrict direct video URL regular expression (#14519)
-* [drtv] Respect preference for direct HTTP formats (#14509)
-+ [eporner] Add support for embed URLs (#14507)
-* [arte] Capture and output error message
-* [niconico] Improve uploader metadata extraction robustness (#14135)
-
-
-version 2017.10.15.1
-
-Core
-* [downloader/hls] Ignore anvato ad fragments (#14496)
-* [downloader/fragment] Output ad fragment count
-
-Extractors
-* [scrippsnetworks:watch] Bypass geo restriction
-+ [anvato] Add ability to bypass geo restriction
-* [redditr] Fix extraction for URLs with query (#14495)
-
-
-version 2017.10.15
-
-Core
-+ [common] Add support for jwplayer youtube embeds
-
-Extractors
-* [scrippsnetworks:watch] Fix extraction (#14389)
-* [anvato] Process master m3u8 manifests
-* [youtube] Fix relative URLs in description
-* [spike] Bypass geo restriction
-+ [howstuffworks] Add support for more domains
-* [infoq] Fix http format downloading
-+ [rtlnl] Add support for another type of embeds
-+ [onionstudios] Add support for bulbs-video embeds
-* [udn] Fix extraction
-* [shahid] Fix extraction (#14448)
-* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
-* [vh1] Fix extraction (#9613)
-
-
-version 2017.10.12
-
-Core
-* [YoutubeDL] Improve _default_format_spec (#14461)
-
-Extractors
-* [steam] Fix extraction (#14067)
-+ [funk] Add support for funk.net (#14464)
-+ [nexx] Add support for shortcuts and relax domain id extraction
-+ [voxmedia] Add support for recode.net (#14173)
-+ [once] Add support for vmap URLs
-+ [generic] Add support for channel9 embeds (#14469)
-* [tva] Fix extraction (#14328)
-+ [tubitv] Add support for new URL format (#14460)
-- [afreecatv:global] Remove extractor
-- [youtube:shared] Remove extractor (#14420)
-+ [slideslive] Add support for slideslive.com (#2680)
-+ [facebook] Support thumbnails (#14416)
-* [vvvvid] Fix episode number extraction (#14456)
-* [hrti:playlist] Relax URL regular expression
-* [wdr] Relax media link regular expression (#14447)
-* [hrti] Relax URL regular expression (#14443)
-* [fox] Delegate extraction to uplynk:preplay (#14147)
-+ [youtube] Add support for hooktube.com (#14437)
-
-
-version 2017.10.07
-
-Core
-* [YoutubeDL] Ignore duplicates in --playlist-items
-* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
- reduce code duplication (#14425)
-+ [utils] Use cache in OnDemandPagedList by default
-* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
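-
-A hedged sketch of how the now-cached OnDemandPagedList mentioned above is
-typically used; the page function and sizes are made up for illustration.
-
-    from hypervideo_dl.utils import OnDemandPagedList
-
-    def fetch_page(pagenum):
-        # Hypothetical page function: return the entries of one page
-        return [{'id': '%d-%d' % (pagenum, i)} for i in range(50)]
-
-    pages = OnDemandPagedList(fetch_page, 50)
-    entries = pages.getslice(0, 100)  # repeated slices now hit the cache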
-
-Extractors
-* [reddit] Sort formats (#14430)
-* [lnkgo] Relax URL regular expression (#14423)
-* [pornflip] Extend URL regular expression (#14405, #14406)
-+ [xtube] Add support for embed URLs (#14417)
-+ [xvideos] Add support for embed URLs and improve extraction (#14409)
-* [beeg] Fix extraction (#14403)
-* [tvn24] Relax URL regular expression (#14395)
-* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
- #14392, #14414, #14419, #14431)
-+ [ketnet] Add support for videos without direct sources (#14377)
-* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
-+ [afreecatv] Add support for adult videos (#14376)
-
-
-version 2017.10.01
-
-Core
-* [YoutubeDL] Document youtube_include_dash_manifest
-
-Extractors
-+ [tvp] Add support for new URL schema (#14368)
-+ [generic] Add support for single format Video.js embeds (#14371)
-* [yahoo] Bypass geo restriction for brightcove (#14210)
-* [yahoo] Use extracted brightcove account id (#14210)
-* [rtve:alacarta] Fix extraction (#14290)
-+ [yahoo] Add support for custom brightcove embeds (#14210)
-+ [generic] Add support for Video.js embeds
-+ [gfycat] Add support for /gifs/detail URLs (#14322)
-* [generic] Fix infinite recursion for twitter:player URLs (#14339)
-* [xhamsterembed] Fix extraction (#14308)
-
-
-version 2017.09.24
-
-Core
-+ [options] Accept lrc as a subtitle conversion target format (#14292)
-* [utils] Fix handling raw TTML subtitles (#14191)
-
-Extractors
-* [24video] Fix timestamp extraction and make non fatal (#14295)
-+ [24video] Add support for 24video.adult (#14295)
-+ [kakao] Add support for tv.kakao.com (#12298, #14007)
-+ [twitter] Add support for URLs without user id (#14270)
-+ [americastestkitchen] Add support for americastestkitchen.com (#10764,
- #13996)
-* [generic] Fix support for multiple HTML5 videos on one page (#14080)
-* [mixcloud] Fix extraction (#14088, #14132)
-+ [lynda] Add support for educourse.ga (#14286)
-* [beeg] Fix extraction (#14275)
-* [nbcsports:vplayer] Correct theplatform URL (#13873)
-* [twitter] Fix duration extraction (#14141)
-* [tvplay] Bypass geo restriction
-+ [heise] Add support for YouTube embeds (#14109)
-+ [popcorntv] Add support for popcorntv.it (#5914, #14211)
-* [viki] Update app data (#14181)
-* [morningstar] Relax URL regular expression (#14222)
-* [openload] Fix extraction (#14225, #14257)
-* [noovo] Fix extraction (#14214)
-* [dailymotion:playlist] Relax URL regular expression (#14219)
-+ [twitch] Add support for go.twitch.tv URLs (#14215)
-* [vgtv] Relax URL regular expression (#14223)
-
-
-version 2017.09.15
-
-Core
-* [downloader/fragment] Restart inconsistent incomplete fragment downloads
- (#13731)
-* [YoutubeDL] Download raw subtitles files (#12909, #14191)
-
-Extractors
-* [condenast] Fix extraction (#14196, #14207)
-+ [orf] Add support for f4m stories
-* [tv4] Relax URL regular expression (#14206)
-* [animeondemand] Bypass geo restriction
-+ [animeondemand] Add support for flash videos (#9944)
-
-
-version 2017.09.11
-
-Extractors
-* [rutube:playlist] Fix suitable (#14166)
-
-
-version 2017.09.10
-
-Core
-+ [utils] Introduce bool_or_none
-* [YoutubeDL] Ensure dir existence for each requested format (#14116)
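-
-A minimal sketch, not necessarily the actual implementation, of a
-bool_or_none helper like the one introduced above:
-
-    def bool_or_none(v, default=None):
-        # Hypothetical: pass booleans through, map everything else to default
-        return v if isinstance(v, bool) else default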
-
-Extractors
-* [fox] Fix extraction (#14147)
-* [rutube] Use bool_or_none
-* [rutube] Rework and generalize playlist extractors (#13565)
-+ [rutube:playlist] Add support for playlists (#13534, #13565)
-+ [radiocanada] Add fallback for title extraction (#14145)
-* [vk] Use dedicated YouTube embeds extraction routine
-* [vice] Use dedicated YouTube embeds extraction routine
-* [cracked] Use dedicated YouTube embeds extraction routine
-* [chilloutzone] Use dedicated YouTube embeds extraction routine
-* [abcnews] Use dedicated YouTube embeds extraction routine
-* [youtube] Separate methods for embeds extraction
-* [redtube] Fix formats extraction (#14122)
-* [arte] Relax unavailability check (#14112)
-+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
-* [vidme:user] Relax URL regular expression (#14054)
-* [bpb] Fix extraction (#14043, #14086)
-* [soundcloud] Fix download URL with private tracks (#14093)
-* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
-* [viidea] Capture and output lecture error message (#14099)
-* [radiocanada] Skip unsupported platforms (#14100)
-
-
-version 2017.09.02
-
-Extractors
-* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
- #14077, #14079, #14082, #14083, #14094, #14095, #14096)
-* [youtube] Fix upload date extraction (#14065)
-+ [charlierose] Add support for episodes (#14062)
-+ [bbccouk] Add support for w-prefixed ids (#14056)
-* [googledrive] Extend URL regular expression (#9785)
-+ [googledrive] Add support for source format (#14046)
-* [pornhd] Fix extraction (#14005)
-
-
-version 2017.08.27.1
-
-Extractors
-
-* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
-
-
-version 2017.08.27
-
-Core
-+ [extractor/common] Extract height and format id for HTML5 videos (#14034)
-* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
- #8625, #9483)
- * Simplify code and split into separate routines to facilitate maintaining
- * Make retry mechanism work on errors during the actual download, not only
- during the connection establishment phase
- * Retry on ECONNRESET and ETIMEDOUT during reading data from network
- * Retry on content too short
- * Show error description on retry
-
-Extractors
-* [generic] Lower preference for extraction from LD-JSON
-* [rai] Fix audio formats extraction (#14024)
-* [youtube] Fix controversy videos extraction (#14027, #14029)
-* [mixcloud] Fix extraction (#14015, #14020)
-
-
-version 2017.08.23
-
-Core
-+ [extractor/common] Introduce _parse_xml
-* [extractor/common] Make HLS and DASH extraction in _parse_html5_media_entries
- non fatal (#13970)
-* [utils] Fix unescapeHTML for malformed strings like "&a&quot;" (#13935)
-
-Extractors
-* [cbc:watch] Bypass geo restriction (#13993)
-* [toutv] Relax DRM check (#13994)
-+ [googledrive] Add support for subtitles (#13619, #13638)
-* [pornhub] Relax uploader regular expression (#13906, #13975)
-* [bandcamp:album] Extract track titles (#13962)
-+ [bbccouk] Add support for events URLs (#13893)
-+ [liveleak] Support multi-video pages (#6542)
-+ [liveleak] Support another liveleak embedding pattern (#13336)
-* [cda] Fix extraction (#13935)
-+ [laola1tv] Add support for tv.ittf.com (#13965)
-* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
-
-
-version 2017.08.18
-
-Core
-* [YoutubeDL] Sanitize byte string format URLs (#13951)
-+ [extractor/common] Add support for float durations in _parse_mpd_formats
- (#13919)
-
-Extractors
-* [arte] Detect unavailable videos (#13945)
-* [generic] Convert redirect URLs to unicode strings (#13951)
-* [udemy] Fix paid course detection (#13943)
-* [pluralsight] Use RPC API for course extraction (#13937)
-+ [clippit] Add support for clippituser.tv
-+ [qqmusic] Support new URL schemes (#13805)
-* [periscope] Renew HLS extraction (#13917)
-* [mixcloud] Extract decrypt key
-
-
-version 2017.08.13
-
-Core
-* [YoutubeDL] Make sure format id is not empty
-* [extractor/common] Make _family_friendly_search optional
-* [extractor/common] Respect source's type attribute for HTML5 media (#13892)
-
-Extractors
-* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902)
-+ [fourtube] Add support for pornerbros.com (#6022)
-+ [fourtube] Add support for porntube.com (#7859, #13901)
-+ [fourtube] Add support for fux.com
-* [limelight] Improve embeds detection (#13895)
-+ [reddit] Add support for v.redd.it and reddit.com (#13847)
-* [aparat] Extract all formats (#13887)
-* [mixcloud] Fix play info decryption (#13885)
-+ [generic] Add support for vzaar embeds (#13876)
-
-
-version 2017.08.09
-
-Core
-* [utils] Skip missing params in cli_bool_option (#13865)
-
-Extractors
-* [xxxymovies] Fix title extraction (#13868)
-+ [nick] Add support for nick.com.pl (#13860)
-* [mixcloud] Fix play info decryption (#13867)
-* [20min] Fix embeds extraction (#13852)
-* [dplayit] Fix extraction (#13851)
-+ [niconico] Support videos with multiple formats (#13522)
-+ [niconico] Support HTML5-only videos (#13806)
-
-
-version 2017.08.06
-
-Core
-* Use relative paths for DASH fragments (#12990)
-
-Extractors
-* [pluralsight] Fix format selection
-- [mpora] Remove extractor (#13826)
-+ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218)
-* [vlive:channel] Limit number of videos per page to 100 (#13830)
-* [podomatic] Extend URL regular expression (#13827)
-* [cinchcast] Extend URL regular expression
-* [yandexdisk] Relax URL regular expression (#13824)
-* [vidme] Extract DASH and HLS formats
-- [teamfour] Remove extractor (#13782)
-* [pornhd] Fix extraction (#13783)
-* [udemy] Fix subtitles extraction (#13812)
-* [mlb] Extend URL regular expression (#13740, #13773)
-+ [pbs] Add support for new URL schema (#13801)
-* [nrktv] Update API host (#13796)
-
-
-version 2017.07.30.1
-
-Core
-* [downloader/hls] Use redirect URL as manifest base (#13755)
-* [options] Correctly hide login info from debug outputs (#13696)
-
-Extractors
-+ [watchbox] Add support for watchbox.de (#13739)
-- [clipfish] Remove extractor
-+ [youjizz] Fix extraction (#13744)
-+ [generic] Add support for another ooyala embed pattern (#13727)
-+ [ard] Add support for lives (#13771)
-* [soundcloud] Update client id
-+ [soundcloud:trackstation] Add support for track stations (#13733)
-* [svtplay] Use geo verification proxy for API request
-* [svtplay] Update API URL (#13767)
-+ [yandexdisk] Add support for yadi.sk (#13755)
-+ [megaphone] Add support for megaphone.fm
-* [amcnetworks] Make rating optional (#12453)
-* [cloudy] Fix extraction (#13737)
-+ [nickru] Add support for nickelodeon.ru
-* [mtv] Improve thumbnail extraction
-* [nick] Automate geo-restriction bypass (#13711)
-* [niconico] Improve error reporting (#13696)
-
-
-version 2017.07.23
-
-Core
-* [YoutubeDL] Improve default format specification (#13704)
-* [YoutubeDL] Do not override id, extractor and extractor_key for
- url_transparent entities
-* [extractor/common] Fix playlist_from_matches
-
-Extractors
-* [itv] Fix production id extraction (#13671, #13703)
-* [vidio] Make duration non fatal and fix typo
-* [mtv] Skip missing video parts (#13690)
-* [sportbox:embed] Fix extraction
-+ [npo] Add support for npo3.nl URLs (#13695)
-* [dramafever] Remove video id from title (#13699)
-+ [egghead:lesson] Add support for lessons (#6635)
-* [funnyordie] Extract more metadata (#13677)
-* [youku:show] Fix playlist extraction (#13248)
-+ [dispeak] Recognize sevt subdomain (#13276)
-* [adn] Improve error reporting (#13663)
-* [crunchyroll] Relax series and season regular expression (#13659)
-+ [spiegel:article] Add support for nexx iframe embeds (#13029)
-+ [nexx:embed] Add support for iframe embeds
-* [nexx] Improve JS embed extraction
-+ [pearvideo] Add support for pearvideo.com (#13031)
-
-
-version 2017.07.15
-
-Core
-* [YoutubeDL] Don't expand environment variables in meta fields (#13637)
-
-Extractors
-* [spiegeltv] Delegate extraction to nexx extractor (#13159)
-+ [nexx] Add support for nexx.cloud (#10807, #13465)
-* [generic] Fix rutube embeds extraction (#13641)
-* [karrierevideos] Fix title extraction (#13641)
-* [youtube] Don't capture YouTube Red ad for creator meta field (#13621)
-* [slideshare] Fix extraction (#13617)
-+ [5tv] Add another video URL pattern (#13354, #13606)
-* [drtv] Make HLS and HDS extraction non fatal
-* [ted] Fix subtitles extraction (#13628, #13629)
-* [vine] Make sure the title won't be empty
-+ [twitter] Support HLS streams in vmap URLs
-+ [periscope] Support pscp.tv URLs in embedded frames
-* [twitter] Extract mp4 URLs via mobile API (#12726)
-* [niconico] Fix authentication error handling (#12486)
-* [giantbomb] Extract m3u8 formats (#13626)
-+ [vlive:playlist] Add support for playlists (#13613)
-
-
-version 2017.07.09
-
-Core
-+ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
-+ [utils] Support attributes with no values in get_elements_by_attribute
-
-Extractors
-+ [dailymail] Add support for embeds
-+ [joj] Add support for joj.sk (#13268)
-* [abc.net.au:iview] Extract more formats (#13492, #13489)
-* [egghead:course] Fix extraction (#6635, #13370)
-+ [cjsw] Add support for cjsw.com (#13525)
-+ [eagleplatform] Add support for referrer protected videos (#13557)
-+ [eagleplatform] Add support for another embed pattern (#13557)
-* [veoh] Extend URL regular expression (#13601)
-* [npo:live] Fix live stream id extraction (#13568, #13605)
-* [googledrive] Fix height extraction (#13603)
-+ [dailymotion] Add support for new layout (#13580)
-- [yam] Remove extractor
-* [xhamster] Extract all formats and fix duration extraction (#13593)
-+ [xhamster] Add support for new URL schema (#13593)
-* [espn] Extend URL regular expression (#13244, #13549)
-* [kaltura] Fix typo in subtitles extraction (#13569)
-* [vier] Adapt extraction to redesign (#13575)
-
-
-version 2017.07.02
-
-Core
-* [extractor/common] Improve _json_ld
-
-Extractors
-+ [thisoldhouse] Add more fallbacks for video id
-* [thisoldhouse] Fix video id extraction (#13540, #13541)
-* [xfileshare] Extend format regular expression (#13536)
-* [ted] Fix extraction (#13535)
-+ [tastytrade] Add support for tastytrade.com (#13521)
-* [dplayit] Relax video id regular expression (#13524)
-+ [generic] Extract more generic metadata (#13527)
-+ [bbccouk] Capture and output error message (#13501, #13518)
-* [cbsnews] Relax video info regular expression (#13284, #13503)
-+ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
-* [soundcloud] Switch to https for API requests (#13502)
-* [pandatv] Switch to https for API and download URLs
-+ [pandatv] Add support for https URLs (#13491)
-+ [niconico] Support sp subdomain (#13494)
-
-
-version 2017.06.25
-
-Core
-+ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
-* [YoutubeDL] Skip malformed formats for better extraction robustness
-
-Extractors
-+ [wsj] Add support for barrons.com (#13470)
-+ [ign] Add another video id pattern (#13328)
-+ [raiplay:live] Add support for live streams (#13414)
-+ [redbulltv] Add support for live videos and segments (#13486)
-+ [onetpl] Add support for videos embedded via pulsembed (#13482)
-* [ooyala] Make more robust
-* [ooyala] Skip empty format URLs (#13471, #13476)
-* [hgtv.com:show] Fix typo
-
-
-version 2017.06.23
-
-Core
-* [adobepass] Fix extraction on older python 2.6
-
-Extractors
-* [youtube] Adapt to new automatic captions rendition (#13467)
-* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
-* [drtuber] Fix formats extraction (#12058)
-* [youporn] Fix upload date extraction
-* [youporn] Improve formats extraction
-* [youporn] Fix title extraction (#13456)
-* [googledrive] Fix formats sorting (#13443)
-* [watchindianporn] Fix extraction (#13411, #13415)
-+ [vimeo] Add fallback mp4 extension for original format
-+ [ruv] Add support for ruv.is (#13396)
-* [viu] Fix extraction on older python 2.6
-* [pandora.tv] Fix upload_date extraction (#12846)
-+ [asiancrush] Add support for asiancrush.com (#13420)
-
-
-version 2017.06.18
-
-Core
-* [downloader/common] Use utils.shell_quote for debug command line
-* [utils] Use compat_shlex_quote in shell_quote
-* [postprocessor/execafterdownload] Encode command line (#13407)
-* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
-* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
- in --metadata-from-title (#13408)
-* [extractor/common] Fix json dumping with --geo-bypass
-+ [extractor/common] Improve jwplayer subtitles extraction
-+ [extractor/common] Improve jwplayer formats extraction (#13379)
-
-Extractors
-* [polskieradio] Fix extraction (#13392)
-+ [xfileshare] Add support for fastvideo.me (#13385)
-* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
-* [4tube] Fix extraction (#13381, #13382)
-+ [disney] Add support for disneychannel.de (#13383)
-* [npo] Improve URL regular expression (#13376)
-+ [corus] Add support for showcase.ca
-+ [corus] Add support for history.ca (#13359)
-
-
-version 2017.06.12
-
-Core
-* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
-+ [compat] Introduce compat_HTMLParseError
-* [utils] Improve unified_timestamp
-* [extractor/generic] Ensure format id is unicode string
-* [extractor/common] Return unicode string from _match_id
-+ [YoutubeDL] Sanitize more fields (#13313)
-
-Extractors
-+ [xfileshare] Add support for rapidvideo.tv (#13348)
-* [xfileshare] Modernize and pass Referer
-+ [rutv] Add support for testplayer.vgtrk.com (#13347)
-+ [newgrounds] Extract more metadata (#13232)
-+ [newgrounds:playlist] Add support for playlists (#10611)
-* [newgrounds] Improve formats and uploader extraction (#13346)
-* [msn] Fix formats extraction
-* [turbo] Ensure format id is string
-* [sexu] Ensure height is int
-* [jove] Ensure comment count is int
-* [golem] Ensure format id is string
-* [gfycat] Ensure filesize is int
-* [foxgay] Ensure height is int
-* [flickr] Ensure format id is string
-* [sohu] Fix numeric fields
-* [safari] Improve authentication detection (#13319)
-* [liveleak] Ensure height is int (#13313)
-* [streamango] Make title optional (#13292)
-* [rtlnl] Improve URL regular expression (#13295)
-* [tvplayer] Fix extraction (#13291)
-
-
-version 2017.06.05
-
-Core
-* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
-
-Extractors
-+ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
-* [pornhub:playlist] Fix extraction (#13281)
-- [godtv] Remove extractor (#13175)
-* [safari] Fix typo (#13252)
-* [youtube] Improve chapters extraction (#13247)
-* [1tv] Lower preference for HTTP formats (#13246)
-* [francetv] Relax URL regular expression
-* [drbonanza] Fix extraction (#13231)
-* [packtpub] Fix authentication (#13240)
-
-
-version 2017.05.29
-
-Extractors
-* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
- (#13211)
-* [xhamster] Fix uploader and like/dislike count extraction (#13216)
-+ [xhamster] Extract categories (#11728)
-+ [abcnews] Add support for embed URLs (#12851)
-* [gaskrank] Fix extraction (#12493)
-* [medialaan] Fix videos with missing videoUrl (#12774)
-* [dvtv] Fix playlist support
-+ [dvtv] Add support for DASH and HLS formats (#3063)
-+ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
-* [cbsinteractive] Relax URL regular expression (#13213)
-* [adn] Fix formats extraction
-+ [youku] Extract more metadata (#10433)
-* [cbsnews] Fix extraction (#13205)
-
-
-version 2017.05.26
-
-Core
-+ [utils] strip_jsonp() can recognize more patterns
-* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
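-
-A hedged example of the strip_jsonp helper mentioned above (illustrative
-input and output; assuming the usual utils module path):
-
-    from hypervideo_dl.utils import strip_jsonp
-
-    strip_jsonp('callback({"status": "ok"});')  # likely -> '{"status": "ok"}'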
-
-Extractors
-+ [youtube] Recognize DASH MPDs with cipher signatures (#11381)
-+ [bbc] Add support for authentication
-* [tudou] Merge into youku extractor (#12214)
-* [youku:show] Fix extraction
-* [youku] Fix extraction (#13191)
-* [udemy] Fix extraction for outputs' format entries without URL (#13192)
-* [vimeo] Fix formats' sorting (#13189)
-* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
-
-
-version 2017.05.23
-
-Core
-+ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
-+ [adobepass] Add support for Bright House Networks (#13149)
-
-Extractors
-+ [streamcz] Add support for subtitles (#13174)
-* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
-* [toggle] Relax URL regular expression (#13172)
-* [toypics] Fix extraction (#13077)
-* [njpwworld] Fix extraction (#13162, #13169)
-+ [hitbox] Add support for smashcast.tv (#13154)
-* [mitele] Update app key regular expression (#13158)
-
-
-version 2017.05.18.1
-
-Core
-* [jsinterp] Fix typo and cleanup regular expressions (#13134)
-
-
-version 2017.05.18
-
-Core
-+ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125,
- #13126, #13128, #13129, #13130, #13131, #13132)
-+ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats
- (#13088, #13092)
-+ [utils] Recognize more audio codecs (#13081)
-
-Extractors
-+ [vier] Extract more metadata (#12539)
-* [vier] Improve extraction (#12801)
- + Add support for authentication
- * Bypass authentication when no credentials provided
- * Improve extraction robustness
-* [dailymail] Fix sources extraction (#13057)
-* [dailymotion] Extend URL regular expression (#13079)
-
-
-version 2017.05.14
-
-Core
-+ [extractor/common] Respect Width and Height attributes in ISM manifests
-+ [postprocessor/metadatafromtitle] Add support for regular expression syntax
- in --metadata-from-title (#13065)
-
-Extractors
-+ [mediaset] Add support for video.mediaset.it (#12708, #12964)
-* [orf:radio] Fix extraction (#11643, #12926)
-* [aljazeera] Extend URL regular expression (#13053)
-* [imdb] Relax URL regular expression (#13056)
-+ [francetv] Add support for mobile.france.tv (#13068)
-+ [upskill] Add support for upskillcourses.com (#13043)
-* [thescene] Fix extraction (#13061)
-* [condenast] Improve embed support
-* [liveleak] Fix extraction (#12053)
-+ [douyu] Support Douyu shows (#12228)
-* [myspace] Improve URL regular expression (#13040)
-* [adultswim] Use desktop platform in assets URL (#13041)
-
-
-version 2017.05.09
-
-Core
-* [YoutubeDL] Force --restrict-filenames on all Python versions when no
- locale is set (#13027)
-
-Extractors
-* [francetv] Adapt to site redesign (#13034)
-+ [packtpub] Add support for authentication (#12622)
-* [drtv] Lower preference for SignLanguage formats (#13013, #13016)
-+ [cspan] Add support for brightcove live embeds (#13028)
-* [vrv] Extract DASH formats and subtitles
-* [funimation] Fix authentication (#13021)
-* [adultswim] Fix extraction (#8640, #10950, #11042, #12121)
- + Add support for Adobe Pass authentication
- + Add support for live streams
- + Add support for show pages
-* [turner] Extract thumbnail, is_live and strip description
-+ [nonktube] Add support for nonktube.com (#8647, #13024)
-+ [nuevo] Pass headers to _extract_nuevo
-* [nbc] Improve extraction (#12364)
-
-
-version 2017.05.07
-
-Common
-* [extractor/common] Fix typo in _extract_akamai_formats
-+ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata
-+ [extractor/common] Introduce chapters meta field
-
-Extractors
-* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995,
- #13003)
-* [bilibili] Fix video downloading (#13001)
-* [rmcdecouverte] Fix extraction (#12937)
-* [theplatform] Extract chapters
-* [bandcamp] Fix thumbnail extraction (#12980)
-* [pornhub] Extend URL regular expression (#12996)
-+ [youtube] Extract chapters
-+ [nrk] Extract chapters
-+ [vice] Add support for ooyala embeds in article pages
-+ [vice] Support vice articles (#12968)
-* [vice] Fix extraction for non en_us videos (#12967)
-* [gdcvault] Fix extraction for some videos (#12733)
-* [pbs] Improve multipart video support (#12981)
-* [laola1tv] Fix extraction (#12880)
-+ [cda] Support birthday verification (#12789)
-* [leeco] Fix extraction (#12974)
-+ [pbs] Extract chapters
-* [amp] Improve thumbnail and subtitles extraction
-* [foxsports] Fix extraction (#12945)
-- [coub] Remove comment count extraction (#12941)
-
-
-version 2017.05.01
-
-Core
-+ [extractor/common] Extract view count from JSON-LD
-* [utils] Improve unified_timestamp
-+ [utils] Add video/mp2t to mimetype2ext
-* [downloader/external] Properly handle live stream downloading cancellation
- (#8932)
-+ [utils] Add support for unicode whitespace in clean_html on python 2 (#12906)
-
-Extractors
-* [infoq] Make audio format extraction non-fatal (#12938)
-* [brightcove] Allow whitespace around attribute names in embedded code
-+ [zaq1] Add support for zaq1.pl (#12693)
-+ [xvideos] Extract duration (#12828)
-* [vevo] Fix extraction (#12879)
-+ [noovo] Add support for noovo.ca (#12792)
-+ [washingtonpost] Add support for embeds (#12699)
-* [yandexmusic:playlist] Fix extraction for python 3 (#12888)
-* [anvato] Improve extraction (#12913)
- * Promote to regular shortcut based extractor
- * Add mcp to access key mapping table
- * Add support for embeds extraction
- * Add support for anvato embeds in generic extractor
-* [xtube] Fix extraction for older FLV videos (#12734)
-* [tvplayer] Fix extraction (#12908)
-
-
-version 2017.04.28
-
-Core
-+ [adobepass] Use geo verification headers for all requests
-- [downloader/fragment] Remove assert for resume_len when no fragments
- downloaded
-+ [extractor/common] Add manifest_url for explicit group rendition formats
-* [extractor/common] Fix manifest_url for m3u8 formats
-- [extractor/common] Don't list master m3u8 playlists in format list (#12832)
-
-Extractor
-* [aenetworks] Fix extraction for shows with single season
-+ [go] Add support for Disney, DisneyJunior and DisneyXD show pages
-* [youtube] Recognize new locale-based player URLs (#12885)
-+ [streamable] Add support for new embedded URL schema (#12844)
-* [arte:+7] Relax URL regular expression (#12837)
-
-
-version 2017.04.26
-
-Core
-* Introduce --keep-fragments for keeping fragments of fragmented downloads
- on disk after the download is finished
-* [YoutubeDL] Fix output template for missing timestamp (#12796)
-* [socks] Handle cases where credentials are required but missing
-* [extractor/common] Improve HLS extraction (#12211)
- * Extract m3u8 parsing to separate method
- * Improve rendition groups extraction
- * Build stream name according to stream GROUP-ID
- * Ignore reference to AUDIO group without URI when stream has no CODECS
- * Use float for scaled tbr in _parse_m3u8_formats
-* [utils] Add support for TTML styles in dfxp2srt
-* [downloader/hls] No need to download keys for fragments that have already
- been downloaded
-* [downloader/fragment] Improve fragment downloading
- * Resume immediately
- * Don't concatenate fragments and decrypt them on every resume
- * Optimize disk storage usage; don't store intermediate fragments on disk
- * Store bookkeeping download state file
-+ [extractor/common] Add support for multiple getters in try_get
-+ [extractor/common] Add support for video of WebPage context in _json_ld
- (#12778)
-+ [extractor/common] Relax JWPlayer regular expression and remove
- duplicate URLs (#12768)
-
-Extractors
-* [iqiyi] Fix extraction of Yule videos
-* [vidio] Improve extraction and sort formats
-+ [brightcove] Match only video elements with data-video-id attribute
-* [iqiyi] Fix playlist detection (#12504)
-- [azubu] Remove extractor (#12813)
-* [porn91] Fix extraction (#12814)
-* [vidzi] Fix extraction (#12793)
-+ [amp] Extract error message (#12795)
-+ [xfileshare] Add support for gorillavid.com and daclips.com (#12776)
-* [instagram] Fix extraction (#12777)
-+ [generic] Support Brightcove videos in <iframe> (#12482)
-+ [brightcove] Support URLs with bcpid instead of playerID (#12482)
-* [brightcove] Fix _extract_url (#12782)
-+ [odnoklassniki] Extract HLS formats
-
-
-version 2017.04.17
-
-Extractors
-* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds
- and add support for channel and channelList embeds
-* [generic] Extract multiple Limelight embeds (#12761)
-+ [itv] Extract series metadata
-* [itv] Fix RTMP formats downloading (#12759)
-* [itv] Use native HLS downloader by default
-+ [go90] Extract subtitles (#12752)
-+ [go90] Extract series metadata (#12752)
-
-
-version 2017.04.16
-
-Core
-* [YoutubeDL] Apply expand_path after output template substitution
-+ [YoutubeDL] Propagate overridden meta fields to extraction results of type
- url (#11163)
-
-Extractors
-+ [generic] Extract RSS entries as url_transparent (#11163)
-+ [streamango] Add support for streamango.com (#12643)
-+ [wsj:article] Add support for articles (#12558)
-* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds'
- URLs (#9163, #12005, #12178, #12480)
-+ [udemy] Add support for react rendition (#12744)
-
-
-version 2017.04.15
-
-Extractors
-* [youku] Fix fileid extraction (#12741, #12743)
-
-
-version 2017.04.14
-
-Core
-+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
-+ [adobepass] Improve Comcast and Verizon login code (#10803)
-+ [adobepass] Add support for Verizon (#10803)
-
-Extractors
-+ [aenetworks] Add support for specials (#12723)
-+ [hbo] Extract HLS formats
-+ [go90] Add support for go90.com (#10127)
-+ [tv2hu] Add support for tv2.hu (#10509)
-+ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654)
-* [youtube] Improve HLS formats extraction
-* [afreecatv] Fix extraction for videos with different key layout (#12718)
-- [youtube] Remove explicit preference for audio-only and video-only formats in
- order not to break sorting when new formats appear
-* [canalplus] Bypass geo restriction
-
-
-version 2017.04.11
-
-Extractors
-* [afreecatv] Fix extraction (#12706)
-+ [generic] Add support for <object> YouTube embeds (#12637)
-* [bbccouk] Treat bitrate as audio+video bitrate in media selector
-+ [bbccouk] Skip unrecognized formats in media selector (#12701)
-+ [bbccouk] Add support for https protocol in media selector (#12701)
-* [curiositystream] Fix extraction (#12638)
-* [adn] Update subtitle decryption key
-* [chaturbate] Fix extraction (#12665, #12688, #12690)
-
-
-version 2017.04.09
-
-Extractors
-+ [medici] Add support for medici.tv (#3406)
-+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
-+ [npo:live] Add support for default URL (#12555)
-* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
-+ [thesun] Add support for thesun.co.uk (#11298, #12674)
-+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
-* [ceskatelevize] Improve extraction and remove URL replacement hacks
-+ [kaltura] Add support for iframe embeds (#12679)
-* [airmozilla] Fix extraction (#12670)
-* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
-+ [raiplay] Extract subtitles
-+ [xfileshare] Add support for vidlo.us (#12660)
-+ [xfileshare] Add support for vidbom.com (#12661)
-+ [aenetworks] Add more video URL regular expressions (#12657)
-+ [odnoklassniki] Fix format sorting for 1080p quality
-+ [rtl2] Add support for you.rtl2.de (#10257)
-+ [vshare] Add support for vshare.io (#12278)
-
-
-version 2017.04.03
-
-Core
-+ [extractor/common] Add censorship check for TransTelekom ISP
-* [extractor/common] Move censorship checks to a separate method
-
-Extractors
-+ [discoveryvr] Add support for discoveryvr.com (#12578)
-+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
-+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
-
-
-version 2017.04.02
-
-Core
-* [YoutubeDL] Return early when extraction of url_transparent fails
-
-Extractors
-* [rai] Fix and improve extraction (#11790)
-+ [vrv] Add support for series pages
-* [limelight] Improve extraction for audio only formats
-* [funimation] Fix extraction (#10696, #11773)
-+ [xfileshare] Add support for vidabc.com (#12589)
-+ [xfileshare] Improve extraction and extract hls formats
-+ [crunchyroll] Pass geo verification proxy
-+ [cwtv] Extract ISM formats
-+ [tvplay] Bypass geo restriction
-+ [vrv] Add support for vrv.co
-+ [packtpub] Add support for packtpub.com (#12610)
-+ [generic] Pass base_url to _parse_jwplayer_data
-+ [adn] Add support for animedigitalnetwork.fr (#4866)
-+ [allocine] Extract more metadata
-* [allocine] Fix extraction (#12592)
-* [openload] Fix extraction
-
-
-version 2017.03.26
-
-Core
-* Don't raise an error if JWPlayer config data is not a JavaScript object
- literal. _find_jwplayer_data now returns a dict rather than a str. (#12307)
-* Expand environment variables for options representing paths (#12556)
-+ [utils] Introduce expand_path
-* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams
-
-Extractors
-* [afreecatv] Fix extraction (#12179)
-+ [atvat] Add support for atv.at (#5325)
-+ [fox] Add metadata extraction (#12391)
-+ [atresplayer] Extract DASH formats
-+ [atresplayer] Extract HD manifest (#12548)
-* [atresplayer] Fix login error detection (#12548)
-* [franceculture] Fix extraction (#12547)
-* [youtube] Improve URL regular expression (#12538)
-* [generic] Do not follow redirects to the same URL
-
-
-version 2017.03.24
-
-Extractors
-- [9c9media] Remove mp4 URL extraction request
-+ [bellmedia] Add support for etalk.ca and space.ca (#12447)
-* [channel9] Fix extraction (#11323)
-* [cloudy] Fix extraction (#12525)
-+ [hbo] Add support for free episode URLs and new formats extraction (#12519)
-* [condenast] Fix extraction and style (#12526)
-* [viu] Relax URL regular expression (#12529)
-
-
-version 2017.03.22
-
-Extractors
-- [pluralsight] Omit module title from video title (#12506)
-* [pornhub] Decode obfuscated video URL (#12470, #12515)
-* [senateisvp] Allow https URL scheme for embeds (#12512)
-
-
-version 2017.03.20
-
-Core
-+ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as
- output template
-+ [adobepass] Detect and output error on authz token extraction (#12472)
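-  A minimal sketch of the multiple-URLs-to-stdout usage above; the URLs and
-  the player are placeholders:
-    # both downloads are written to stdout and piped to a player
-    youtube-dl -o - https://example.com/v1 https://example.com/v2 | mpv -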
-
-Extractors
-+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
-+ [toongoggles] Add support for toongoggles.com (#12171)
-+ [medialaan] Add support for Medialaan sites (#9974, #11912)
-+ [discoverynetworks] Add support for more domains and bypass geo restriction
-* [openload] Fix extraction (#10408)
-
-
-version 2017.03.16
-
-Core
-+ [postprocessor/ffmpeg] Add support for flac
-+ [extractor/common] Extract SMIL formats from jwplayer
-
-Extractors
-+ [generic] Add forgotten return for jwplayer formats
-* [redbulltv] Improve extraction
-
-
-version 2017.03.15
-
-Core
-* Fix missing subtitles if --add-metadata is used (#12423)
-
-Extractors
-* [facebook] Make title optional (#12443)
-+ [mitele] Add support for ooyala videos (#12430)
-* [openload] Fix extraction (#12435, #12446)
-* [streamable] Update API URL (#12433)
-+ [crunchyroll] Extract season name (#12428)
-* [discoverygo] Bypass geo restriction
-+ [discoverygo:playlist] Add support for playlists (#12424)
-
-
-version 2017.03.10
-
-Extractors
-* [generic] Make title optional for jwplayer embeds (#12410)
-* [wdr:maus] Fix extraction (#12373)
-* [prosiebensat1] Improve title extraction (#12318, #12327)
-* [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393)
-* [miomio] Fix extraction (#12291, #12388, #12402)
-* [telequebec] Fix description extraction (#12399)
-* [openload] Fix extraction (#12357)
-* [brightcove:legacy] Relax videoPlayer validation check (#12381)
-
-
-version 2017.03.07
-
-Core
-* Metadata are now added after conversion (#5594)
-
-Extractors
-* [soundcloud] Update client id (#12376)
-* [openload] Fix extraction (#10408, #12357)
-
-
-version 2017.03.06
-
-Core
-+ [utils] Process bytestrings in urljoin (#12369)
-* [extractor/common] Improve height extraction and extract bitrate
-* [extractor/common] Move jwplayer formats extraction in separate method
-+ [external:ffmpeg] Limit test download size to 10KiB (#12362)
-
-Extractors
-+ [drtv] Add geo countries to GeoRestrictedError
-+ [drtv:live] Bypass geo restriction
-+ [tunepk] Add extractor (#12197, #12243)
-
-
-version 2017.03.05
-
-Extractors
-+ [twitch] Add basic support for two-factor authentication (#11974)
-+ [vier] Add support for vijf.be (#12304)
-+ [redbulltv] Add support for redbull.tv (#3919, #11948)
-* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316)
-+ [generic] Add support for rutube embeds
-+ [rutube] Relax URL regular expression
-+ [vrak] Add support for vrak.tv (#11452)
-+ [brightcove:new] Add ability to smuggle geo_countries into URL
-+ [brightcove:new] Raise GeoRestrictedError
-* [go] Relax URL regular expression (#12341)
-* [24video] Use original host for requests (#12339)
-* [ruutu] Disable DASH formats (#12322)
-
-
-version 2017.03.02
-
-Core
-+ [adobepass] Add support for Charter Spectrum (#11465)
-* [YoutubeDL] Don't sanitize identifiers in output template (#12317)
-
-Extractors
-* [facebook] Fix extraction (#12323, #12330)
-* [youtube] Mark errors about rental videos as expected (#12324)
-+ [npo] Add support for audio
-* [npo] Adapt to app.php API (#12311, #12320)
-
-
-version 2017.02.28
-
-Core
-+ [utils] Add bytes_to_long and long_to_bytes
-+ [utils] Add pkcs1pad
-+ [aes] Add aes_cbc_encrypt
-
-Extractors
-+ [azmedien:showplaylist] Add support for show playlists (#12160)
-+ [youtube:playlist] Recognize another playlist pattern (#11928, #12286)
-+ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776,
- #10060)
-* [douyu] Fix extraction (#12301)
-
-
-version 2017.02.27
-
-Core
-* [downloader/common] Limit sleep interval message to two digits after the
- decimal point (#12183)
-+ [extractor/common] Add preference to _parse_html5_media_entries
-
-Extractors
-+ [npo] Add support for zapp.nl
-+ [npo] Add support for hetklokhuis.nl (#12293)
-- [scivee] Remove extractor (#9315)
-+ [cda] Decode download URL (#12255)
-+ [crunchyroll] Improve uploader extraction (#12267)
-+ [youtube] Raise GeoRestrictedError
-+ [dailymotion] Raise GeoRestrictedError
-+ [mdr] Recognize more URL patterns (#12169)
-+ [tvigle] Raise GeoRestrictedError
-* [vevo] Fix extraction for videos with the new streams/streamsV3 format
- (#11719)
-+ [freshlive] Add support for freshlive.tv (#12175)
-+ [xhamster] Capture and output videoClosed error (#12263)
-+ [etonline] Add support for etonline.com (#12236)
-+ [njpwworld] Add support for njpwworld.com (#11561)
-* [amcnetworks] Relax URL regular expression (#12127)
-
-
-version 2017.02.24.1
-
-Extractors
-* [noco] Modernize
-* [noco] Switch login URL to https (#12246)
-+ [thescene] Extract more metadata
-* [thescene] Fix extraction (#12235)
-+ [tubitv] Use geo bypass mechanism
-* [openload] Fix extraction (#10408)
-+ [ivi] Raise GeoRestrictedError
-
-
-version 2017.02.24
-
-Core
-* [options] Hide deprecated options from --help
-* [options] Deprecate --autonumber-size
-+ [YoutubeDL] Add support for string formatting operations in output template
- (#5185, #5748, #6841, #9929, #9966, #9978, #12189)
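-  A sketch of string formatting operations in the output template; the URL is
-  a placeholder:
-    # zero-pad %(autonumber)s to five digits
-    youtube-dl -o '%(autonumber)05d - %(title)s.%(ext)s' https://example.com/v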
-
-Extractors
-+ [lynda:course] Add webpage extraction fallback (#12238)
-* [go] Sign all uplynk URLs and use geo bypass only for free videos
- (#12087, #12210)
-+ [skylinewebcams] Add support for skylinewebcams.com (#12221)
-+ [instagram] Add support for multi video posts (#12226)
-+ [crunchyroll] Extract playlist entries ids
-* [mgtv] Fix extraction
-+ [sohu] Raise GeoRestrictedError
-+ [leeco] Raise GeoRestrictedError and use geo bypass mechanism
-
-
-version 2017.02.22
-
-Extractors
-* [crunchyroll] Fix descriptions with double quotes (#12124)
-* [dailymotion] Make comment count optional (#12209)
-+ [vidzi] Add support for vidzi.cc (#12213)
-+ [24video] Add support for 24video.tube (#12217)
-+ [crackle] Use geo bypass mechanism
-+ [viewster] Use geo verification headers
-+ [tfo] Improve geo restriction detection and use geo bypass mechanism
-+ [telequebec] Use geo bypass mechanism
-+ [limelight] Extract PlaylistService errors and improve geo restriction
- detection
-
-
-version 2017.02.21
-
-Core
-* [extractor/common] Allow calling _initialize_geo_bypass from extractors
- (#11970)
-+ [adobepass] Add support for Time Warner Cable (#12191)
-+ [travis] Run tests in parallel
-+ [downloader/ism] Honor HTTP headers when downloading fragments
-+ [downloader/dash] Honor HTTP headers when downloading fragments
-+ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4
-+ Add option --geo-bypass-country for explicit geo bypass on behalf of a
- specified country (see the sketch below)
-+ Add options to control the geo bypass mechanism: --geo-bypass and
- --no-geo-bypass
-+ Add experimental geo restriction bypass mechanism based on faking
- X-Forwarded-For HTTP header
-+ [utils] Introduce GeoRestrictedError for geo restricted videos
-+ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions
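-  A usage sketch of the geo bypass options above; the country code and URLs
-  are placeholders:
-    # fake an X-Forwarded-For address from the given country
-    youtube-dl --geo-bypass-country DE https://example.com/video
-    # disable the bypass mechanism entirely
-    youtube-dl --no-geo-bypass https://example.com/video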
-
-Extractors
-+ [ninecninemedia] Use geo bypass mechanism
-* [spankbang] Make uploader optional (#12193)
-+ [iprima] Improve geo restriction detection and disable geo bypass
-* [iprima] Modernize
-* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2
-+ [prosiebensat1] Throw ExtractorError on unsupported page type (#12180)
-* [nrk] Update _API_HOST and relax _VALID_URL
-+ [tv4] Bypass geo restriction and improve detection
-* [tv4] Switch to hls3 protocol (#12177)
-+ [viki] Improve geo restriction detection
-+ [vgtv] Improve geo restriction detection
-+ [srgssr] Improve geo restriction detection
-+ [vbox7] Improve geo restriction detection and use geo bypass mechanism
-+ [svt] Improve geo restriction detection and use geo bypass mechanism
-+ [pbs] Improve geo restriction detection and use geo bypass mechanism
-+ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism
-+ [nrk] Improve geo restriction detection and use geo bypass mechanism
-+ [itv] Improve geo restriction detection and use geo bypass mechanism
-+ [go] Improve geo restriction detection and use geo bypass mechanism
-+ [dramafever] Improve geo restriction detection and use geo bypass mechanism
-* [brightcove:legacy] Restrict videoPlayer value (#12040)
-+ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679)
-+ [thisav] Add support for HTML5 media (#11771)
-* [metacafe] Bypass family filter (#10371)
-* [viceland] Improve info extraction
-
-
-version 2017.02.17
-
-Extractors
-* [heise] Improve extraction (#9725)
-* [ellentv] Improve (#11653)
-* [openload] Fix extraction (#10408, #12002)
-+ [theplatform] Recognize URLs with whitespaces (#12044)
-* [einthusan] Relax URL regular expression (#12141, #12159)
-+ [generic] Support complex JWPlayer embedded videos (#12030)
-* [elpais] Improve extraction (#12139)
-
-
-version 2017.02.16
-
-Core
-+ [utils] Add support for quoted string literals in --match-filter (#8050,
- #12142, #12144)
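-  A sketch of a quoted string literal in --match-filter; the field value and
-  URL are placeholders:
-    youtube-dl --match-filter "uploader = 'Some Uploader'" https://example.com/v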
-
-Extractors
-* [ceskatelevize] Lower priority for audio description sources (#12119)
-* [amcnetworks] Fix extraction (#12127)
-* [pinkbike] Fix uploader extraction (#12054)
-+ [onetpl] Add support for businessinsider.com.pl and plejada.pl
-+ [onetpl] Add support for onet.pl (#10507)
-+ [onetmvp] Add shortcut extractor
-+ [vodpl] Add support for vod.pl (#12122)
-+ [pornhub] Extract video URL from tv platform site (#12007, #12129)
-+ [ceskatelevize] Extract DASH formats (#12119, #12133)
-
-
-version 2017.02.14
-
-Core
-* Fix TypeError with Python 2.7.13 on Windows (#11540, #12085)
-
-Extractor
-* [zdf] Fix extraction (#12117)
-* [xtube] Fix extraction for both kinds of video id (#12088)
-* [xtube] Improve title extraction (#12088)
-+ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116)
-* [bellmedia] Allow video id longer than 6 characters (#12114)
-+ [limelight] Add support for referer protected videos
-* [disney] Improve extraction (#4975, #11000, #11882, #11936)
-* [hotstar] Improve extraction (#12096)
-* [einthusan] Fix extraction (#11416)
-+ [aenetworks] Add support for lifetimemovieclub.com (#12097)
-* [youtube] Fix parsing codecs (#12091)
-
-
-version 2017.02.11
-
-Core
-+ [utils] Introduce get_elements_by_class and get_elements_by_attribute
- utility functions
-+ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
-
-Extractor
-* [pluralsight:course] Fix extraction (#12075)
-+ [bbc] Extract m3u8 formats with 320k audio
-* [facebook] Relax video id matching (#11017, #12055, #12056)
-+ [corus] Add support for Corus Entertainment sites (#12060, #9164)
-+ [pluralsight] Detect blocked account error message (#12070)
-+ [bloomberg] Add another video id pattern (#12062)
-* [extractor/commonmistakes] Restrict URL regular expression (#12050)
-+ [tvplayer] Add support for tvplayer.com
-
-
-version 2017.02.10
-
-Extractors
-* [xtube] Fix extraction (#12023)
-* [pornhub] Fix extraction (#12007, #12018)
-* [facebook] Improve JS data regular expression (#12042)
-* [kaltura] Improve embed partner id extraction (#12041)
-+ [sprout] Add support for sproutonline.com
-* [6play] Improve extraction
-+ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
-+ [go] Add support for Adobe Pass authentication (#11468, #10831)
-* [6play] Fix extraction (#12011)
-+ [nbc] Add support for Adobe Pass authentication (#12006)
-
-
-version 2017.02.07
-
-Core
-* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
-+ [downloader/fragment] Respect --no-part
-* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
-
-Extractors
-* [pornhub] Fix extraction (#11997)
-+ [canalplus] Add support for cstar.fr (#11990)
-+ [extractor/generic] Improve RTMP support (#11993)
-+ [gaskrank] Add support for gaskrank.tv (#11685)
-* [bandcamp] Fix extraction for incomplete albums (#11727)
-* [iwara] Fix extraction (#11781)
-* [googledrive] Fix extraction on Python 3.6
-+ [videopress] Add support for videopress.com
-+ [afreecatv] Extract RTMP formats
-
-
-version 2017.02.04.1
-
-Extractors
-+ [twitch:stream] Add support for player.twitch.tv (#11971)
-* [radiocanada] Fix extraction for toutv rtmp formats
-
-
-version 2017.02.04
-
-Core
-+ Add --playlist-random to shuffle playlists (#11889, #11901)
-* [utils] Improve comments processing in js_to_json (#11947)
-* [utils] Handle single-line comments in js_to_json
-* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter
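-  A sketch of --playlist-random; the playlist URL is a placeholder:
-    # download playlist entries in random order
-    youtube-dl --playlist-random https://example.com/playlist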
-
-Extractors
-+ [piksel] Add another app token pattern (#11969)
-+ [vk] Capture and output author blocked error message (#11965)
-+ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373,
- #11800)
-+ [drtv] Add support for live and radio sections (#1827, #3427)
-* [myspace] Fix extraction and extract HLS and HTTP formats
-+ [youtube] Add format info for itag 325 and 328
-* [vine] Fix extraction (#11955)
-- [sportbox] Remove extractor (#11954)
-+ [filmon] Add support for filmon.com (#11187)
-+ [infoq] Add audio only formats (#11565)
-* [douyutv] Improve room id regular expression (#11931)
-* [iprima] Fix extraction (#11920, #11896)
-* [youtube] Fix ytsearch when cookies are provided (#11924)
-* [go] Relax video id regular expression (#11937)
-* [facebook] Fix title extraction (#11941)
-+ [youtube:playlist] Recognize TL playlists (#11945)
-+ [bilibili] Support new Bangumi URLs (#11845)
-+ [cbc:watch] Extract audio codec for audio only formats (#11893)
-+ [elpais] Fix extraction for some URLs (#11765)
-
-
-version 2017.02.01
-
-Extractors
-+ [facebook] Add another fallback extraction scenario (#11926)
-* [prosiebensat1] Fix extraction of descriptions (#11810, #11929)
-- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028)
-+ [vimeo] Extract upload timestamp
-+ [vimeo] Extract license (#8726, #11880)
-+ [nrk:series] Add support for series (#11571, #11711)
-
-
-version 2017.01.31
-
-Core
-+ [compat] Add compat_etree_register_namespace
-
-Extractors
-* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892,
- #11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909,
- #11913, #11914, #11915, #11916, #11917, #11918, #11919)
-+ [vimeo] Extract both mixed and separated DASH formats
-+ [ruutu] Extract DASH formats
-* [itv] Fix extraction for python 2.6
-
-
-version 2017.01.29
-
-Core
-* [extractor/common] Fix initialization template (#11605, #11825)
-+ [extractor/common] Document fragment_base_url and fragment's path fields
-* [extractor/common] Fix duration per DASH segment (#11868)
-+ Introduce --autonumber-start option for initial value of %(autonumber)s
- template (#727, #2702, #9362, #10457, #10529, #11862)
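-  A sketch of --autonumber-start; the URL is a placeholder:
-    # start %(autonumber)s counting at 43 instead of 1
-    youtube-dl --autonumber-start 43 -o '%(autonumber)s-%(title)s.%(ext)s' \
-      https://example.com/playlist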
-
-Extractors
-+ [azmedien:playlist] Add support for topic and themen playlists (#11817)
-* [npo] Fix subtitles extraction
-+ [itv] Extract subtitles
-+ [itv] Add support for itv.com (#9240)
-+ [mtv81] Add support for mtv81.com (#7619)
-+ [vlive] Add support for channels (#11826)
-+ [kaltura] Add fallback for fileExt
-+ [kaltura] Improve uploader_id extraction
-+ [konserthusetplay] Add support for rspoplay.se (#11828)
-
-
-version 2017.01.28
-
-Core
-* [utils] Improve parse_duration
-
-Extractors
-* [crunchyroll] Improve series and season metadata extraction (#11832)
-* [soundcloud] Improve formats extraction and extract audio bitrate
-+ [soundcloud] Extract HLS formats
-* [soundcloud] Fix track URL extraction (#11852)
-+ [twitch:vod] Expand URL regular expressions (#11846)
-* [aenetworks] Fix season episodes extraction (#11669)
-+ [tva] Add support for videos.tva.ca (#11842)
-* [jamendo] Improve and extract more metadata (#11836)
-+ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000)
-* [vevo] Remove request to old API and catch API v2 errors
-+ [cmt,mtv,southpark] Add support for episode URLs (#11837)
-+ [youtube] Add fallback for duration extraction (#11841)
-
-
-version 2017.01.25
-
-Extractors
-+ [openload] Fallback video extension to mp4
-+ [extractor/generic] Add support for Openload embeds (#11536, #11812)
-* [srgssr] Fix rts video extraction (#11831)
-+ [afreecatv:global] Add support for afreeca.tv (#11807)
-+ [crackle] Extract vtt subtitles
-+ [crackle] Extract multiple resolutions for thumbnails
-+ [crackle] Add support for mobile URLs
-+ [konserthusetplay] Extract subtitles (#11823)
-+ [konserthusetplay] Add support for HLS videos (#11823)
-* [vimeo:review] Fix config URL extraction (#11821)
-
-
-version 2017.01.24
-
-Extractors
-* [pluralsight] Fix extraction (#11820)
-+ [nextmedia] Add support for NextTV (壹電視)
-* [24video] Fix extraction (#11811)
-* [youtube:playlist] Fix nonexistent and private playlist detection (#11604)
-+ [chirbit] Extract uploader (#11809)
-
-
-version 2017.01.22
-
-Extractors
-+ [pornflip] Add support for pornflip.com (#11556, #11795)
-* [chaturbate] Fix extraction (#11797, #11802)
-+ [azmedien] Add support for AZ Medien sites (#11784, #11785)
-+ [nextmedia] Support redirected URLs
-+ [vimeo:channel] Extract videos' titles for playlist entries (#11796)
-+ [youtube] Extract episode metadata (#9695, #11774)
-+ [cspan] Support Ustream embedded videos (#11547)
-+ [1tv] Add support for HLS videos (#11786)
-* [uol] Fix extraction (#11770)
-* [mtv] Relax triforce feed regular expression (#11766)
-
-
-version 2017.01.18
-
-Extractors
-* [bilibili] Fix extraction (#11077)
-+ [canalplus] Add fallback for video id (#11764)
-* [20min] Fix extraction (#11683, #11751)
-* [imdb] Extend URL regular expression (#11744)
-+ [naver] Add support for tv.naver.com links (#11743)
-
-
-version 2017.01.16
-
-Core
-* [options] Apply custom config to final composite configuration (#11741)
-* [YoutubeDL] Improve protocol auto determining (#11720)
-
-Extractors
-* [xiami] Relax URL regular expressions
-* [xiami] Improve track metadata extraction (#11699)
-+ [limelight] Check hand-made direct HTTP links
-+ [limelight] Add support for direct HTTP links at video.llnw.net (#11737)
-+ [brightcove] Recognize another player ID pattern (#11688)
-+ [niconico] Support login via cookies (#7968)
-* [yourupload] Fix extraction (#11601)
-+ [beam:live] Add support for beam.pro live streams (#10702, #11596)
-* [vevo] Improve geo restriction detection
-+ [dramafever] Add support for URLs with language code (#11714)
-* [cbc] Improve playlist support (#11704)
-
-
-version 2017.01.14
-
-Core
-+ [common] Add ability to customize akamai manifest host
-+ [utils] Add more date formats
-
-Extractors
-- [mtv] Eliminate _transform_rtmp_url
-* [mtv] Generalize triforce mgid extraction
-+ [cmt] Add support for full episodes and video clips (#11623)
-+ [mitele] Extract DASH formats
-+ [ooyala] Add support for videos with embedToken (#11684)
-* [mixcloud] Fix extraction (#11674)
-* [openload] Fix extraction (#10408)
-* [tv4] Improve extraction (#11698)
-* [freesound] Fix and improve extraction (#11602)
-+ [nick] Add support for beta.nick.com (#11655)
-* [mtv,cc] Use HLS by default with native HLS downloader (#11641)
-* [mtv] Fix non-HLS extraction
-
-
-version 2017.01.10
-
-Extractors
-* [youtube] Fix extraction (#11663, #11664)
-+ [inc] Add support for inc.com (#11277, #11647)
-+ [youtube] Add itag 212 (#11575)
-+ [egghead:course] Add support for egghead.io courses
-
-
-version 2017.01.08
-
-Core
-* Fix "invalid escape sequence" errors under Python 3.6 (#11581)
-
-Extractors
-+ [hitrecord] Add support for hitrecord.org (#10867, #11626)
-- [videott] Remove extractor
-* [swrmediathek] Improve extraction
-- [sharesix] Remove extractor
-- [aol:features] Remove extractor
-* [sendtonews] Improve info extraction
-* [3sat,phoenix] Fix extraction (#11619)
-* [comedycentral/mtv] Add support for HLS videos (#11600)
-* [discoverygo] Fix JSON data parsing (#11219, #11522)
-
-
-version 2017.01.05
-
-Extractors
-+ [zdf] Fix extraction (#11055, #11063)
-* [pornhub:playlist] Improve extraction (#11594)
-+ [cctv] Add support for ncpa-classic.com (#11591)
-+ [tunein] Add support for embeds (#11579)
-
-
-version 2017.01.02
-
-Extractors
-* [cctv] Improve extraction (#879, #6753, #8541)
-+ [nrktv:episodes] Add support for episodes (#11571)
-+ [arkena] Add support for video.arkena.com (#11568)
-
-
-version 2016.12.31
-
-Core
-+ Introduce --config-location option for custom configuration files (#6745,
- #10648)
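-  A sketch of --config-location; the path and URL are placeholders:
-    youtube-dl --config-location ~/yt.conf https://example.com/video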
-
-Extractors
-+ [twitch] Add support for player.twitch.tv (#11535, #11537)
-+ [videa] Add support for videa.hu (#8181, #11133)
-* [vk] Fix postlive videos extraction
-* [vk] Extract from playerParams (#11555)
-- [freevideo] Remove extractor (#11515)
-+ [showroomlive] Add support for showroom-live.com (#11458)
-* [xhamster] Fix duration extraction (#11549)
-* [rtve:live] Fix extraction (#11529)
-* [brightcove:legacy] Improve embeds detection (#11523)
-+ [twitch] Add support for rechat messages (#11524)
-* [acast] Fix audio and timestamp extraction (#11521)
-
-
-version 2016.12.22
-
-Core
-* [extractor/common] Improve detection of video-only formats in m3u8
- manifests (#11507)
-
-Extractors
-+ [theplatform] Pass geo verification headers to SMIL request (#10146)
-+ [viu] Pass geo verification headers to auth request
-* [rtl2] Extract more formats and metadata
-* [vbox7] Skip malformed JSON-LD (#11501)
-* [uplynk] Force downloading using native HLS downloader (#11496)
-+ [laola1] Add support for another extraction scenario (#11460)
-
-
-version 2016.12.20
-
-Core
-* [extractor/common] Improve fragment URL construction for DASH media
-* [extractor/common] Fix codec information extraction for mixed audio/video
- DASH media (#11490)
-
-Extractors
-* [vbox7] Fix extraction (#11494)
-+ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027)
-+ [piksel] Add support for player.piksel.com (#11246)
-+ [vimeo] Add support for DASH formats
-* [vimeo] Fix extraction for HLS formats (#11490)
-* [kaltura] Fix wrong widget ID in some cases (#11480)
-+ [nrktv:direkte] Add support for live streams (#11488)
-* [pbs] Fix extraction for geo restricted videos (#7095)
-* [brightcove:new] Skip widevine classic videos
-+ [viu] Add support for viu.com (#10607, #11329)
-
-
-version 2016.12.18
-
-Core
-+ [extractor/common] Recognize DASH formats in html5 media entries
-
-Extractors
-+ [ccma] Add support for ccma.cat (#11359)
-* [laola1tv] Improve extraction
-+ [laola1tv] Add support for embed URLs (#11460)
-* [nbc] Fix extraction for MSNBC videos (#11466)
-* [twitch] Adapt to new videos pages URL schema (#11469)
-+ [meipai] Add support for meipai.com (#10718)
-* [jwplatform] Improve subtitles and duration extraction
-+ [ondemandkorea] Add support for ondemandkorea.com (#10772)
-+ [vvvvid] Add support for vvvvid.it (#5915)
-
-
-version 2016.12.15
-
-Core
-+ [utils] Add convenience urljoin
-
-Extractors
-+ [openload] Recognize oload.tv URLs (#10408)
-+ [facebook] Recognize .onion URLs (#11443)
-* [vlive] Fix extraction (#11375, #11383)
-+ [canvas] Extract DASH formats
-+ [melonvod] Add support for vod.melon.com (#11419)
-
-
-version 2016.12.12
-
-Core
-+ [utils] Add common user agents map
-+ [common] Recognize HLS manifests that contain video-only formats (#11394)
-
-Extractors
-+ [dplay] Use Safari user agent for HLS (#11418)
-+ [facebook] Detect login required error message
-* [facebook] Improve video selection (#11390)
-+ [canalplus] Add another video id pattern (#11399)
-* [mixcloud] Relax URL regular expression (#11406)
-* [ctvnews] Relax URL regular expression (#11394)
-+ [rte] Capture and output error message (#7746, #10498)
-+ [prosiebensat1] Add support for DASH formats
-* [srgssr] Improve extraction for geo restricted videos (#11089)
-* [rts] Improve extraction for geo restricted videos (#4989)
-
-
-version 2016.12.09
-
-Core
-* [socks] Fix error reporting (#11355)
-
-Extractors
-* [openload] Fix extraction (#10408)
-* [pandoratv] Fix extraction (#11023)
-+ [telebruxelles] Add support for emission URLs
-* [telebruxelles] Extract all formats
-+ [bloomberg] Add another video id regular expression (#11371)
-* [fusion] Update ooyala id regular expression (#11364)
-+ [1tv] Add support for playlists (#11335)
-* [1tv] Improve extraction (#11335)
-+ [aenetworks] Extract more formats (#11321)
-+ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
-
-
-version 2016.12.01
-
-Extractors
-* [soundcloud] Update client id (#11327)
-* [ruutu] Detect DRM protected videos
-+ [liveleak] Add support for youtube embeds (#10688)
-* [spike] Fix full episodes support (#11312)
-* [comedycentral] Fix full episodes support
-* [normalboots] Rewrite in terms of JWPlatform (#11184)
-* [teamfourstar] Rewrite in terms of JWPlatform (#11184)
-- [screenwavemedia] Remove extractor (#11184)
-
-
-version 2016.11.27
-
-Extractors
-+ [webcaster] Add support for webcaster.pro
-+ [azubu] Add support for azubu.uol.com.br (#11305)
-* [viki] Prefer hls formats
-* [viki] Fix rtmp formats extraction (#11255)
-* [puls4] Relax URL regular expression (#11267)
-* [vevo] Improve artist extraction (#10911)
-* [mitele] Relax URL regular expression and extract more metadata (#11244)
-+ [cbslocal] Recognize New York site (#11285)
-+ [youtube:playlist] Pass disable_polymer in URL query (#11193)
-
-
-version 2016.11.22
-
-Extractors
-* [hellporno] Fix video extension extraction (#11247)
-+ [hellporno] Add support for hellporno.net (#11247)
-+ [amcnetworks] Recognize more BBC America URLs (#11263)
-* [funnyordie] Improve extraction (#11208)
-* [extractor/generic] Improve limelight embeds support
-- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
-* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
-* [twitter:card] Relax URL regular expression (#11225)
-+ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
-
-
-version 2016.11.18
-
-Extractors
-* [youtube:live] Relax URL regular expression (#11164)
-* [openload] Fix extraction (#10408, #11122)
-* [vlive] Prefer locale over language for subtitles id (#11203)
-
-
-version 2016.11.14.1
-
-Core
-+ [downloader/fragment,f4m,hls] Respect HTTP headers from info dict
-* [extractor/common] Fix media templates with Bandwidth substitution pattern in
- MPD manifests (#11175)
-* [extractor/common] Improve thumbnail extraction from JSON-LD
-
-Extractors
-+ [nrk] Workaround geo restriction
-+ [nrk] Improve error detection and messages
-+ [afreecatv] Add support for vod.afreecatv.com (#11174)
-* [cda] Fix and improve extraction (#10929, #10936)
-* [plays] Fix extraction (#11165)
-* [eagleplatform] Fix extraction (#11160)
-+ [audioboom] Recognize /posts/ URLs (#11149)
-
-
-version 2016.11.08.1
-
-Extractors
-* [espn:article] Fix support for espn.com articles
-* [franceculture] Fix extraction (#11140)
-
-
-version 2016.11.08
-
-Extractors
-* [tmz:article] Fix extraction (#11052)
-* [espn] Fix extraction (#11041)
-* [mitele] Fix extraction after website redesign (#10824)
-- [ard] Remove age restriction check (#11129)
-* [generic] Improve support for pornhub.com embeds (#11100)
-+ [generic] Add support for redtube.com embeds (#11099)
-+ [generic] Add support for drtuber.com embeds (#11098)
-+ [redtube] Add support for embed URLs
-+ [drtuber] Add support for embed URLs
-+ [yahoo] Improve content id extraction (#11088)
-* [toutv] Relax URL regular expression (#11121)
-
-
-version 2016.11.04
-
-Core
-* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
- manifests (#11113)
-* [downloader/ism] Fix AVC Decoder Configuration Record
-
-Extractors
-+ [fox9] Add support for fox9.com (#11110)
-+ [anvato] Extract more metadata and improve formats extraction
-* [vodlocker] Improve removed videos detection (#11106)
-+ [vzaar] Add support for vzaar.com (#11093)
-+ [vice] Add support for uplynk preplay videos (#11101)
-* [tubitv] Fix extraction (#11061)
-+ [shahid] Add support for authentication (#11091)
-+ [radiocanada] Add subtitles support (#11096)
-+ [generic] Add support for ISM manifests
-
-
-version 2016.11.02
-
-Core
-+ Add basic support for Smooth Streaming protocol (#8118, #10969)
-* Improve MPD manifest base URL extraction (#10909, #11079)
-* Fix --match-filter for int-like strings (#11082)
-
-Extractors
-+ [mva] Add support for ISM formats
-+ [msn] Add support for ISM formats
-+ [onet] Add support for ISM formats
-+ [tvp] Add support for ISM formats
-+ [nicknight] Add support for nicknight sites (#10769)
-
-
-version 2016.10.30
-
-Extractors
-* [facebook] Improve 1080P video detection (#11073)
-* [imgur] Recognize /r/ URLs (#11071)
-* [beeg] Fix extraction (#11069)
-* [openload] Fix extraction (#10408)
-* [gvsearch] Modernize and fix search request (#11051)
-* [adultswim] Fix extraction (#10979)
-+ [nobelprize] Add support for nobelprize.org (#9999)
-* [hornbunny] Fix extraction (#10981)
-* [tvp] Improve video id extraction (#10585)
-
-
-version 2016.10.26
-
-Extractors
-+ [rentv] Add support for ren.tv (#10620)
-+ [ard] Detect unavailable videos (#11018)
-* [vk] Fix extraction (#11022)
-
-
-version 2016.10.25
-
-Core
-* Fix running youtube-dl in the background (#10996, #10706, #955)
-
-Extractors
-+ [jamendo] Add support for jamendo.com (#10132, #10736)
-+ [pandatv] Add support for panda.tv (#10736)
-+ [dotsub] Support Vimeo embed (#10964)
-* [litv] Fix extraction
-+ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994)
-* [vivo] Fix extraction (#11003)
-+ [twitch:stream] Add support for rebroadcasts (#10995)
-* [pluralsight] Fix subtitles conversion (#10990)
-
-
-version 2016.10.21.1
-
-Extractors
-+ [pluralsight] Process all clip URLs (#10984)
-
-
-version 2016.10.21
-
-Core
-- Disable thumbnail embedding in mkv
-+ Add support for Comcast multiple-system operator (#10819)
-
-Extractors
-* [pluralsight] Adapt to new API (#10972)
-* [openload] Fix extraction (#10408, #10971)
-+ [natgeo] Extract m3u8 formats (#10959)
-
-
-version 2016.10.19
-
-Core
-+ [utils] Expose PACKED_CODES_RE
-+ [extractor/common] Extract non-SMIL Wowza MPD manifests
-+ [extractor/common] Detect f4m audio-only formats
-
-Extractors
-* [vidzi] Fix extraction (#10908, #10952)
-* [urplay] Fix subtitles extraction
-+ [urplay] Add support for urskola.se (#10915)
-+ [orf] Add subtitles support (#10939)
-* [youtube] Fix --no-playlist behavior for youtu.be/id URLs (#10896)
-* [nrk] Relax URL regular expression (#10928)
-+ [nytimes] Add support for podcasts (#10926)
-* [pluralsight] Relax URL regular expression (#10941)
-
-
-version 2016.10.16
-
-Core
-* [postprocessor/ffmpeg] Return correct filepath and ext in updated information
- in FFmpegExtractAudioPP (#10879)
-
-Extractors
-+ [ruutu] Add support for supla.fi (#10849)
-+ [theoperaplatform] Add support for theoperaplatform.eu (#10914)
-* [lynda] Fix height for prioritized streams
-+ [lynda] Add fallback extraction scenario
-* [lynda] Switch to https (#10916)
-+ [huajiao] New extractor (#10917)
-* [cmt] Fix mgid extraction (#10813)
-+ [safari:course] Add support for techbus.safaribooksonline.com
-* [orf:tvthek] Fix extraction and modernize (#10898)
-* [chirbit] Fix extraction of user profile pages
-* [carambatv] Fix extraction
-* [canalplus] Fix extraction for some videos
-* [cbsinteractive] Fix extraction for cnet.com
-* [parliamentliveuk] Recognize lowercase URLs (#10912)
-
-
-version 2016.10.12
-
-Core
-+ Support HTML media elements without child nodes
-* [Makefile] Fix support for GNU make < 4; drop BSD make support (#9387)
-
-Extractors
-* [dailymotion] Fix extraction (#10901)
-* [vimeo:review] Fix extraction (#10900)
-* [nhl] Correctly handle invalid formats (#10713)
-* [footyroom] Fix extraction (#10810)
-* [abc.net.au:iview] Fix for standalone (non series) videos (#10895)
-+ [hbo] Add support for episode pages (#10892)
-* [allocine] Fix extraction (#10860)
-+ [nextmedia] Recognize action news on AppleDaily
-* [lego] Improve info extraction and bypass geo restriction (#10872)
-
-
-version 2016.10.07
-
-Extractors
-+ [iprima] Detect geo restriction
-* [facebook] Fix video extraction (#10846)
-+ [commonprotocols] Support direct MMS links (#10838)
-+ [generic] Add support for multiple vimeo embeds (#10862)
-+ [nzz] Add support for nzz.ch (#4407)
-+ [npo] Detect geo restriction
-+ [npo] Add support for 2doc.nl (#10842)
-+ [lego] Add support for lego.com (#10369)
-+ [tonline] Add support for t-online.de (#10376)
-* [techtalks] Relax URL regular expression (#10840)
-* [youtube:live] Extend URL regular expression (#10839)
-+ [theweatherchannel] Add support for weather.com (#7188)
-+ [thisoldhouse] Add support for thisoldhouse.com (#10837)
-+ [nhl] Add support for wch2016.com (#10833)
-* [pornoxo] Use JWPlatform to improve metadata extraction
-
-
-version 2016.10.02
-
-Core
-* Fix possibly lost extended attributes during post-processing
-+ Support pyxattr as well as python-xattr for --xattrs and
- --xattr-set-filesize (#9054)
-
-Extractors
-+ [jwplatform] Support DASH streams in JWPlayer
-+ [jwplatform] Support old-style JWPlayer playlists
-+ [byutv:event] Add extractor
-* [periscope:user] Fix extraction (#10820)
-* [dctp] Fix extraction (#10734)
-+ [instagram] Extract video dimensions (#10790)
-+ [tvland] Extend URL regular expression (#10812)
-+ [vgtv] Add support for tv.aftonbladet.se (#10800)
-- [aftonbladet] Remove extractor
-* [vk] Fix timestamp and view count extraction (#10760)
-+ [vk] Add support for running and finished live streams (#10799)
-+ [leeco] Recognize more Le Sports URLs (#10794)
-+ [instagram] Extract comments (#10788)
-+ [ketnet] Extract mzsource formats (#10770)
-* [limelight:media] Improve HTTP formats extraction
-
-
-version 2016.09.27
-
-Core
-+ Add hdcore query parameter to akamai f4m formats
-+ Delegate HLS live streams downloading to ffmpeg
-+ Improve support for HTML5 subtitles
-
-Extractors
-+ [vk] Add support for dailymotion embeds (#10661)
-* [promptfile] Fix extraction (#10634)
-* [kaltura] Speed up embed regular expressions (#10764)
-+ [npo] Add support for anderetijden.nl (#10754)
-+ [prosiebensat1] Add support for advopedia sites
-* [mwave] Relax URL regular expression (#10735, #10748)
-* [prosiebensat1] Fix playlist support (#10745)
-+ [prosiebensat1] Add support for sat1gold sites (#10745)
-+ [cbsnews:livevideo] Fix extraction and extract m3u8 formats
-+ [brightcove:new] Add support for live streams
-* [soundcloud] Generalize playlist entries extraction (#10733)
-+ [mtv] Add support for new URL schema (#8169, #9808)
-* [einthusan] Fix extraction (#10714)
-+ [twitter] Support Periscope embeds (#10737)
-+ [openload] Support subtitles (#10625)
-
-
-version 2016.09.24
-
-Core
-+ Add support for watchTVeverywhere.com authentication-provider-based MSOs
- for Adobe Pass authentication (#10709)
-
-Extractors
-+ [soundcloud:playlist] Provide video id for early playlist entries (#10733)
-+ [prosiebensat1] Add support for kabeleinsdoku (#10732)
-* [cbs] Extract info from thunder videoPlayerService (#10728)
-* [openload] Fix extraction (#10408)
-+ [ustream] Support the new HLS streams (#10698)
-+ [ooyala] Extract all HLS formats
-+ [cartoonnetwork] Add support for Adobe Pass authentication
-+ [soundcloud] Extract license metadata
-+ [fox] Add support for Adobe Pass authentication (#8584)
-+ [tbs] Add support for Adobe Pass authentication (#10642, #10222)
-+ [trutv] Add support for Adobe Pass authentication (#10519)
-+ [turner] Add support for Adobe Pass authentication
-
-
-version 2016.09.19
-
-Extractors
-+ [crunchyroll] Check if already authenticated (#10700)
-- [twitch:stream] Remove fallback to profile extraction when stream is offline
-* [thisav] Improve title extraction (#10682)
-* [vyborymos] Improve station info extraction
-
-
-version 2016.09.18
-
-Core
-+ Introduce manifest_url and fragments fields in formats dictionary for
- fragmented media
-+ Provide manifest_url field for DASH segments, HLS and HDS
-+ Provide fragments field for DASH segments
-* Rework DASH segments downloader to use fragments field
-+ Add helper method for Wowza Streaming Engine formats extraction
-
-Extractors
-+ [vyborymos] Add extractor for vybory.mos.ru (#10692)
-+ [xfileshare] Add title regular expression for streamin.to (#10646)
-+ [globo:article] Add support for multiple videos (#10653)
-+ [thisav] Recognize HTML5 videos (#10447)
-* [jwplatform] Improve JWPlayer detection
-+ [mangomolo] Add support for Mangomolo embeds
-+ [toutv] Add support for authentication (#10669)
-* [franceinter] Fix upload date extraction
-* [tv4] Fix HLS and HDS formats extraction (#10659)
-
-
-version 2016.09.15
-
-Core
-* Improve _hidden_inputs
-+ Introduce improved explicit Adobe Pass support
-+ Add --ap-mso to provide multiple-system operator identifier
-+ Add --ap-username to provide MSO account username
-+ Add --ap-password to provide MSO account password
-+ Add --ap-list-mso to list all supported MSOs
-+ Add support for Rogers Cable multiple-system operator (#10606)
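-  A sketch of the Adobe Pass options above; the MSO id, credentials and URL
-  are placeholders:
-    youtube-dl --ap-list-mso
-    youtube-dl --ap-mso Rogers --ap-username myuser --ap-password mypass \
-      https://example.com/video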
-
-Extractors
-* [crunchyroll] Fix authentication (#10655)
-* [twitch] Fix API calls (#10654, #10660)
-+ [bellmedia] Add support for more Bell Media Television sites
-* [franceinter] Fix extraction (#10538, #2105)
-* [kuwo] Improve error detection (#10650)
-+ [go] Add support for free full episodes (#10439)
-* [bilibili] Fix extraction for specific videos (#10647)
-* [nhk] Fix extraction (#10633)
-* [kaltura] Improve audio detection
-* [kaltura] Skip chun format
-+ [vimeo:ondemand] Pass Referer along with embed URL (#10624)
-+ [nbc] Add support for NBC Olympics (#10361)
-
-
-version 2016.09.11.1
-
-Extractors
-+ [tube8] Extract categories and tags (#10579)
-+ [pornhub] Extract categories and tags (#10499)
-* [openload] Temporary fix (#10408)
-+ [foxnews] Add support for Fox News articles (#10598)
-* [viafree] Improve video id extraction (#10615)
-* [iwara] Fix extraction after relaunch (#10462, #3215)
-+ [tfo] Add extractor for tfo.org
-* [lrt] Fix audio extraction (#10566)
-* [9now] Fix extraction (#10561)
-+ [canalplus] Add support for c8.fr (#10577)
-* [newgrounds] Fix uploader extraction (#10584)
-+ [polskieradio:category] Add support for category lists (#10576)
-+ [ketnet] Add extractor for ketnet.be (#10343)
-+ [canvas] Add support for een.be (#10605)
-+ [telequebec] Add extractor for telequebec.tv (#1999)
-* [parliamentliveuk] Fix extraction (#9137)
-
-
-version 2016.09.08
-
-Extractors
-+ [jwplatform] Extract height from format label
-+ [yahoo] Extract Brightcove Legacy Studio embeds (#9345)
-* [videomore] Fix extraction (#10592)
-* [foxgay] Fix extraction (#10480)
-+ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709)
-* [gamestar] Fix metadata extraction (#10479)
-* [puls4] Fix extraction (#10583)
-+ [cctv] Add extractor for CCTV and CNTV (#8153)
-+ [lci] Add extractor for lci.fr (#10573)
-+ [wat] Extract DASH formats
-+ [viafree] Improve video id detection (#10569)
-+ [trutv] Add extractor for trutv.com (#10519)
-+ [nick] Add support for nickelodeon.nl (#10559)
-+ [abcotvs:clips] Add support for clips.abcotvs.com
-+ [abcotvs] Add support for ABC Owned Television Stations sites (#9551)
-+ [miaopai] Add extractor for miaopai.com (#10556)
-+ [bilibili] Add support for episodes (#10190)
-+ [tvnoe] Add extractor for tvnoe.cz (#10524)
-
-
-version 2016.09.04.1
-
-Core
-* In DASH downloader, if the first segment fails, abort the whole download
- process to prevent throttling (#10497)
-+ Add support for --skip-unavailable-fragments and --fragment-retries in
- hlsnative downloader (#10165, #10448)
-+ Add support for --skip-unavailable-fragments in DASH downloader
-+ Introduce --skip-unavailable-fragments option for fragment-based
- downloaders that allows skipping fragments unavailable due to an HTTP
- error (see the sketch below)
-* Fix extraction of video/audio entries with src attribute in
- _parse_html5_media_entries (#10540)
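-  A sketch of the fragment options above; the URL is a placeholder:
-    # retry each fragment 10 times, then skip fragments that still fail
-    youtube-dl --skip-unavailable-fragments --fragment-retries 10 \
-      https://example.com/video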
-
-Extractors
-* [theplatform] Relax URL regular expression (#10546)
-* [youtube:playlist] Extend URL regular expression
-* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
-* [internetvideoarchive] Extract all formats
-* [pornvoisines] Fix extraction (#10469)
-* [rottentomatoes] Fix extraction (#10467)
-* [espn] Extend URL regular expression (#10549)
-* [vimple] Extend URL regular expression (#10547)
-* [youtube:watchlater] Fix extraction (#10544)
-* [youjizz] Fix extraction (#10437)
-+ [foxnews] Add support for FoxNews Insider (#10445)
-+ [fc2] Recognize Flash player URLs (#10512)
-
-
-version 2016.09.03
-
-Core
-* Restore usage of NAME attribute from EXT-X-MEDIA tag for format codes in
- _extract_m3u8_formats (#10522)
-* Handle semicolon in mimetype2ext
-
-Extractors
-+ [youtube] Add support for rental videos' previews (#10532)
-* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
- no playlist is actually served (#10537)
-+ [drtv] Add support for dr.dk/nyheder (#10536)
-+ [facebook:plugins:video] Add extractor (#10530)
-+ [go] Add extractor for *.go.com sites
-* [adobepass] Check for authz_token expiration (#10527)
-* [nytimes] Improve extraction
-* [thestar] Fix extraction (#10465)
-* [glide] Fix extraction (#10478)
-- [exfm] Remove extractor (#10482)
-* [youporn] Fix categories and tags extraction (#10521)
-+ [curiositystream] Add extractor for app.curiositystream.com
-- [thvideo] Remove extractor (#10464)
-* [movingimage] Fix for the new site name (#10466)
-+ [cbs] Add support for once formats (#10515)
-* [limelight] Skip ism and duplicate manifests
-+ [porncom] Extract categories and tags (#10510)
-+ [facebook] Extract timestamp (#10508)
-+ [yahoo] Extract more formats
-
-
-version 2016.08.31
-
-Extractors
-* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
-* [bandcamp:album] Fix title extraction (#10455)
-* [pyvideo] Fix extraction (#10468)
-+ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
-* [9c9media] Extract more metadata
-* [9c9media] Fix multiple stacks extraction (#10016)
-* [adultswim] Improve video info extraction (#10492)
-* [vodplatform] Improve embed regular expression
-- [played] Remove extractor (#10470)
-+ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
-+ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
-* [adultswim] Rework in terms of turner extractor
-* [cnn] Rework in terms of turner extractor
-* [nba] Rework in terms of turner extractor
-+ [turner] Add base extractor for Turner Broadcasting System based sites
-* [bilibili] Fix extraction (#10375)
-* [openload] Fix extraction (#10408)
-
-
-version 2016.08.28
-
-Core
-+ Add warning message that ffmpeg doesn't support SOCKS
-* Improve thumbnail sorting
-+ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats
-* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative
-+ Add ac-3 to the list of audio codecs in parse_codecs
-
-Extractors
-* [periscope:user] Fix extraction (#10453)
-* [douyutv] Fix extraction (#10153, #10318, #10444)
-+ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424)
-- [trutube] Remove extractor (#10438)
-+ [usanetwork] Add extractor for usanetwork.com
-* [crackle] Fix extraction (#10333)
-* [spankbang] Fix description and uploader extraction (#10339)
-* [discoverygo] Detect cable provider restricted videos (#10425)
-+ [cbc] Add support for watch.cbc.ca
-* [kickstarter] Silence the warning for og:description (#10415)
-* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
-
-
-version 2016.08.24.1
-
-Extractors
-+ [pluralsight] Add support for subtitles (#9681)
-
-
-version 2016.08.24
-
-Extractors
-* [youtube] Fix authentication (#10392)
-* [openload] Fix extraction (#10408)
-+ [bravotv] Add support for Adobe Pass (#10407)
-* [bravotv] Fix clip info extraction (#10407)
-* [eagleplatform] Improve embedded videos detection (#10409)
-* [awaan] Fix extraction
-* [mtvservices:embedded] Update config URL
-+ [abc:iview] Add extractor (#6148)
-
-
-version 2016.08.22
-
-Core
-* Improve formats and subtitles extension auto-calculation
-+ Recognize full unit names in parse_filesize
-+ Add support for m3u8 manifests in HTML5 multimedia tags
-* Fix octal/hexadecimal number detection in js_to_json
-
-Extractors
-+ [ivi] Add support for 720p and 1080p
-+ [charlierose] Add new extractor (#10382)
-* [1tv] Fix extraction (#9249)
-* [twitch] Renew authentication
-* [kaltura] Improve subtitles extension calculation
-+ [zingmp3] Add support for video clips
-* [zingmp3] Fix extraction (#10041)
-* [kaltura] Improve subtitles extraction (#10279)
-* [cultureunplugged] Fix extraction (#10330)
-+ [cnn] Add support for money.cnn.com (#2797)
-* [cbsnews] Fix extraction (#10362)
-* [cbs] Fix extraction (#10393)
-+ [litv] Support 'promo' URLs (#10385)
-* [snotr] Fix extraction (#10338)
-* [n-tv.de] Fix extraction (#10331)
-* [globo:article] Relax URL and video id regular expressions (#10379)
-
-
-version 2016.08.19
-
-Core
-- Remove output template description from --help
-* Recognize lowercase units in parse_filesize
-
-Extractors
-+ [porncom] Add extractor for porn.com (#2251, #10251)
-+ [generic] Add support for DBTV embeds
-* [vk:wallpost] Fix audio extraction for new site layout
-* [vk] Fix authentication
-+ [hgtvcom:show] Add extractor for hgtv.com shows (#10365)
-+ [discoverygo] Add support for other GO network sites
-
-
-version 2016.08.17
-
-Core
-+ Add _get_netrc_login_info
-
-Extractors
-* [mofosex] Extract all formats (#10335)
-+ [generic] Add support for vbox7 embeds
-+ [vbox7] Add support for embed URLs
-+ [viafree] Add extractor (#10358)
-+ [mtg] Add support for viafree URLs (#10358)
-* [theplatform] Extract all subtitles per language
-+ [xvideos] Fix HLS extraction (#10356)
-+ [amcnetworks] Add extractor
-+ [bbc:playlist] Add support for pagination (#10349)
-+ [fxnetworks] Add extractor (#9462)
-* [cbslocal] Fix extraction for SendtoNews-based videos
-* [sendtonews] Fix extraction
-* [jwplatform] Extract video id from JWPlayer data
-- [zippcast] Remove extractor (#10332)
-+ [viceland] Add extractor (#8799)
-+ [adobepass] Add base extractor for Adobe Pass Authentication
-* [life:embed] Improve extraction
-* [vgtv] Detect geo restricted videos (#10348)
-+ [uplynk] Add extractor
-* [xiami] Fix extraction (#10342)
-
-
-version 2016.08.13
-
-Core
-* Show progress for curl external downloader
-* Forward more options to curl external downloader
-
-Extractors
-* [pbs] Fix description extraction
-* [franceculture] Fix extraction (#10324)
-* [pornotube] Fix extraction (#10322)
-* [4tube] Fix metadata extraction (#10321)
-* [imgur] Fix width and height extraction (#10325)
-* [expotv] Improve extraction
-+ [vbox7] Fix extraction (#10309)
-- [tapely] Remove extractor (#10323)
-* [muenchentv] Fix extraction (#10313)
-+ [24video] Add support for .me and .xxx TLDs
-* [24video] Fix comment count extraction
-* [sunporno] Add support for embed URLs
-* [sunporno] Fix metadata extraction (#10316)
-+ [hgtv] Add extractor for hgtv.ca (#3999)
-- [pbs] Remove request to unavailable API
-+ [pbs] Add support for high quality HTTP formats
-+ [crunchyroll] Add support for HLS formats (#10301)
-
-
-version 2016.08.12
-
-Core
-* Subtitles are now written as-is; newline conversions are disabled (#10268)
-+ Recognize more formats in unified_timestamp
-
-Extractors
-- [goldenmoustache] Remove extractor (#10298)
-* [drtuber] Improve title extraction
-* [drtuber] Make dislike count optional (#10297)
-* [chirbit] Fix extraction (#10296)
-* [francetvinfo] Relax URL regular expression
-* [rtlnl] Relax URL regular expression (#10282)
-* [formula1] Relax URL regular expression (#10283)
-* [wat] Improve extraction (#10281)
-* [ctsnews] Fix extraction
-
-
-version 2016.08.10
-
-Core
-* Make --metadata-from-title non-fatal when title does not match the pattern
-* Introduce options for randomized sleep before each download
- --min-sleep-interval and --max-sleep-interval (#9930)
-* Respect default in _search_json_ld
-
-Extractors
-+ [uol] Add extractor for uol.com.br (#4263)
-* [rbmaradio] Fix extraction and extract all formats (#10242)
-+ [sonyliv] Add extractor for sonyliv.com (#10258)
-* [aparat] Fix extraction
-* [cwtv] Extract HTTP formats
-+ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253)
-* [kuwo:singer] Fix extraction
-
-
-version 2016.08.07
-
-Core
-+ Add support for TV Parental Guidelines ratings in parse_age_limit
-+ Add decode_png (#9706)
-+ Add support for partOfTVSeries in JSON-LD
-* Lower master M3U8 manifest preference for better format sorting
-
-Extractors
-+ [discoverygo] Add extractor (#10245)
-* [flipagram] Make JSON-LD extraction non-fatal
-* [generic] Make JSON-LD extraction non-fatal
-+ [bbc] Add support for morph embeds (#10239)
-* [tnaflixnetworkbase] Improve title extraction
-* [tnaflix] Fix metadata extraction (#10249)
-* [fox] Fix theplatform release URL query
-* [openload] Fix extraction (#9706)
-* [bbc] Skip duplicate manifest URLs
-* [bbc] Improve format code
-+ [bbc] Add support for DASH and F4M
-* [bbc] Improve format sorting and listing
-* [bbc] Improve playlist extraction
-+ [pokemon] Add extractor (#10093)
-+ [condenast] Add fallback scenario for video info extraction
-
-
-version 2016.08.06
-
-Core
-* Add support for JSON-LD root list entries (#10203)
-* Improve unified_timestamp
-* Lower preference of RTSP formats in generic sorting
-+ Add support for multiple properties in _og_search_property
-* Improve password hiding from verbose output
-
-Extractors
-+ [adultswim] Add support for trailers (#10235)
-* [archiveorg] Improve extraction (#10219)
-+ [jwplatform] Add support for playlists
-+ [jwplatform] Add support for relative URLs
-* [jwplatform] Improve audio detection
-+ [tvplay] Capture and output native error message
-+ [tvplay] Extract series metadata
-+ [tvplay] Add support for subtitles (#10194)
-* [tvp] Improve extraction (#7799)
-* [cbslocal] Fix timestamp parsing (#10213)
-+ [naver] Add support for subtitles (#8096)
-* [naver] Improve extraction
-* [condenast] Improve extraction
-* [engadget] Relax URL regular expression
-* [5min] Fix extraction
-+ [nationalgeographic] Add support for Episode Guide
-+ [kaltura] Add support for subtitles
-* [kaltura] Optimize network requests
-+ [vodplatform] Add extractor for vod-platform.net
-- [gamekings] Remove extractor
-* [limelight] Extract HTTP formats
-* [ntvru] Fix extraction
-+ [comedycentral] Re-add :tds and :thedailyshow shortnames
-
-
-version 2016.08.01
-
-Fixed/improved extractors
-- [yandexmusic:track] Adapt to changes in track location JSON (#10193)
-- [bloomberg] Support another form of player (#10187)
-- [limelight] Skip DRM protected videos
-- [safari] Relax regular expressions for URL matching (#10202)
-- [cwtv] Add support for cwtvpr.com (#10196)
-
-
-version 2016.07.30
-
-Fixed/improved extractors
-- [twitch:clips] Sort formats
-- [tv2] Use m3u8_native
-- [tv2:article] Fix video detection (#10188)
-- rtve (#10076)
-- [dailymotion:playlist] Optimize download archive processing (#10180)
-
-
-version 2016.07.28
-
-Fixed/improved extractors
-- shared (#10170)
-- soundcloud (#10179)
-- twitch (#9767)
-
-
-version 2016.07.26.2
-
-Fixed/improved extractors
-- smotri
-- camdemy
-- mtv
-- comedycentral
-- cmt
-- cbc
-- mgtv
-- orf
-
-
-version 2016.07.24
-
-New extractors
-- arkena (#8682)
-- lcp (#8682)
-
-Fixed/improved extractors
-- facebook (#10151)
-- dailymail
-- telegraaf
-- dcn
-- onet
-- tvp
-
-Miscellaneous
-- Support $Time$ in DASH manifests
-
-
-version 2016.07.22
-
-New extractors
-- odatv (#9285)
-
-Fixed/improved extractors
-- bbc
-- youjizz (#10131)
-- youtube (#10140)
-- pornhub (#10138)
-- eporner (#10139)
-
-
-version 2016.07.17
-
-New extractors
-- nintendo (#9986)
-- streamable (#9122)
-
-Fixed/improved extractors
-- ard (#10095)
-- mtv
-- comedycentral (#10101)
-- viki (#10098)
-- spike (#10106)
-
-Miscellaneous
-- Improved twitter player detection (#10090)
-
-
-version 2016.07.16
-
-New extractors
-- ninenow (#5181)
-
-Fixed/improved extractors
-- rtve (#10076)
-- brightcove
-- 3qsdn
-- syfy (#9087, #3820, #2388)
-- youtube (#10083)
-
-Miscellaneous
-- Fix subtitle embedding for video-only and audio-only files (#10081)
-
-
-version 2016.07.13
-
-New extractors
-- rudo
-
-Fixed/improved extractors
-- biobiochiletv
-- tvplay
-- dbtv
-- brightcove
-- tmz
-- youtube (#10059)
-- shahid (#10062)
-- vk
-- ellentv (#10067)
-
-
-version 2016.07.11
-
-New extractors
-- roosterteeth (#9864)
-
-Fixed/improved extractors
-- miomio (#9605)
-- vuclip
-- youtube
-- vidzi (#10058)
-
-
-version 2016.07.09.2
-
-Fixed/improved extractors
-- vimeo (#1638)
-- facebook (#10048)
-- lynda (#10047)
-- animeondemand
-
-Fixed/improved features
-- Embedding subtitles no longer throws an error with problematic inputs (#9063)
-
-
-version 2016.07.09.1
-
-Fixed/improved extractors
-- youtube
-- ard
-- srmediathek (#9373)
-
-
-version 2016.07.09
-
-New extractors
-- Flipagram (#9898)
-
-Fixed/improved extractors
-- telecinco
-- toutv
-- radiocanada
-- tweakers (#9516)
-- lynda
-- nick (#7542)
-- polskieradio (#10028)
-- le
-- facebook (#9851)
-- mgtv
-- animeondemand (#10031)
-
-Fixed/improved features
-- `--postprocessor-args` and `--downloader-args` now accept non-ASCII inputs
- on non-Windows systems
-
-
-version 2016.07.07
-
-New extractors
-- kamcord (#10001)
-
-Fixed/improved extractors
-- spiegel (#10018)
-- metacafe (#8539, #3253)
-- onet (#9950)
-- francetv (#9955)
-- brightcove (#9965)
-- daum (#9972)
-
-
-version 2016.07.06
-
-Fixed/improved extractors
-- youtube (#10007, #10009)
-- xuite
-- stitcher
-- spiegel
-- slideshare
-- sandia
-- rtvnh
-- prosiebensat1
-- onionstudios
-
-
-version 2016.07.05
-
-Fixed/improved extractors
-- brightcove
-- yahoo (#9995)
-- pornhub (#9997)
-- iqiyi
-- kaltura (#5557)
-- la7
-
-Changed features
-- Rename --cn-verification-proxy to --geo-verification-proxy
-
-Miscellaneous
-- Add script for displaying downloads statistics
-
-
-version 2016.07.03.1
-
-Fixed/improved extractors
-- theplatform
-- aenetworks
-- nationalgeographic
-- hrti (#9482)
-- facebook (#5701)
-- buzzfeed (#5701)
-- rai (#8617, #9157, #9232, #8552, #8551)
-- nationalgeographic (#9991)
-- iqiyi
-
-
-version 2016.07.03
-
-New extractors
-- hrti (#9482)
-
-Fixed/improved extractors
-- vk (#9981)
-- facebook (#9938)
-- xtube (#9953, #9961)
-
-
-version 2016.07.02
-
-New extractors
-- fusion (#9958)
-
-Fixed/improved extractors
-- twitch (#9975)
-- vine (#9970)
-- periscope (#9967)
-- pornhub (#8696)
-
-
-version 2016.07.01
-
-New extractors
-- 9c9media
-- ctvnews (#2156)
-- ctv (#4077)
-
-Fixed/improved extractors
-- rds
-- meta (#8789)
-- pornhub (#9964)
-- sixplay (#2183)
-
-New features
-- Accept quoted strings across multiple lines (#9940)
diff --git a/Changelog.md b/Changelog.md
index 90f9bda..1a39d29 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -5,15 +5,1616 @@
* Run `make doc`
* Update Changelog.md and CONTRIBUTORS
-* Change "Merged with ytdl" version in Readme.md if needed
-* Add new/fixed extractors in "new features" section of Readme.md
-* Commit as `Release <version>`
-* Push to origin/release using `git push origin master:release`
- build task will now run
-
+* Change "Based on ytdl" version in Readme.md if needed
+* Commit as `Release <version>` and push to master
+* Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master
-->
+### 2022.11.11
+
+* Merge youtube-dl: Up to [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)
+* Backport SSL configuration from Python 3.10 by [coletdjnz](https://github.com/coletdjnz)
+* Do more processing in `--flat-playlist`
+* Fix `--list` options not implying `-s` in some cases by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* Fix end time of clips by [cruel-efficiency](https://github.com/cruel-efficiency)
+* Fix for `formats=None`
+* Write API params in debug head
+* [outtmpl] Ensure ASCII in json and add option for Unicode
+* [SponsorBlock] Add `type` field, obey `--retry-sleep extractor`, relax duration check for large segments
+* [SponsorBlock] **Support `chapter` category** by [ajayyy](https://github.com/ajayyy), [pukkandan](https://github.com/pukkandan) (see the sketch at the end of this section)
+* [ThumbnailsConvertor] Fix filename escaping by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [ModifyChapters] Handle the entire video being marked for removal
+* [embedthumbnail] Fix thumbnail name in mp3 by [How-Bout-No](https://github.com/How-Bout-No)
+* [downloader/fragment] HLS download can continue without the first fragment
+* [cookies] Improve `LenientSimpleCookie` by [Grub4K](https://github.com/Grub4K)
+* [jsinterp] Improve separating regex
+* [extractor/common] Fix `fatal=False` for `_search_nuxt_data`
+* [extractor/common] Improve `_generic_title`
+* [extractor/common] Fix `json_ld` type checks by [Grub4K](https://github.com/Grub4K)
+* [extractor/generic] Separate embed extraction into own function
+* [extractor/generic:quoted-html] Add extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/unsupported] Raise error on known DRM-only sites by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `js_to_json`: Improve escape handling by [Grub4K](https://github.com/Grub4K)
+* [utils] `strftime_or_none`: Workaround Python bug on Windows
+* [utils] `traverse_obj`: Always return list when branching, allow `re.Match` objects by [Grub4K](https://github.com/Grub4K)
+* [build, test] Harden workflows' security by [sashashura](https://github.com/sashashura)
+* [build] `py2exe`: Migrate to freeze API by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [build] Create `armv7l` and `aarch64` releases by [MrOctopus](https://github.com/MrOctopus), [pukkandan](https://github.com/pukkandan)
+* [build] Make linux binary truly standalone using `conda` by [mlampe](https://github.com/mlampe)
+* [build] Replace `set-output` with `GITHUB_OUTPUT` by [Lesmiscore](https://github.com/Lesmiscore)
+* [update] Use error code `100` for update errors
+* [compat] Fix `shutils.move` in restricted ACL mode on BSD by [ClosedPort22](https://github.com/ClosedPort22), [pukkandan](https://github.com/pukkandan)
+* [docs, devscripts] Document `pyinst`'s argument passthrough by [jahway603](https://github.com/jahway603)
+* [test] Allow `extract_flat` in download tests by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [Alienmaster](https://github.com/Alienmaster)
+* [extractor/aeon] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/agora] Add extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/camsoda] Add extractor by [zulaport](https://github.com/zulaport)
+* [extractor/cinetecamilano] Add extractor by [timendum](https://github.com/timendum)
+* [extractor/deuxm] Add extractors by [CrankDatSouljaBoy](https://github.com/CrankDatSouljaBoy)
+* [extractor/genius] Add extractors by [bashonly](https://github.com/bashonly)
+* [extractor/japandiet] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/listennotes] Add extractor by [lksj](https://github.com/lksj), [pukkandan](https://github.com/pukkandan)
+* [extractor/nos.nl] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/oftv] Add extractors by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/podbayfm] Add extractor by [schnusch](https://github.com/schnusch)
+* [extractor/qingting] Add extractor by [bashonly](https://github.com/bashonly), [changren-wcr](https://github.com/changren-wcr)
+* [extractor/screen9] Add extractor by [tpikonen](https://github.com/tpikonen)
+* [extractor/swearnet] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/YleAreena] Add extractor by [pukkandan](https://github.com/pukkandan), [vitkhab](https://github.com/vitkhab)
+* [extractor/zeenews] Add extractor by [m4tu4g](https://github.com/m4tu4g), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube:tab] **Update tab handling for redesign** by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+ * Channel URLs download all uploads of the channel as multiple playlists, separated by tab
+* [extractor/youtube] Differentiate between no comments and disabled comments by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Extract `concurrent_view_count` for livestreams by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix `duration` for premieres by [nosoop](https://github.com/nosoop)
+* [extractor/youtube] Fix `live_status` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Ignore incomplete data error for comment replies by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Improve chapter parsing from description
+* [extractor/youtube] Mark videos as fully watched by [bsun0000](https://github.com/bsun0000)
+* [extractor/youtube] Update piped instances by [Generator](https://github.com/Generator)
+* [extractor/youtube] Update playlist metadata extraction for new layout by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Fix video metadata from tabs by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Let `approximate_date` return timestamp
+* [extractor/americastestkitchen] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/bbc] Support onion domains by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/bilibili] Add chapters and misc cleanup by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bilibili] Fix BilibiliIE and Bangumi extractors by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Better error for geo-restricted videos by [flashdagger](https://github.com/flashdagger)
+* [extractor/bitchute] Improve `BitChuteChannelIE` by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Simplify extractor by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/cda] Support login through API by [selfisekai](https://github.com/selfisekai)
+* [extractor/crunchyroll] Beta is now the only layout by [tejing1](https://github.com/tejing1)
+* [extractor/detik] Avoid unnecessary extraction
+* [extractor/doodstream] Remove extractor
+* [extractor/dplay] Add MotorTrendOnDemand extractor by [bashonly](https://github.com/bashonly)
+* [extractor/epoch] Support videos without data-trailer by [gibson042](https://github.com/gibson042), [pukkandan](https://github.com/pukkandan)
+* [extractor/fox] Extract thumbnail by [vitkhab](https://github.com/vitkhab)
+* [extractor/foxnews] Add `FoxNewsVideo` extractor
+* [extractor/hotstar] Add season support by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/hotstar] Refactor v1 API calls
+* [extractor/iprima] Make json+ld non-fatal by [bashonly](https://github.com/bashonly)
+* [extractor/iq] Increase phantomjs timeout
+* [extractor/kaltura] Support playlists by [jwoglom](https://github.com/jwoglom), [pukkandan](https://github.com/pukkandan)
+* [extractor/lbry] Authenticate with cookies by [flashdagger](https://github.com/flashdagger)
+* [extractor/livestreamfails] Support posts by [invertico](https://github.com/invertico)
+* [extractor/mlb] Add `MLBArticle` extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/mxplayer] Improve extractor by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/niconico] Always use HTTPS for requests
+* [extractor/nzherald] Support new video embed by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/odnoklassniki] Support boosty.to embeds by [Lesmiscore](https://github.com/Lesmiscore), [megapro17](https://github.com/megapro17), [pukkandan](https://github.com/pukkandan)
+* [extractor/paramountplus] Update API token by [bashonly](https://github.com/bashonly)
+* [extractor/reddit] Add fallback format by [bashonly](https://github.com/bashonly)
+* [extractor/redgifs] Fix extractors by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/redgifs] Refresh auth token for 401 by [endotronic](https://github.com/endotronic), [pukkandan](https://github.com/pukkandan)
+* [extractor/rumble] Add HLS formats and extract more metadata by [flashdagger](https://github.com/flashdagger)
+* [extractor/sbs] Improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
+* [extractor/skyit] Fix extractors by [nixxo](https://github.com/nixxo)
+* [extractor/stripchat] Fix hostname for HLS stream by [zulaport](https://github.com/zulaport)
+* [extractor/stripchat] Improve error message by [freezboltz](https://github.com/freezboltz)
+* [extractor/telegram] Add playlist support and more metadata by [bashonly](https://github.com/bashonly), [bsun0000](https://github.com/bsun0000)
+* [extractor/Tnaflix] Fix for HTTP 500 by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [extractor/tubitv] Better DRM detection by [bashonly](https://github.com/bashonly)
+* [extractor/tvp] Update extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/twitcasting] Fix `data-movie-playlist` extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/twitter] Add onion site to `_VALID_URL` by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/twitter] Add Spaces extractor and GraphQL API by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [extractor/twitter] Support multi-video posts by [Grub4K](https://github.com/Grub4K)
+* [extractor/uktvplay] Fix `_VALID_URL`
+* [extractor/viu] Support subtitles of on-screen text by [tkgmomosheep](https://github.com/tkgmomosheep)
+* [extractor/VK] Fix playlist URLs by [the-marenga](https://github.com/the-marenga)
+* [extractor/vlive] Extract `release_timestamp`
+* [extractor/voot] Improve `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
+* [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/YoutubeWebArchive] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/zee5] Improve `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/zenyandex] Fix extractors by [lksj](https://github.com/lksj), [puc9](https://github.com/puc9), [pukkandan](https://github.com/pukkandan)
+
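+A minimal sketch of marking the new `chapter` category from the embedding API, assuming the module mirrors yt-dlp's `SponsorBlock` and `ModifyChapters` postprocessor options (the URL is only a placeholder):
+
+```python
+from hypervideo_dl import YoutubeDL
+
+ydl_opts = {
+    'postprocessors': [
+        # Fetch SponsorBlock segments, including the new 'chapter' category
+        {'key': 'SponsorBlock', 'categories': ['sponsor', 'chapter']},
+        # Write the fetched segments into the output file's chapters
+        {'key': 'ModifyChapters'},
+    ],
+}
+with YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=dQw4w9WgXcQ'])
+```
+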
+
+### 2022.10.04
+
+* Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly) (see the sketch at the end of this section)
+* Allow open ranges for time ranges by [Lesmiscore](https://github.com/Lesmiscore)
+* Allow plugin extractors to replace the built-in ones
+* Don't download the entire video when nothing matches `--download-sections`
+* Fix `--config-location -`
+* Improve [5736d79](https://github.com/yt-dlp/yt-dlp/pull/5044/commits/5736d79172c47ff84740d5720467370a560febad)
+* Fix for when playlists don't have `webpage_url`
+* Support environment variables in `--ffmpeg-location`
+* Workaround `libc_ver` not being available on the Windows Store version of Python
+* [outtmpl] Curly braces to filter keys by [pukkandan](https://github.com/pukkandan)
+* [outtmpl] Make `%s` work in strfformat for all systems
+* [jsinterp] Workaround operator associativity issue
+* [cookies] Let `_get_mac_keyring_password` fail gracefully
+* [cookies] Parse cookies leniently by [Grub4K](https://github.com/Grub4K)
+* [phantomjs] Fix bug in [587021c](https://github.com/yt-dlp/yt-dlp/commit/587021cd9f717181b44e881941aca3f8d753758b) by [elyse0](https://github.com/elyse0)
+* [downloader/aria2c] Fix filename containing leading whitespace by [std-move](https://github.com/std-move)
+* [downloader/ism] Support ec-3 codec by [nixxo](https://github.com/nixxo)
+* [extractor] Fix `fatal=False` in `RetryManager`
+* [extractor] Improve json-ld extraction
+* [extractor] Make `_search_json` able to parse lists
+* [extractor] Escape `%` in `representation_id` of m3u8
+* [extractor/generic] Pass through referer from json-ld
+* [utils] `base_url`: URL paths can contain `&` by [elyse0](https://github.com/elyse0)
+* [utils] `js_to_json`: Improve
+* [utils] `Popen.run`: Fix default return in binary mode
+* [utils] `traverse_obj`: Rewrite, document and add tests by [Grub4K](https://github.com/Grub4K)
+* [devscripts] `make_lazy_extractors`: Fix for Docker by [josanabr](https://github.com/josanabr)
+* [docs] Misc Improvements
+* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [gamer191](https://github.com/gamer191)
+* [extractor/24tv.ua] Add extractors by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/BerufeTV] Add extractor by [Fabi019](https://github.com/Fabi019)
+* [extractor/booyah] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0)
+* [extractor/bundesliga] Add extractor by [Fabi019](https://github.com/Fabi019)
+* [extractor/GoPlay] Add extractor by [CNugteren](https://github.com/CNugteren), [basrieter](https://github.com/basrieter), [jeroenj](https://github.com/jeroenj)
+* [extractor/iltalehti] Add extractor by [tpikonen](https://github.com/tpikonen)
+* [extractor/IsraelNationalNews] Add extractor by [Bobscorn](https://github.com/Bobscorn)
+* [extractor/mediaworksnzvod] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/MicrosoftEmbed] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/nbc] Add NBCStations extractor by [bashonly](https://github.com/bashonly)
+* [extractor/onenewsnz] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/prankcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [columndeeply](https://github.com/columndeeply)
+* [extractor/Smotrim] Add extractor by [Lesmiscore](https://github.com/Lesmiscore), [nikita-moor](https://github.com/nikita-moor)
+* [extractor/tencent] Add Iflix extractor by [elyse0](https://github.com/elyse0)
+* [extractor/unscripted] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/adobepass] Add MSO AlticeOne (Optimum TV) by [CplPwnies](https://github.com/CplPwnies)
+* [extractor/youtube] **Download `post_live` videos from start** by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Add support for Shorts audio pivot feed by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Detect `lazy-load-for-videos` embeds
+* [extractor/youtube] Do not warn on duplicate chapters
+* [extractor/youtube] Fix video like count extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Support changing extraction language by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Improve continuation items extraction
+* [extractor/youtube:tab] Support `reporthistory` page
+* [extractor/amazonstore] Fix JSON extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/amazonstore] Retry to avoid captcha page by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/animeondemand] Remove extractor by [TokyoBlackHole](https://github.com/TokyoBlackHole)
+* [extractor/anvato] Fix extractor and refactor by [bashonly](https://github.com/bashonly)
+* [extractor/artetv] Remove duplicate stream urls by [Grub4K](https://github.com/Grub4K)
+* [extractor/audioboom] Support direct URLs and refactor by [pukkandan](https://github.com/pukkandan), [tpikonen](https://github.com/tpikonen)
+* [extractor/bandcamp] Extract `uploader_url`
+* [extractor/bilibili] Add space.bilibili extractors by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/BilibiliSpace] Fix extractor and better error message by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/BiliIntl] Support uppercase lang in `_VALID_URL` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/BiliIntlSeries] Fix `_VALID_URL`
+* [extractor/bongacams] Update `_VALID_URL` by [0xGodspeed](https://github.com/0xGodspeed)
+* [extractor/crunchyroll:beta] Improve handling of hardsubs by [Grub4K](https://github.com/Grub4K)
+* [extractor/detik] Generalize extractors by [HobbyistDev](https://github.com/HobbyistDev), [coletdjnz](https://github.com/coletdjnz)
+* [extractor/dplay:italy] Add default authentication by [Timendum](https://github.com/Timendum)
+* [extractor/heise] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/holodex] Fix `_VALID_URL` by [LiviaMedeiros](https://github.com/LiviaMedeiros)
+* [extractor/hrfensehen] Fix extractor by [snapdgn](https://github.com/snapdgn)
+* [extractor/hungama] Add subtitle by [GautamMKGarg](https://github.com/GautamMKGarg), [pukkandan](https://github.com/pukkandan)
+* [extractor/instagram] Extract more metadata by [pritam20ps05](https://github.com/pritam20ps05)
+* [extractor/JWPlatform] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/malltv] Fix video_id extraction by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/MLBTV] Detect live streams
+* [extractor/motorsport] Support native embeds
+* [extractor/Mxplayer] Fix extractor by [itachi-19](https://github.com/itachi-19)
+* [extractor/nebula] Add nebula.tv by [tannertechnology](https://github.com/tannertechnology)
+* [extractor/nfl] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* [extractor/paramountplus] Better DRM detection by [bashonly](https://github.com/bashonly)
+* [extractor/patreon] Sort formats
+* [extractor/rcs] Fix embed extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/redgifs] Fix extractor by [jhwgh1968](https://github.com/jhwgh1968)
+* [extractor/rutube] Fix `_EMBED_REGEX` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/RUTV] Fix warnings for livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/soundcloud:search] More metadata in `--flat-playlist` by [SuperSonicHub1](https://github.com/SuperSonicHub1)
+* [extractor/telegraaf] Use mobile GraphQL API endpoint by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/tennistv] Fix timestamp by [zenerdi0de](https://github.com/zenerdi0de)
+* [extractor/tiktok] Fix TikTokIE by [bashonly](https://github.com/bashonly)
+* [extractor/triller] Fix auth token by [bashonly](https://github.com/bashonly)
+* [extractor/trovo] Fix extractors by [Mehavoid](https://github.com/Mehavoid)
+* [extractor/tv2] Support new url format by [tobi1805](https://github.com/tobi1805)
+* [extractor/web.archive:youtube] Fix `_YT_INITIAL_PLAYER_RESPONSE_RE`
+* [extractor/wistia] Add support for channels by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/wistia] Match IDs in embed URLs by [bashonly](https://github.com/bashonly)
+* [extractor/wordpress:playlist] Add generic embed extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/yandexvideopreview] Update `_VALID_URL` by [Grub4K](https://github.com/Grub4K)
+* [extractor/zee5] Fix `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/zee5] Generate device ids by [freezboltz](https://github.com/freezboltz)
+
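+A minimal sketch of the in-memory archive, assuming the embedding API matches yt-dlp's: a `set` of `"<extractor> <id>"` strings can now be passed as `download_archive` in place of a file path (URL and id are placeholders):
+
+```python
+from hypervideo_dl import YoutubeDL
+
+# Same "extractor video_id" entry format as the archive file uses
+archive = {'youtube dQw4w9WgXcQ'}
+
+with YoutubeDL({'download_archive': archive}) as ydl:
+    # Already-archived ids are skipped; new ids are added to the set
+    ydl.download(['https://www.youtube.com/watch?v=dQw4w9WgXcQ'])
+```
+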
+
+### 2022.09.01
+
+* Add option `--use-extractors` (see the sketch at the end of this section)
+* Merge youtube-dl: Up to [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7)
+* Add yt-dlp version to infojson
+* Fix `--break-per-url --max-downloads`
+* Fix bug in `--alias`
+* [cookies] Support firefox container in `--cookies-from-browser` by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [downloader/external] Smarter detection of executable
+* [extractor/generic] Don't return JW player without formats
+* [FormatSort] Fix `aext` for `--prefer-free-formats`
+* [jsinterp] Various improvements by [pukkandan](https://github.com/pukkandan), [dirkf](https://github.com/dirkf), [elyse0](https://github.com/elyse0)
+* [cache] Mechanism to invalidate old cache
+* [utils] Add `deprecation_warning`
+* [utils] Add `orderedSet_from_options`
+* [utils] `Popen`: Restore `LD_LIBRARY_PATH` when using PyInstaller by [Lesmiscore](https://github.com/Lesmiscore)
+* [build] `make tar` should not follow `DESTDIR` by [satan1st](https://github.com/satan1st)
+* [build] Update pyinstaller by [shirt-dev](https://github.com/shirt-dev)
+* [test] Fix `test_youtube_signature`
+* [cleanup] Misc fixes and cleanup by [DavidH-2022](https://github.com/DavidH-2022), [MrRawes](https://github.com/MrRawes), [pukkandan](https://github.com/pukkandan)
+* [extractor/epoch] Add extractor by [tejasa97](https://github.com/tejasa97)
+* [extractor/eurosport] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/IslamChannel] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/newspicks] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/triller] Add extractor by [bashonly](https://github.com/bashonly)
+* [extractor/VQQ] Add extractors by [elyse0](https://github.com/elyse0)
+* [extractor/youtube] Improvements to nsig extraction
+* [extractor/youtube] Fix bug in format sorting
+* [extractor/youtube] Update iOS Innertube clients by [SamantazFox](https://github.com/SamantazFox)
+* [extractor/youtube] Use device-specific user agent by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add `--compat-option no-youtube-prefer-utc-upload-date` by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/arte] Bug fix by [cgrigis](https://github.com/cgrigis)
+* [extractor/bilibili] Extract `flac` with premium account by [jackyyf](https://github.com/jackyyf)
+* [extractor/BiliBiliSearch] Don't sort by date
+* [extractor/BiliBiliSearch] Fix infinite loop
+* [extractor/bitchute] Mark errors as expected
+* [extractor/crunchyroll:beta] Use anonymous access by [tejing1](https://github.com/tejing1)
+* [extractor/huya] Fix stream extraction by [ohaiibuzzle](https://github.com/ohaiibuzzle)
+* [extractor/medaltv] Fix extraction by [xenova](https://github.com/xenova)
+* [extractor/mediaset] Fix embed extraction
+* [extractor/mixcloud] All formats are audio-only
+* [extractor/rtbf] Fix jwt extraction by [elyse0](https://github.com/elyse0)
+* [extractor/screencastomatic] Support `--video-password` by [shreyasminocha](https://github.com/shreyasminocha)
+* [extractor/stripchat] Don't modify input URL by [dfaker](https://github.com/dfaker)
+* [extractor/uktv] Improve `_VALID_URL` by [dirkf](https://github.com/dirkf)
+* [extractor/vimeo:user] Fix `_VALID_URL`
+
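+A minimal sketch of the new option from the embedding API, assuming `--use-extractors` corresponds to yt-dlp's `allowed_extractors` parameter, a list of patterns matched against extractor names (the URL is only a placeholder):
+
+```python
+from hypervideo_dl import YoutubeDL
+
+ydl_opts = {
+    # Only try these extractors; add 'default' to keep the normal ones enabled
+    'allowed_extractors': ['youtube', 'generic'],
+}
+with YoutubeDL(ydl_opts) as ydl:
+    info = ydl.extract_info('https://www.youtube.com/watch?v=dQw4w9WgXcQ', download=False)
+    print(info['extractor'])
+```
+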
+
+### 2022.08.19
+
+* Fix bug in `--download-archive`
+* [jsinterp] **Fix for new youtube players** and related improvements by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [phantomjs] Add function to execute JS without a DOM by [MinePlayersPE](https://github.com/MinePlayersPE), [pukkandan](https://github.com/pukkandan)
+* [build] Exclude devscripts from installs by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Misc fixes and cleanup
+* [extractor/youtube] **Add fallback to phantomjs** for nsig
+* [extractor/youtube] Fix error reporting of "Incomplete data"
+* [extractor/youtube] Improve format sorting for IOS formats
+* [extractor/youtube] Improve signature caching
+* [extractor/instagram] Fix extraction by [bashonly](https://github.com/bashonly), [pritam20ps05](https://github.com/pritam20ps05)
+* [extractor/rai] Minor fix by [nixxo](https://github.com/nixxo)
+* [extractor/rtbf] Fix stream extractor by [elyse0](https://github.com/elyse0)
+* [extractor/SovietsCloset] Fix extractor by [ChillingPepper](https://github.com/ChillingPepper)
+* [extractor/zattoo] Fix Zattoo resellers by [goggle](https://github.com/goggle)
+
+### 2022.08.14
+
+* Merge youtube-dl: Up to [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56)
+* [jsinterp] Handle **new youtube signature functions**
+* [jsinterp] Truncate error messages
+* [extractor] Fix format sorting of `channels`
+* [ffmpeg] Disable avconv unless `--prefer-avconv`
+* [ffmpeg] Smarter detection of ffprobe filename
+* [embedthumbnail] Detect `libatomicparsley.so`
+* [ThumbnailsConvertor] Fix conversion after `fixup_webp`
+* [utils] Fix `get_compatible_ext`
+* [build] Fix changelog
+* [update] Set executable bit-mask by [pukkandan](https://github.com/pukkandan), [Lesmiscore](https://github.com/Lesmiscore)
+* [devscripts] Fix import
+* [docs] Consistent use of `e.g.` by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Misc fixes and cleanup
+* [extractor/moview] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/parler] Add extractor by [palewire](https://github.com/palewire)
+* [extractor/patreon] Ignore erroneous media attachments by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/truth] Add extractor by [palewire](https://github.com/palewire)
+* [extractor/aenetworks] Add formats parameter by [jacobtruman](https://github.com/jacobtruman)
+* [extractor/crunchyroll] Improve `_VALID_URL`s
+* [extractor/doodstream] Add `wf` domain by [aldoridhoni](https://github.com/aldoridhoni)
+* [extractor/facebook] Add reel support by [bashonly](https://github.com/bashonly)
+* [extractor/MLB] New extractor by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/rai] Misc fixes by [nixxo](https://github.com/nixxo)
+* [extractor/toggo] Improve `_VALID_URL` by [masta79](https://github.com/masta79)
+* [extractor/tubitv] Extract additional formats by [shirt-dev](https://github.com/shirt-dev)
+* [extractor/zattoo] Potential fix for resellers
+
+
+### 2022.08.08
+
+* **Remove Python 3.6 support**
+* Determine merge container better by [pukkandan](https://github.com/pukkandan), [selfisekai](https://github.com/selfisekai)
+* Framework for embed detection by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* Merge youtube-dl: Up to [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294)
+* `--compat-option no-live-chat` should disable danmaku (see the sketch at the end of this section)
+* Fix misleading DRM message
+* Import ctypes only when necessary
+* Minor bugfixes
+* Reject entire playlists faster with `--match-filter`
+* Remove filtered entries from `-J`
+* Standardize retry mechanism
+* Validate `--merge-output-format`
+* [downloader] Add average speed to final progress line
+* [extractor] Add field `audio_channels`
+* [extractor] Support multiple archive ids for one video
+* [ffmpeg] Set `ffmpeg_location` in a contextvar
+* [FFmpegThumbnailsConvertor] Fix conversion from GIF
+* [MetadataParser] Don't set `None` when the field didn't match
+* [outtmpl] Smarter replacing of unsupported characters
+* [outtmpl] Treat empty values as None in filenames
+* [utils] sanitize_open: Allow any IO stream as stdout
+* [build, devscripts] Add devscript to set a build variant
+* [build] Improve build process by [shirt-dev](https://github.com/shirt-dev)
+* [build] Update pyinstaller
+* [devscripts] Create `utils` and refactor
+* [docs] Clarify `best*`
+* [docs] Fix bug report issue template
+* [docs] Fix capitalization in references by [christoph-heinrich](https://github.com/christoph-heinrich)
+* [cleanup, mhtml] Use imghdr
+* [cleanup, utils] Consolidate known media extensions
+* [cleanup] Misc fixes and cleanup
+* [extractor/angel] Add extractor by [AxiosDeminence](https://github.com/AxiosDeminence)
+* [extractor/dplay] Add MotorTrend extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [extractor/harpodeon] Add extractor by [eren-kemer](https://github.com/eren-kemer)
+* [extractor/holodex] Add extractor by [pukkandan](https://github.com/pukkandan), [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/kompas] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/rai] Add raisudtirol extractor by [nixxo](https://github.com/nixxo)
+* [extractor/tempo] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/youtube] **Fixes for third party client detection** by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add `live_status=post_live` by [lazypete365](https://github.com/lazypete365)
+* [extractor/youtube] Extract more format info
+* [extractor/youtube] Parse translated subtitles only when requested
+* [extractor/youtube, extractor/twitch] Allow waiting for channels to become live
+* [extractor/youtube, webvtt] Extract auto-subs from livestream VODs by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [extractor/AbemaTVTitle] Implement paging by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/archiveorg] Improve handling of formats by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/arte] Fix title extraction
+* [extractor/arte] **Move to v2 API** by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [extractor/bbc] Fix news articles by [ajj8](https://github.com/ajj8)
+* [extractor/camtasia] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/cloudflarestream] Fix video_id padding by [haobinliang](https://github.com/haobinliang)
+* [extractor/crunchyroll] Fix conversion of thumbnail from GIF
+* [extractor/crunchyroll] Handle missing metadata correctly by [Burve](https://github.com/Burve), [pukkandan](https://github.com/pukkandan)
+* [extractor/crunchyroll:beta] Extract timestamp and fix tests by [tejing1](https://github.com/tejing1)
+* [extractor/crunchyroll:beta] Use streams API by [tejing1](https://github.com/tejing1)
+* [extractor/doodstream] Support more domains by [Galiley](https://github.com/Galiley)
+* [extractor/ESPN] Extract duration by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/FIFA] Change API endpoint by [Bricio](https://github.com/Bricio), [yashkc2025](https://github.com/yashkc2025)
+* [extractor/globo:article] Remove false positives by [Bricio](https://github.com/Bricio)
+* [extractor/Go] Extract timestamp by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/hidive] Fix cookie login when netrc is also given by [winterbird-code](https://github.com/winterbird-code)
+* [extractor/html5] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/ina] Improve extractor by [elyse0](https://github.com/elyse0)
+* [extractor/NaverNow] Change endpoint by [ping](https://github.com/ping)
+* [extractor/ninegag] Extract uploader by [DjesonPV](https://github.com/DjesonPV)
+* [extractor/NovaPlay] Fix extractor by [Bojidarist](https://github.com/Bojidarist)
+* [extractor/orf:radio] Rewrite extractors
+* [extractor/patreon] Fix and improve extractors by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/rai] Fix RaiNews extraction by [nixxo](https://github.com/nixxo)
+* [extractor/redbee] Unify and update extractors by [elyse0](https://github.com/elyse0)
+* [extractor/stripchat] Fix `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
+* [extractor/tubi] Exclude playlists from playlist entries by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/tviplayer] Improve `_VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/twitch] Extract chapters for single chapter VODs by [mpeter50](https://github.com/mpeter50)
+* [extractor/vgtv] Support tv.vg.no by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/vidio] Support embed link by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/vk] Fix extractor by [Mehavoid](https://github.com/Mehavoid)
+* [extractor/WASDTV:record] Fix `_VALID_URL`
+* [extractor/xfileshare] Add Referer by [Galiley](https://github.com/Galiley)
+* [extractor/YahooJapanNews] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/yandexmusic] Extract higher quality format
+* [extractor/zee5] Update Device ID by [m4tu4g](https://github.com/m4tu4g)
+
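+A minimal sketch of this compat option from the embedding API, assuming compat options are passed through yt-dlp's `compat_opts` parameter (the URL is only a placeholder):
+
+```python
+from hypervideo_dl import YoutubeDL
+
+ydl_opts = {
+    # With this release, 'no-live-chat' also disables danmaku downloads
+    'compat_opts': ['no-live-chat'],
+    'writesubtitles': True,
+}
+with YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=dQw4w9WgXcQ'])
+```
+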
+
+### 2022.07.18
+
+* Allow users to specify encoding in each config file by [Lesmiscore](https://github.com/Lesmiscore)
+* Discard infodict from memory if no longer needed
+* Do not allow extractors to return `None`
+* Do not load system certificates when `certifi` is used
+* Fix rounding of integers in format table
+* Improve chapter sanitization
+* Skip some fixup if remux/recode is needed by [Lesmiscore](https://github.com/Lesmiscore)
+* Support `--no-progress` for `--wait-for-video`
+* Fix bug in [612f2be](https://github.com/yt-dlp/yt-dlp/commit/612f2be5d3924540158dfbe5f25d841f04cff8c6)
+* [outtmpl] Add alternate form `h` for HTML escaping
+* [aes] Add multiple padding modes in CBC by [elyse0](https://github.com/elyse0)
+* [extractor/common] Passthrough `errnote=False` to parsers
+* [extractor/generic] Remove HEAD request
+* [http] Ensure the file handle is always closed
+* [ModifyChapters] Modify duration in infodict
+* [options] Fix aliases to `--config-location`
+* [utils] Fix `get_domain`
+* [build] Consistent order for lazy extractors by [lamby](https://github.com/lamby)
+* [build] Fix architecture suffix of executables by [odo2063](https://github.com/odo2063)
+* [build] Improve `setup.py`
+* [update] Do not check `_update_spec` when up to date
+* [update] Prepare to remove Python 3.6 support
+* [compat] Let PyInstaller detect _legacy module
+* [devscripts/update-formulae] Do not change dependency section
+* [test] Split download tests so they can be more easily run in CI
+* [docs] Improve docstring of `download_ranges` by [FirefoxMetzger](https://github.com/FirefoxMetzger)
+* [docs] Improve issue templates
+* [build] Fix bug in [6d916fe](https://github.com/yt-dlp/yt-dlp/commit/6d916fe709a38e8c4c69b73843acf170b5165931)
+* [cleanup, utils] Refactor parse_codecs
+* [cleanup] Misc fixes and cleanup
+* [extractor/acfun] Add extractors by [lockmatrix](https://github.com/lockmatrix)
+* [extractor/Audiodraft] Add extractors by [Ashish0804](https://github.com/Ashish0804), [fstirlitz](https://github.com/fstirlitz)
+* [extractor/cellebrite] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/detik] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/hytale] Add extractor by [llamasblade](https://github.com/llamasblade), [pukkandan](https://github.com/pukkandan)
+* [extractor/liputan6] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/mocha] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/rtl.lu] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/rtvsl] Add extractor by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [extractor/StarTrek] Add extractor by [scy](https://github.com/scy)
+* [extractor/syvdk] Add extractor by [misaelaguayo](https://github.com/misaelaguayo)
+* [extractor/theholetv] Add extractor by [dosy4ev](https://github.com/dosy4ev)
+* [extractor/TubeTuGraz] Add extractor by [Ferdi265](https://github.com/Ferdi265), [pukkandan](https://github.com/pukkandan)
+* [extractor/tviplayer] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/wetv] Add extractors by [elyse0](https://github.com/elyse0)
+* [extractor/wikimedia] Add extractor by [EhtishamSabir](https://github.com/EhtishamSabir), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Fix duration check for post-live manifestless mode
+* [extractor/youtube] More metadata for storyboards by [ftk](https://github.com/ftk)
+* [extractor/bigo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/BiliIntl] Fix subtitle extraction by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [extractor/crunchyroll] Improve `_VALID_URL`
+* [extractor/fifa] Fix extractor by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/instagram] Fix post/story extractors by [pritam20ps05](https://github.com/pritam20ps05), [pukkandan](https://github.com/pukkandan)
+* [extractor/iq] Set language correctly for Korean subtitles
+* [extractor/MangoTV] Fix subtitle languages
+* [extractor/Netverse] Improve playlist extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/philharmoniedeparis] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/Trovo] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [extractor/twitch] Support storyboards for VODs by [ftk](https://github.com/ftk)
+* [extractor/WatchESPN] Improve `_VALID_URL` by [IONECarter](https://github.com/IONECarter), [dirkf](https://github.com/dirkf)
+* [extractor/WSJArticle] Fix video id extraction by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/Ximalaya] Fix extractors by [lockmatrix](https://github.com/lockmatrix)
+* [cleanup, extractor/youtube] Fix tests by [sheerluck](https://github.com/sheerluck)
+
+
+### 2022.06.29
+
+* Fix `--downloader native`
+* Fix `section_end` of clips
+* Fix playlist error handling
+* Sanitize `chapters`
+* [extractor] Fix `_create_request` when headers is None
+* [extractor] Fix empty `BaseURL` in MPD
+* [ffmpeg] Write full output to debug on error
+* [hls] Warn user when trying to download live HLS
+* [options] Fix `parse_known_args` for `--`
+* [utils] Fix inconsistent default handling between HTTP and HTTPS requests by [coletdjnz](https://github.com/coletdjnz)
+* [build] Draft release until complete
+* [build] Fix release tag commit
+* [build] Standalone x64 builds for MacOS 10.9 by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+* [update] Ability to set a maximum version for specific variants
+* [compat] Fix `compat.WINDOWS_VT_MODE`
+* [compat] Remove deprecated functions from core code
+* [compat] Remove more functions
+* [cleanup, extractor] Reduce direct use of `_downloader`
+* [cleanup] Consistent style for file heads
+* [cleanup] Fix some typos by [crazymoose77756](https://github.com/crazymoose77756)
+* [cleanup] Misc fixes and cleanup
+* [extractor/Scrolller] Add extractor by [LunarFang416](https://github.com/LunarFang416)
+* [extractor/ViMP] Add playlist extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [extractor/fuyin] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/livestreamfails] Add extractor by [nomevi](https://github.com/nomevi)
+* [extractor/premiershiprugby] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/steam] Add broadcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/youtube] Mark videos as fully watched by [Brett824](https://github.com/Brett824)
+* [extractor/CWTV] Extract thumbnail by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/ViMP] Add thumbnail and support more sites by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [extractor/dropout] Support cookies and login only as needed by [pingiun](https://github.com/pingiun), [pukkandan](https://github.com/pukkandan)
+* [extractor/ertflix] Improve `_VALID_URL`
+* [extractor/lbry] Use HEAD request for redirect URL by [flashdagger](https://github.com/flashdagger)
+* [extractor/mediaset] Improve `_VALID_URL`
+* [extractor/npr] Implement [e50c350](https://github.com/yt-dlp/yt-dlp/commit/e50c3500b43d80e4492569c4b4523c4379c6fbb2) differently
+* [extractor/tennistv] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [zenerdi0de](https://github.com/zenerdi0de)
+
+### 2022.06.22.1
+
+* [build] Fix updating homebrew formula
+
+### 2022.06.22
+
+* [**Deprecate support for Python 3.6**](https://github.com/yt-dlp/yt-dlp/issues/3764#issuecomment-1154051119)
+* **Add option `--download-sections` to download video partially**
+ * Chapter regex and time ranges are accepted, e.g. `--download-sections *1:10-2:20` (see the sketch at the end of this section)
+* Add option `--alias`
+* Add option `--lazy-playlist` to process entries as they are received
+* Add option `--retry-sleep`
+* Add slicing notation to `--playlist-items` (also covered by the sketch at the end of this section)
+ * Adds support for negative indices and step
+ * Add `-I` as alias for `--playlist-items`
+ * Makes `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse` redundant
+* `--config-location -` to provide options interactively
+* [build] Add Linux standalone builds
+* [update] Self-restart after update
+* Merge youtube-dl: Up to [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a9)
+* Add `--no-update`
+* Allow extractors to specify `section_start`/`end` for clips
+* Do not print progress to `stderr` with `-q`
+* Ensure pre-processor errors do not block video download
+* Fix `--simulate --max-downloads`
+* Improve error handling of bad config files
+* Return an error code if update fails
+* Fix bug in [3a408f9](https://github.com/yt-dlp/yt-dlp/commit/3a408f9d199127ca2626359e21a866a09ab236b3)
+* [ExtractAudio] Allow conditional conversion
+* [ModifyChapters] Fix repeated removal of small segments
+* [ThumbnailsConvertor] Allow conditional conversion
+* [cookies] Detect profiles for cygwin/BSD by [moench-tegeder](https://github.com/moench-tegeder)
+* [dash] Show fragment count with `--live-from-start` by [flashdagger](https://github.com/flashdagger)
+* [extractor] Add `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor] Add `default` parameter to `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor] Add dev option `--load-pages`
+* [extractor] Handle `json_ld` with multiple `@type`s
+* [extractor] Import `_ALL_CLASSES` lazily
+* [extractor] Recognize `src` attribute from HTML5 media elements by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/generic] Revert [e6ae51c](https://github.com/yt-dlp/yt-dlp/commit/e6ae51c123897927eb3c9899923d8ffd31c7f85d)
+* [f4m] Bugfix
+* [ffmpeg] Check version lazily
+* [jsinterp] Some optimizations and refactoring by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [utils] Improve performance using `functools.cache`
+* [utils] Send HTTP/1.1 ALPN extension by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `ExtractorError`: Fix `exc_info`
+* [utils] `ISO3166Utils`: Add `EU` and `AP`
+* [utils] `Popen`: Refactor to use contextmanager
+* [utils] `locked_file`: Fix for PyPy on Windows
+* [update] Expose more functionality to API
+* [update] Use `.git` folder to distinguish `source`/`unknown`
+* [compat] Add `functools.cached_property`
+* [test] Fix `FakeYDL` signatures by [coletdjnz](https://github.com/coletdjnz)
+* [docs] Improvements
+* [cleanup, ExtractAudio] Refactor
+* [cleanup, downloader] Refactor `report_progress`
+* [cleanup, extractor] Refactor `_download_...` methods
+* [cleanup, extractor] Rename `extractors.py` to `_extractors.py`
+* [cleanup, utils] Don't use kwargs for `format_field`
+* [cleanup, build] Refactor
+* [cleanup, docs] Re-indent "Usage and Options" section
+* [cleanup] Deprecate `YoutubeDL.parse_outtmpl`
+* [cleanup] Misc fixes and cleanup by [Lesmiscore](https://github.com/Lesmiscore), [MrRawes](https://github.com/MrRawes), [christoph-heinrich](https://github.com/christoph-heinrich), [flashdagger](https://github.com/flashdagger), [gamer191](https://github.com/gamer191), [kwconder](https://github.com/kwconder), [pukkandan](https://github.com/pukkandan)
+* [extractor/DailyWire] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/fourzerostudio] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/GoogleDrive] Add folder extractor by [evansp](https://github.com/evansp), [pukkandan](https://github.com/pukkandan)
+* [extractor/MirrorCoUK] Add extractor by [LunarFang416](https://github.com/LunarFang416), [pukkandan](https://github.com/pukkandan)
+* [extractor/atscaleconfevent] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [extractor/freetv] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/ixigua] Add Extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/kicker.de] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/netverse] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/playsuisse] Add extractor by [pukkandan](https://github.com/pukkandan), [sbor23](https://github.com/sbor23)
+* [extractor/substack] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/youtube] **Support downloading clips**
+* [extractor/youtube] Add `innertube_host` and `innertube_key` extractor args by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add warning for PostLiveDvr
+* [extractor/youtube] Bring back `_extract_chapters_from_description`
+* [extractor/youtube] Extract `comment_count` from webpage
+* [extractor/youtube] Fix `:ytnotifications` extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix initial player response extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Fix live chat for videos with content warning by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Make signature extraction non-fatal
+* [extractor/youtube:tab] Detect `videoRenderer` in `_post_thread_continuation_entries`
+* [extractor/BiliIntl] Fix metadata extraction
+* [extractor/BiliIntl] Fix subtitle extraction by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/FranceCulture] Fix extractor by [aurelg](https://github.com/aurelg), [pukkandan](https://github.com/pukkandan)
+* [extractor/PokemonSoundLibrary] Remove extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/StreamCZ] Fix extractor by [adamanldo](https://github.com/adamanldo), [dirkf](https://github.com/dirkf)
+* [extractor/WatchESPN] Support free videos and BAM_DTC by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/animelab] Remove extractor by [gamer191](https://github.com/gamer191)
+* [extractor/bloomberg] Change playback endpoint by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/ccc] Extract view_count by [vkorablin](https://github.com/vkorablin)
+* [extractor/crunchyroll:beta] Fix extractor after API change by [Burve](https://github.com/Burve), [tejing1](https://github.com/tejing1)
+* [extractor/curiositystream] Get `auth_token` from cookie by [mnn](https://github.com/mnn)
+* [extractor/digitalconcerthall] Fix extractor by [ZhymabekRoman](https://github.com/ZhymabekRoman)
+* [extractor/dropbox] Extract the correct `mountComponent`
+* [extractor/dropout] Login is not mandatory
+* [extractor/duboku] Fix for hostname change by [mozbugbox](https://github.com/mozbugbox)
+* [extractor/espn] Add `WatchESPN` extractor by [ischmidt20](https://github.com/ischmidt20), [pukkandan](https://github.com/pukkandan)
+* [extractor/expressen] Fix extractor by [aejdl](https://github.com/aejdl)
+* [extractor/foxnews] Update embed extraction by [elyse0](https://github.com/elyse0)
+* [extractor/ina] Fix extractor by [elyse0](https://github.com/elyse0)
+* [extractor/iwara:user] Improve paging by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/jwplatform] Look for `data-video-jw-id`
+* [extractor/lbry] Update livestream API by [flashdagger](https://github.com/flashdagger)
+* [extractor/mediaset] Improve `_VALID_URL`
+* [extractor/naver] Add `navernow` extractor by [ping](https://github.com/ping)
+* [extractor/niconico:series] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/npr] Use stream url from json-ld by [r5d](https://github.com/r5d)
+* [extractor/pornhub] Extract `uploader_id` field by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/radiofrance] Add more radios by [bubbleguuum](https://github.com/bubbleguuum)
+* [extractor/rumble] Detect JS embed
+* [extractor/rumble] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [extractor/southpark] Add `southpark.lat` extractor by [darkxex](https://github.com/darkxex)
+* [extractor/spotify:show] Fix extractor
+* [extractor/tiktok] Detect embeds
+* [extractor/tiktok] Extract `SIGI_STATE` by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan), [sulyi](https://github.com/sulyi)
+* [extractor/tver] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/vevo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/yahoo:gyao] Fix extractor
+* [extractor/zattoo] Fix live streams by [miseran](https://github.com/miseran)
+* [extractor/zdf] Improve format sorting by [elyse0](https://github.com/elyse0)
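+
+The `functools.cached_property` compat entry above is a backport for older Pythons; a minimal sketch of the idea (assumed shape, not the project's actual implementation):
+
+```python
+import functools
+
+try:
+    cached_property = functools.cached_property  # available on Python 3.8+
+except AttributeError:
+    class cached_property:
+        """Minimal backport sketch: compute the value once, then store it on the instance."""
+        def __init__(self, func):
+            self.func = func
+            functools.update_wrapper(self, func)
+
+        def __get__(self, instance, owner=None):
+            if instance is None:
+                return self
+            # After the first access, the instance attribute shadows this non-data descriptor
+            value = instance.__dict__[self.func.__name__] = self.func(instance)
+            return value
+```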
+
+
+### 2022.05.18
+
+* Add support for SSL client certificate authentication by [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf)
+ * Adds `--client-certificate`, `--client-certificate-key`, `--client-certificate-password` (see the API sketch at the end of this section)
+* Add `--match-filter -` to interactively ask for each video
+* Make `--max-downloads` obey `--break-per-input`
+* Allow use of weaker ciphers with `--legacy-server-connect`
+* Don't imply `-s` for later stages of `-O`
+* Fix `--date today`
+* Fix `--skip-unavailable-fragments`
+* Fix color in `-q -F`
+* Fix redirect HTTP method handling by [coletdjnz](https://github.com/coletdjnz)
+* Improve `--clean-infojson`
+* Remove warning for videos with an empty title
+* Run `FFmpegFixupM3u8PP` for live-streams if needed
+* Show name of downloader in verbose log
+* [cookies] Allow `cookiefile` to be a text stream
+* [cookies] Report progress when importing cookies
+* [downloader/ffmpeg] Specify headers for each URL by [elyse0](https://github.com/elyse0)
+* [fragment] Do not change chunk-size when `--test`
+* [fragment] Make single thread download work for `--live-from-start` by [Lesmiscore](https://github.com/Lesmiscore)
+* [hls] Fix `byte_range` for `EXT-X-MAP` fragment by [fstirlitz](https://github.com/fstirlitz)
+* [http] Fix retrying on read timeout by [coletdjnz](https://github.com/coletdjnz)
+* [ffmpeg] Fix features detection
+* [EmbedSubtitle] Enable for more video extensions
+* [EmbedThumbnail] Disable thumbnail conversion for mkv by [evansp](https://github.com/evansp)
+* [EmbedThumbnail] Do not obey `-k`
+* [EmbedThumbnail] Do not remove id3v1 tags
+* [FFmpegMetadata] Remove `\0` from metadata
+* [FFmpegMetadata] Remove filename from attached info-json
+* [FixupM3u8] Obey `--hls-prefer-mpegts`
+* [Sponsorblock] Don't crash when duration is unknown
+* [XAttrMetadata] Refactor and document dependencies
+* [extractor] Document netrc machines
+* [extractor] Update `manifest_url`s after redirect by [elyse0](https://github.com/elyse0)
+* [extractor] Update dash `manifest_url` after redirects by [elyse0](https://github.com/elyse0)
+* [extractor] Use `classmethod`/`property` where possible
+* [generic] Refactor `_extract_rss`
+* [utils] `is_html`: Handle double BOM
+* [utils] `locked_file`: Ignore illegal seek on `truncate` by [jakeogh](https://github.com/jakeogh)
+* [utils] `sanitize_path`: Fix when path is empty string
+* [utils] `write_string`: Workaround newline issue in `conhost`
+* [utils] `certifi`: Make sure the pem file exists
+* [utils] Fix `WebSocketsWrapper`
+* [utils] `locked_file`: Do not give executable bits for newly created files by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] `YoutubeDLCookieJar`: Detect and reject JSON file by [Lesmiscore](https://github.com/Lesmiscore)
+* [test] Convert warnings into errors and fix some existing warnings by [fstirlitz](https://github.com/fstirlitz)
+* [dependencies] Create module with all dependency imports
+* [compat] Split into sub-modules by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [compat] Implement `compat.imghdr`
+* [build] Add `make uninstall` by [MrRawes](https://github.com/MrRawes)
+* [build] Avoid use of `install -D`
+* [build] Fix `Makefile` by [putnam](https://github.com/putnam)
+* [build] Fix `--onedir` on macOS
+* [build] Add more test-runners
+* [cleanup] Deprecate some compat vars by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Remove unused code paths, extractors, scripts and tests by [fstirlitz](https://github.com/fstirlitz)
+* [cleanup] Upgrade syntax (`pyupgrade`) and sort imports (`isort`)
+* [cleanup, docs, build] Misc fixes
+* [BilibiliLive] Add extractor by [HE7086](https://github.com/HE7086), [pukkandan](https://github.com/pukkandan)
+* [Fifa] Add extractor by [Bricio](https://github.com/Bricio)
+* [goodgame] Add extractor by [nevack](https://github.com/nevack)
+* [gronkh] Add playlist extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [icareus] Add extractor by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [iwara] Add playlist extractors by [i6t](https://github.com/i6t)
+* [Likee] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [masters] Add extractor by [m4tu4g](https://github.com/m4tu4g)
+* [nebula] Add support for subscriptions by [hheimbuerger](https://github.com/hheimbuerger)
+* [Podchaser] Add extractors by [connercsbn](https://github.com/connercsbn)
+* [rokfin:search] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add `:ytnotifications` extractor by [krichbanana](https://github.com/krichbanana)
+* [youtube] Add YoutubeStoriesIE (`ytstories:<channel UCID>`) by [coletdjnz](https://github.com/coletdjnz)
+* [ZingMp3] Add chart and user extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [adn] Update AES key by [elyse0](https://github.com/elyse0)
+* [adobepass] Allow cookies for authenticating MSO
+* [bandcamp] Exclude merch links by [Yipten](https://github.com/Yipten)
+* [chingari] Fix archiving and tests
+* [DRTV] Improve `_VALID_URL` by [vertan](https://github.com/vertan)
+* [facebook] Improve thumbnail extraction by [Wikidepia](https://github.com/Wikidepia)
+* [fc2] Stop heartbeating once FFmpeg finishes by [Lesmiscore](https://github.com/Lesmiscore)
+* [Gofile] Fix extraction and support password-protected links by [mehq](https://github.com/mehq)
+* [hotstar, cleanup] Refactor extractors
+* [InfoQ] Don't fail on missing audio format by [evansp](https://github.com/evansp)
+* [Jamendo] Extract more metadata by [evansp](https://github.com/evansp)
+* [kaltura] Update API calls by [flashdagger](https://github.com/flashdagger)
+* [KhanAcademy] Fix extractor by [rand-net](https://github.com/rand-net)
+* [LCI] Fix extractor by [MarwenDallel](https://github.com/MarwenDallel)
+* [lrt] Support livestreams by [GiedriusS](https://github.com/GiedriusS)
+* [niconico] Set `expected_protocol` to a public field
+* [Niconico] Support 2FA by [ekangmonyet](https://github.com/ekangmonyet)
+* [Olympics] Fix format extension
+* [openrec:movie] Enable fallback for /movie/ URLs
+* [PearVideo] Add fallback for formats by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [radiko] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add `release_year`
+* [reddit] Prevent infinite loop
+* [rokfin] Implement login by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruutu] Support hs.fi embeds by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [spotify] Detect iframe embeds by [fstirlitz](https://github.com/fstirlitz)
+* [telegram] Fix metadata extraction
+* [tmz, cleanup] Update tests by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [toggo] Fix `_VALID_URL` by [ca-za](https://github.com/ca-za)
+* [trovo] Update to new API by [nyuszika7h](https://github.com/nyuszika7h)
+* [TVer] Improve extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Pass headers for each format by [Lesmiscore](https://github.com/Lesmiscore)
+* [VideocampusSachsen] Improve extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [vimeo] Fix extractors
+* [wat] Fix extraction of multi-language videos and subtitles by [elyse0](https://github.com/elyse0)
+* [wistia] Fix `_VALID_URL` by [dirkf](https://github.com/dirkf)
+* [youtube, cleanup] Minor refactoring by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add Piped instance URLs by [JordanWeatherby](https://github.com/JordanWeatherby)
+* [youtube] Deprioritize auto-generated thumbnails
+* [youtube] Deprioritize format 22 (often damaged)
+* [youtube] Fix episode metadata extraction
+* [zee5] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [zingmp3, cleanup] Refactor extractors
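+
+The SSL client-certificate options added in this release can also be set through the embedding API; a hedged sketch, assuming the option keys mirror the flag names:
+
+```python
+from hypervideo_dl import YoutubeDL
+
+opts = {
+    'client_certificate': '/path/to/client.crt',       # --client-certificate
+    'client_certificate_key': '/path/to/client.key',   # --client-certificate-key
+    'client_certificate_password': 'hunter2',          # --client-certificate-password (if the key is encrypted)
+}
+with YoutubeDL(opts) as ydl:
+    ydl.download(['https://example.com/video'])  # placeholder URL
+```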
+
+
+### 2022.04.08
+
+* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
+* Treat multiple `--match-filters` as OR
+* File locking improvements:
+ * Do not lock downloading file on Windows
+ * Do not prevent download if locking is unsupported
+ * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan)
+ * Fix non-blocking non-exclusive lock
+* De-prioritize automatic subtitles when no `--sub-lang` is given
+* Exit after `--dump-user-agent`
+* Fallback to video-only format when selecting by extension
+* Fix `--abort-on-error` for subtitles
+* Fix `--no-overwrite` for playlist infojson
+* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger)
+* Fix `--sleep-interval`
+* Fix `--throttled-rate`
+* Fix `autonumber`
+* Fix case of `http_headers`
+* Fix filepath sanitization in `--print-to-file`
+* Handle float in `--wait-for-video`
+* Ignore `mhtml` formats from `-f mergeall`
+* Ignore format-specific fields in initial pass of `--match-filter`
+* Protect stdout from unexpected progress and console-title
+* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz)
+* Remove incorrect warning for `--dateafter`
+* Show warning when all media formats have DRM
+* [downloader] Fix invocation of `HttpieFD`
+* [http] Fix #3215
+* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
+* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore)
+* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz)
+* [mhtml] Fix fragments with absolute urls by [coletdjnz](https://github.com/coletdjnz)
+* [extractor] Add `_perform_login` function
+* [extractor] Allow control characters inside json
+* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz)
+* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore)
+* [ffmpeg] Cache version data
+* [FFmpegConcat] Ensure final directory exists
+* [FfmpegMetadata] Write id3v1 tags
+* [FFmpegVideoConvertor] Add more formats to `--remux-video`
+* [FFmpegVideoConvertor] Ensure all streams are copied
+* [MetadataParser] Validate outtmpl early
+* [outtmpl] Fix replacement/default when used with alternate
+* [outtmpl] Limit changes during sanitization
+* [phantomjs] Fix bug
+* [test] Add `test_locked_file`
+* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h)
+* [utils] `traverse_obj`: Allow filtering by value
+* [utils] Add `filter_dict`, `get_first`, `try_call` (see the sketch at the end of this section)
+* [utils] `ExtractorError`: Fix for older Python versions
+* [utils] `WebSocketsWrapper`: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore)
+* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz)
+* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes)
+* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli)
+* [docs] Remove readthedocs
+* [build] Add `requirements.txt` to pip distributions
+* [cleanup, postprocessor] Create `_download_json`
+* [cleanup, vimeo] Fix tests
+* [cleanup] Misc fixes and minor cleanup
+* [cleanup] Use `_html_extract_title`
+* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [arte] Add `format_note` to m3u8 formats
+* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte)
+* [BanBye] Add extractor by [mehq](https://github.com/mehq)
+* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69)
+* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio)
+* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon)
+* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon)
+* [Jable] Add extractors by [mehq](https://github.com/mehq)
+* [LastFM] Add extractors by [mehq](https://github.com/mehq)
+* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa)
+* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark)
+* [PokemonSoundLibrary] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299)
+* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [afreecatv] Match new vod url by [wlritchi](https://github.com/wlritchi)
+* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle)
+* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi)
+* [BRMediathek] Fix `_VALID_URL`
+* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1)
+* [crunchyroll] Fix inheritance
+* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb)
+* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [ellentube] Extract subtitles from manifest
+* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan)
+* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz)
+* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
+* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [Nebula] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0
+* [niconico] Fix extraction of thumbnails and uploader (#3266)
+* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore)
+* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk)
+* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz)
+* [ParamountPlus, CBS] Change `_VALID_URL` by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian)
+* [Piapro] Extract description with break lines by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Fix extraction of http formats by [nixxo](https://github.com/nixxo)
+* [rumble] Unescape title
+* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore)
+* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen)
+* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui)
+* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [TikTokVM] Fix redirect to user URL
+* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [TVer] Support landing page by [vvto33](https://github.com/vvto33)
+* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore)
+* [veo] Fix `_VALID_URL`
+* [Veo] Fix extractor by [i6t](https://github.com/i6t)
+* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h)
+* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29)
+* [viu] Fix extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
+* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz)
+* [wget] Fix proxy by [kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz)
+* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0)
+* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add extractor-arg to skip auto-translated subs
+* [youtube] Avoid false positives when detecting damaged formats
+* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev)
+* [youtube] Fix auto-translated automatic captions
+* [youtube] Fix pagination of `membership` tab
+* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Minor improvements
+* [youtube:tab] Return shorts url if video is a short by [coletdjnz](https://github.com/coletdjnz)
+* [Zattoo] Fix extractors by [goggle](https://github.com/goggle)
+* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299)
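+
+For illustration, a hedged sketch of what the new `filter_dict` and `try_call` helpers do (simplified, not the exact implementations):
+
+```python
+def filter_dict(d):
+    """Drop keys whose value is None."""
+    return {k: v for k, v in d.items() if v is not None}
+
+def try_call(*funcs, args=(), kwargs=None):
+    """Return the first non-None result among the given callables, swallowing exceptions."""
+    for f in funcs:
+        try:
+            result = f(*args, **(kwargs or {}))
+        except Exception:
+            continue
+        if result is not None:
+            return result
+
+print(filter_dict({'id': '123', 'title': None}))        # {'id': '123'}
+print(try_call(lambda: int('x'), lambda: int('42')))    # 42
+```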
+
+
+### 2022.03.08.1
+
+* [cleanup] Refactor `__init__.py`
+* [build] Fix bug
+
+### 2022.03.08
+
+* Merge youtube-dl: Upto [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
+* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1) (see the example at the end of this section)
+* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
+* Add pre-processor stage `after_filter`
+* Better error message when no `--live-from-start` format
+* Create necessary directories for `--print-to-file`
+* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
+* Fix `-all` for `--sub-langs`
+* Fix doubling of `video_id` in `ExtractorError`
+* Fix for when stdout/stderr encoding is `None`
+* Handle negative duration from extractor
+* Implement `--add-header` without modifying `std_headers`
+* Obey `--abort-on-error` for "ffmpeg not installed"
+* Set `webpage_url_...` from `webpage_url` and not input URL
+* Tolerate failure to `--write-link` due to unknown URL
+* [aria2c] Add `--http-accept-gzip=true`
+* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev)
+* [cookies] Update macOS 12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
+* [devscripts] Improve `prepare_manpage`
+* [downloader] Do not use aria2c for non-native `m3u8`
+* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [extractor] Allow `http_headers` to be specified for `thumbnails`
+* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
+* [FFmpegConcat] Abort on `--simulate`
+* [FormatSort] Consider `acodec`=`ogg` as `vorbis`
+* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
+* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [generic] Pass referer to extracted formats
+* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
+* [options] Better ambiguous option resolution
+* [options] Rename `--clean-infojson` to `--clean-info-json`
+* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
+* [SponsorBlock] Minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
+* [utils] Better traceback for `ExtractorError`
+* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
+* [utils] Improve file locking
+* [utils] `OnDemandPagedList`: Do not download pages after error
+* [utils] `render_table`: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
+* [utils] Validate `DateRange` input
+* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
+* [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
+* [cleanup, docs] Misc cleanup
+* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
+* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
+* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
+* [nfb] Add extractor by [ofkz](https://github.com/ofkz)
+* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
+* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
+* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
+* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
+* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
+* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
+* [ard] Fix valid URL
+* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
+* [bandcamp] Detect acodec
+* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
+* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
+* [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
+* [bigo] Fix extractor to not use `form_params`
+* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
+* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
+* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
+* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
+* [cspan] Support C-SPAN Congress videos by [Grabien](https://github.com/Grabien)
+* [dropbox] Fix regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
+* [Gettr] Fix formats order by [i6t](https://github.com/i6t)
+* [Gettr] Improve extractor by [i6t](https://github.com/i6t)
+* [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
+* [lbry] Fix `--ignore-no-formats-error`
+* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
+* [mildom] Fix linter
+* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
+* [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
+* [niconico:tag] Add support for searching tags
+* [nrk] Add fallback API
+* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
+* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
+* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
+* [spiegel] Fix `_VALID_URL`
+* [ThumbnailsConvertor] Support `webp`
+* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
+* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
+* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
+* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
+* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitch] Fix field name of `view_count`
+* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
+* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
+* [youtube:tab] Add `approximate_date` extractor-arg
+* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Reject webpage data if redirected to home page
+* [youtube] De-prioritize potentially damaged formats
+* [youtube] Differentiate descriptive audio by language code
+* [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
+* [youtube] Fix automatic captions
+* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [youtube] Further de-prioritize 3gp format
+* [youtube] Label original auto-subs
+* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
+* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
+* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
+* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
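+
+A hedged example of the new regex operator and quoting in format filters (selector syntax assumed to follow the upstream documentation):
+
+```python
+from hypervideo_dl import YoutubeDL
+
+opts = {
+    # ~= applies a regex to a string field; quoting allows spaces and commas
+    'format': "bv[format_note~='(?i)premium']+ba/b",
+}
+with YoutubeDL(opts) as ydl:
+    ydl.download(['https://example.com/video'])  # placeholder URL
+```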
+
+
+### 2022.02.04
+
+* [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:search] Add tests
+* [twitcasting] Enforce UTF-8 for POST payload by [Lesmiscore](https://github.com/Lesmiscore)
+* [mediaset] Fix extractor by [nixxo](https://github.com/nixxo)
+* [websocket] Make syntax error in `websockets` module non-fatal
+
+### 2022.02.03
+
+* Merge youtube-dl: Upto [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
+* Add option `--print-to-file`
+* Make nested `--config-locations` relative to parent file
+* Ensure `_type` is present in `info.json`
+* Fix `--compat-options list-formats`
+* Fix/improve `InAdvancePagedList`
+* [downloader/ffmpeg] Handle unknown formats better
+* [outtmpl] Handle `-o ""` better
+* [outtmpl] Handle hard-coded file extension better
+* [extractor] Add convenience function `_yes_playlist`
+* [extractor] Allow non-fatal `title` extraction
+* [extractor] Extract video inside `Article` json_ld
+* [generic] Allow further processing of json_ld URL
+* [cookies] Fix keyring selection for unsupported desktops
+* [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
+* [aes] Add `unpad_pkcs7` (see the sketch at the end of this section)
+* [test] Fix `test_youtube_playlist_noplaylist`
+* [docs,cleanup] Misc cleanup
+* [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
+* [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
+* [youtube] Add extractor `YoutubeMusicSearchURLIE`
+* [archive.org] Ignore unnecessary files
+* [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
+* [bilibili] Fix extractor, make anthology title non-fatal
+* [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
+* [cctv] De-prioritize sample format
+* [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
+* [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
+* [doodstream] Fix extractor
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [FFmpegConcat] Abort on `--skip-download` and download errors
+* [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [globo] Fix extractor by [Bricio](https://github.com/Bricio)
+* [glomex] Simplify embed detection
+* [GoogleSearch] Fix extractor
+* [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
+* [MySpass] Fix video url processing by [trassshhub](https://github.com/trassshhub)
+* [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
+* [orf:tvthek] Lazy playlist extraction and obey `--no-playlist`
+* [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
+* [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
+* [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
+* [Vimm] Add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
+* [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix n-sig for player e06dea74
+* [youtube, cleanup] Misc fixes and cleanup
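+
+A hedged sketch of the PKCS#7 unpadding added to the aes module (shown here on bytes; the in-tree helpers operate on lists of ints):
+
+```python
+def unpad_pkcs7(data):
+    """Strip PKCS#7 padding: the last byte encodes how many padding bytes to drop."""
+    return data[:-data[-1]]
+
+assert unpad_pkcs7(b'hello\x03\x03\x03') == b'hello'
+```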
+
+
+### 2022.01.21
+
+* Add option `--concat-playlist` to **concat videos in a playlist**
+* Allow **multiple and nested configuration files**
+* Add more post-processing stages (`after_video`, `playlist`)
+* Allow `--exec` to be run at any post-processing stage (Deprecates `--exec-before-download`)
+* Allow `--print` to be run at any post-processing stage
+* Allow listing formats, thumbnails, subtitles using `--print` by [pukkandan](https://github.com/pukkandan), [Zirro](https://github.com/Zirro)
+* Add fields `video_autonumber`, `modified_date`, `modified_timestamp`, `playlist_count`, `channel_follower_count`
+* Add key `requested_downloads` in the root `info_dict`
+* Write `download_archive` only after all formats are downloaded
+* [FfmpegMetadata] Allow setting metadata of individual streams using `meta<n>_` prefix
+* Add option `--legacy-server-connect` by [xtkoba](https://github.com/xtkoba)
+* Allow escaped `,` in `--extractor-args`
+* Allow unicode characters in `info.json`
+* Check for existing thumbnail/subtitle in final directory
+* Don't treat empty containers as `None` in `sanitize_info`
+* Fix `-s --ignore-no-formats --force-write-archive`
+* Fix live title for multiple formats
+* List playlist thumbnails in `--list-thumbnails`
+* Raise error if subtitle download fails
+* [cookies] Fix bug when keyring is unspecified
+* [ffmpeg] Ignore unknown streams, standardize use of `-map 0`
+* [outtmpl] Alternate form for `D` and fix suffix's case
+* [utils] Add `Sec-Fetch-Mode` to `std_headers`
+* [utils] Fix `format_bytes` output for Bytes by [pukkandan](https://github.com/pukkandan), [mdawar](https://github.com/mdawar)
+* [utils] Handle `ss:xxx` in `parse_duration`
+* [utils] Improve parsing for nested HTML elements by [zmousm](https://github.com/zmousm), [pukkandan](https://github.com/pukkandan)
+* [utils] Use key `None` in `traverse_obj` to return as-is (see the sketch at the end of this section)
+* [extractor] Detect more subtitle codecs in MPD manifests by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Extract chapters from JSON-LD by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [extractor] Extract thumbnails from JSON-LD by [nixxo](https://github.com/nixxo)
+* [extractor] Improve `url_result` and related
+* [generic] Improve KVS player extraction by [trassshhub](https://github.com/trassshhub)
+* [build] Reduce dependency on third party workflows
+* [extractor,cleanup] Use `_search_nextjs_data`, `format_field`
+* [cleanup] Minor fixes and cleanup
+* [docs] Improvements
+* [test] Fix TestVerboseOutput
+* [afreecatv] Add livestreams extractor by [wlritchi](https://github.com/wlritchi)
+* [callin] Add extractor by [foghawk](https://github.com/foghawk)
+* [CrowdBunker] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [daftsex] Add extractors by [k3ns1n](https://github.com/k3ns1n)
+* [digitalconcerthall] Add extractor by [teridon](https://github.com/teridon)
+* [Drooble] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [EuropeanTour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [iq.com] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [KelbyOne] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [LnkIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [MainStreaming] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [megatvcom] Add extractors by [zmousm](https://github.com/zmousm)
+* [Newsy] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [noodlemagazine] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [PokerGo] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [Pornez] Add extractor by [mozlima](https://github.com/mozlima)
+* [PRX] Add Extractors by [coletdjnz](https://github.com/coletdjnz)
+* [RTNews] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Rule34video] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [tvopengr] Add extractors by [zmousm](https://github.com/zmousm)
+* [Vimm] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [glomex] Add extractors by [zmousm](https://github.com/zmousm)
+* [instagram] Add story/highlight extractor by [u-spec-png](https://github.com/u-spec-png)
+* [openrec] Add movie extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add Raiplaysound extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [aparat] Fix extractor
+* [ard] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [BiliIntl] Add login by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CeskaTelevize] Use `http` for manifests
+* [CTVNewsIE] Add fallback for video search by [Ashish0804](https://github.com/Ashish0804)
+* [dplay] Migrate DiscoveryPlusItaly to DiscoveryPlus by [timendum](https://github.com/timendum)
+* [dplay] Re-structure DiscoveryPlus extractors
+* [Dropbox] Support password protected files and more formats by [zenerdi0de](https://github.com/zenerdi0de)
+* [facebook] Fix extraction from groups
+* [facebook] Improve title and uploader extraction
+* [facebook] Parse dash manifests
+* [fox] Extract m3u8 from preview by [ischmidt20](https://github.com/ischmidt20)
+* [funk] Support origin URLs
+* [gfycat] Fix `uploader`
+* [gfycat] Support embeds by [coletdjnz](https://github.com/coletdjnz)
+* [hotstar] Add extractor args to ignore tags by [Ashish0804](https://github.com/Ashish0804)
+* [hrfernsehen] Fix ardloader extraction by [CreaValix](https://github.com/CreaValix)
+* [instagram] Fix username extraction for stories and highlights by [nyuszika7h](https://github.com/nyuszika7h)
+* [kakao] Detect geo-restriction
+* [line] Remove `tv.line.me` by [sian1468](https://github.com/sian1468)
+* [mixch] Add `MixchArchiveIE` by [Lesmiscore](https://github.com/Lesmiscore)
+* [mixcloud] Detect restrictions by [llacb47](https://github.com/llacb47)
+* [NBCSports] Fix extraction of platform URLs by [ischmidt20](https://github.com/ischmidt20)
+* [Nexx] Extract more metadata by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Nexx] Support 3q CDN by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [pbs] De-prioritize AD formats
+* [PornHub,YouTube] Refresh onion addresses by [unit193](https://github.com/unit193)
+* [RedBullTV] Parse subtitles from manifest by [Ashish0804](https://github.com/Ashish0804)
+* [streamcz] Fix extractor by [arkamar](https://github.com/arkamar), [pukkandan](https://github.com/pukkandan)
+* [Ted] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [trassshhub](https://github.com/trassshhub)
+* [Theta] Fix valid URL by [alerikaisattera](https://github.com/alerikaisattera)
+* [ThisOldHouseIE] Add support for premium videos by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Fix extraction for sigi-based webpages, add API fallback by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Pass cookies to formats, and misc fixes by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Extract captions, user thumbnail by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Change app version by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [TVer] Extract message for unaired live by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Refactor extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitter] Fix video in quoted tweets
+* [veoh] Improve extractor by [foghawk](https://github.com/foghawk)
+* [vk] Capture `clip` URLs
+* [vk] Fix VKUserVideosIE by [Ashish0804](https://github.com/Ashish0804)
+* [vk] Improve `_VALID_URL` by [k3ns1n](https://github.com/k3ns1n)
+* [VrtNU] Handle empty title by [pgaig](https://github.com/pgaig)
+* [XVideos] Check HLS formats by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [yahoo:gyao] Improve playlist handling by [hyano](https://github.com/hyano)
+* [youtube:tab] Extract more playlist metadata by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Raise error on tab redirect by [krichbanana](https://github.com/krichbanana), [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Update Innertube clients by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Detect live-stream embeds
+* [youtube] Do not return `upload_date` for playlists
+* [youtube] Extract channel subscriber count by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Make invalid storyboard URL non-fatal
+* [youtube] Enforce UTC, update innertube clients and tests by [coletdjnz](https://github.com/coletdjnz)
+* [zdf] Add chapter extraction by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Add geo-bypass
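+
+A simplified, hedged sketch of the new `None`-key behaviour in `traverse_obj` (the real helper is far more general):
+
+```python
+def traverse(obj, *keys):
+    """Walk nested dicts; a key of None keeps the current object as-is."""
+    for key in keys:
+        if key is None:
+            continue  # return the object at this level unchanged
+        obj = obj.get(key) if isinstance(obj, dict) else None
+        if obj is None:
+            return None
+    return obj
+
+assert traverse({'a': {'b': 1}}, 'a', None, 'b') == 1
+```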
+
+
+### 2021.12.27
+
+* Avoid recursion error when re-extracting info
+* [ffmpeg] Fix position of `--ppa`
+* [aria2c] Don't show progress when `--no-progress`
+* [cookies] Support other keyrings by [mbway](https://github.com/mbway)
+* [EmbedThumbnail] Prefer AtomicParsley over ffmpeg if available
+* [generic] Fix HTTP KVS Player by [git-anony-mouse](https://github.com/git-anony-mouse)
+* [ThumbnailsConvertor] Fix for when there are no thumbnails
+* [docs] Add examples for using `TYPES:` in `-P`/`-o`
+* [PixivSketch] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [tiktok] Add music, sticker and tag IEs by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [BiliIntl] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CBC] Fix URL regex
+* [tiktok] Fix `extractor_key` used in archive
+* [youtube] **End `live-from-start` properly when stream ends with 403**
+* [Zee5] Fix `_VALID_URL` for TV shows by [Ashish0804](https://github.com/Ashish0804)
+
+### 2021.12.25
+
+* [dash,youtube] **Download live from start to end** by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
+ * Add option `--live-from-start` to enable downloading live videos from the start (see the API sketch at the end of this section)
+ * Add key `is_from_start` in formats to identify formats (of live videos) that download from the start
+ * [dash] Create protocol `http_dash_segments_generator` that allows a function to be passed instead of fragments
+ * [fragment] Allow multiple live dash formats to download simultaneously
+ * [youtube] Implement fragment re-fetching for the live dash formats
+ * [youtube] Re-extract dash manifest every 5 hours (the manifest expires in 6 hours)
+ * [postprocessor/ffmpeg] Add `FFmpegFixupDuplicateMoovPP` to fixup duplicated moov atoms
+ * Known issues:
+ * Ctrl+C doesn't work on Windows when downloading multiple formats
+ * If video becomes private, download hangs
+* [SponsorBlock] Add `Filler` and `Highlight` categories by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+ * Change `--sponsorblock-cut all` to `--sponsorblock-cut default` if you do not want filler sections to be removed
+* Add field `webpage_url_domain`
+* Add interactive format selection with `-f -`
+* Add option `--file-access-retries` by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [outtmpl] Add alternate forms `S`, `D` and improve `id` detection
+* [outtmpl] Add operator `&` for replacement text by [PilzAdam](https://github.com/PilzAdam)
+* [EmbedSubtitle] Disable duration check temporarily
+* [extractor] Add `_search_nuxt_data` by [nao20010128nao](https://github.com/nao20010128nao)
+* [extractor] Ignore errors in comment extraction when `-i` is given
+* [extractor] Standardize `_live_title`
+* [FormatSort] Prevent incorrect deprecation warning
+* [generic] Extract m3u8 formats from JSON-LD
+* [postprocessor/ffmpeg] Always add `faststart`
+* [utils] Fix parsing `YYYYMMDD` dates in Nov/Dec by [wlritchi](https://github.com/wlritchi)
+* [utils] Improve `parse_count`
+* [utils] Update `std_headers` by [kikuyan](https://github.com/kikuyan), [fstirlitz](https://github.com/fstirlitz)
+* [lazy_extractors] Fix for search IEs
+* [extractor] Support default implicit graph in JSON-LD by [zmousm](https://github.com/zmousm)
+* Allow `--no-write-thumbnail` to override `--write-all-thumbnails`
+* Fix `--throttled-rate`
+* Fix control characters being printed to `--console-title`
+* Fix PostProcessor hooks not registered for some PPs
+* Pre-process when using `--flat-playlist`
+* Remove known invalid thumbnails from `info_dict`
+* Add warning when using `-f best`
+* Use `parse_duration` for `--wait-for-video` and some minor fixes
+* [test/download] Add more fields
+* [test/download] Ignore field `webpage_url_domain` by [std-move](https://github.com/std-move)
+* [compat] Suppress errors in enabling VT mode
+* [docs] Improve manpage format by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [docs,cleanup] Minor fixes and cleanup
+* [cleanup] Fix some typos by [unit193](https://github.com/unit193)
+* [ABC:iview] Add show extractor by [pabs3](https://github.com/pabs3)
+* [dropout] Add extractor by [TwoThousandHedgehogs](https://github.com/TwoThousandHedgehogs), [pukkandan](https://github.com/pukkandan)
+* [GameJolt] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [gofile] Add extractor by [Jertzukka](https://github.com/Jertzukka), [Ashish0804](https://github.com/Ashish0804)
+* [hse] Add extractors by [cypheron](https://github.com/cypheron), [pukkandan](https://github.com/pukkandan)
+* [NateTV] Add NateIE and NateProgramIE by [Ashish0804](https://github.com/Ashish0804), [Hyeeji](https://github.com/Hyeeji)
+* [OpenCast] Add extractors by [bwildenhain](https://github.com/bwildenhain), [C0D3D3V](https://github.com/C0D3D3V)
+* [rtve] Add `RTVEAudioIE` by [kebianizao](https://github.com/kebianizao)
+* [Rutube] Add RutubeChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [skeb] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [soundcloud] Add related tracks extractor by [Lapin0t](https://github.com/Lapin0t)
+* [toggo] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [TrueID] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [audiomack] Update album and song `_VALID_URL` by [abdullah-if](https://github.com/abdullah-if), [dirkf](https://github.com/dirkf)
+* [CBC Gem] Extract 1080p formats by [DavidSkrundz](https://github.com/DavidSkrundz)
+* [ceskatelevize] Fetch iframe from nextJS data by [mkubecek](https://github.com/mkubecek)
+* [crackle] Look for non-DRM formats by [raleeper](https://github.com/raleeper)
+* [dplay] Temporary fix for `discoveryplus.com/it`
+* [DiscoveryPlusShowBaseIE] Yield actual video ID by [Ashish0804](https://github.com/Ashish0804)
+* [Facebook] Handle redirect URLs
+* [fujitv] Extract 1080p from `tv_android` m3u8 by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [gronkh] Support new URL pattern by [Sematre](https://github.com/Sematre)
+* [instagram] Expand valid URL by [u-spec-png](https://github.com/u-spec-png)
+* [Instagram] Try bypassing login wall with embed page by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Jamendo] Fix use of `_VALID_URL_RE` by [jaller94](https://github.com/jaller94)
+* [LBRY] Support livestreams by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [NJPWWorld] Extract formats from m3u8 by [aarubui](https://github.com/aarubui)
+* [NovaEmbed] Update player regex by [std-move](https://github.com/std-move)
+* [npr] Make SMIL extraction non-fatal by [r5d](https://github.com/r5d)
+* [ntvcojp] Extract NUXT data by [nao20010128nao](https://github.com/nao20010128nao)
+* [ok.ru] Add mobile fallback by [nao20010128nao](https://github.com/nao20010128nao)
+* [olympics] Add uploader and cleanup by [u-spec-png](https://github.com/u-spec-png)
+* [ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* [PlutoTV] Expand `_VALID_URL`
+* [RaiNews] Fix extractor by [nixxo](https://github.com/nixxo)
+* [RCTIPlusSeries] Lazy extraction and video type selection by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [redtube] Handle formats delivered inside a JSON by [dirkf](https://github.com/dirkf), [nixxo](https://github.com/nixxo)
+* [SonyLiv] Add OTP login support by [Ashish0804](https://github.com/Ashish0804)
+* [Steam] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [TikTok] Pass cookies to mobile API by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [trovo] Fix inheritance of `TrovoChannelBaseIE`
+* [TVer] Extract better thumbnails by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [vimeo] Extract chapters
+* [web.archive:youtube] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:comments] Add more options for limiting number of comments extracted by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract more metadata from feeds/channels/playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract video thumbnails from playlist by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Ignore query when redirecting channel to playlist and cleanup of related code
+* [youtube] Fix `ytsearchdate`
+* [zdf] Support videos with different ptmd location by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Support /episodes in URL
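+
+A hedged sketch of enabling the new live-from-start download through the embedding API (the option key is assumed to mirror the `--live-from-start` flag):
+
+```python
+from hypervideo_dl import YoutubeDL
+
+with YoutubeDL({'live_from_start': True}) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=LIVESTREAM'])  # placeholder URL
+```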
+
+
+### 2021.12.01
+
+* **Add option `--wait-for-video` to wait for scheduled streams**
+* Add option `--break-per-input` to apply `--break-on...` to each input URL
+* Add option `--embed-info-json` to embed info.json in mkv
+* Add compat-option `embed-metadata`
+* Allow using a custom format selector through the API (see the sketch at the end of this section)
+* [AES] Add ECB mode by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Fix macOS build
+* [build] Save Git HEAD at release alongside version info
+* [build] Use `workflow_dispatch` for release
+* [downloader/ffmpeg] Fix for direct videos inside mpd manifests
+* [downloader] Add colors to download progress
+* [EmbedSubtitles] Slightly relax duration check and related cleanup
+* [ExtractAudio] Fix conversion to `wav` and `vorbis`
+* [ExtractAudio] Support `alac`
+* [extractor] Extract `average_rating` from JSON-LD
+* [FixupM3u8] Fixup MPEG-TS in MP4 container
+* [generic] Support mpd manifests without extension by [shirt](https://github.com/shirt-dev)
+* [hls] Better FairPlay DRM detection by [nyuszika7h](https://github.com/nyuszika7h)
+* [jsinterp] Fix splice to handle float (for youtube js player f1ca6900)
+* [utils] Allow alignment in `render_table` and add tests
+* [utils] Fix `PagedList`
+* [utils] Fix error when copying `LazyList`
+* Clarify video/audio-only formats in `-F`
+* Ensure directory exists when checking formats
+* Ensure path for link files exists by [Zirro](https://github.com/Zirro)
+* Ensure same config file is not loaded multiple times
+* Fix `postprocessor_hooks`
+* Fix `--break-on-archive` when pre-checking
+* Fix `--check-formats` for `mhtml`
+* Fix `--load-info-json` of playlists with failed entries
+* Fix `--trim-filename` when filename has `.`
+* Fix bug in parsing `--add-header`
+* Fix error in `report_unplayable_conflict` by [shirt](https://github.com/shirt-dev)
+* Fix writing playlist infojson with `--no-clean-infojson`
+* Validate `--geo-bypass-country`
+* [blogger] Add extractor by [pabs3](https://github.com/pabs3)
+* [breitbart] Add extractor by [Grabien](https://github.com/Grabien)
+* [CableAV] Add extractor by [j54vc1bk](https://github.com/j54vc1bk)
+* [CanalAlpha] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [CozyTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [CPTwentyFour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [DiscoveryPlus] Add `DiscoveryPlusItalyShowIE` by [Ashish0804](https://github.com/Ashish0804)
+* [ESPNCricInfo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [LinkedIn] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [mixch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [nebula] Add `NebulaCollectionIE` and rewrite extractor by [hheimbuerger](https://github.com/hheimbuerger)
+* [OneFootball] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [peer.tv] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [radiozet] Add extractor by [0xA7404A](https://github.com/0xA7404A) (Aurora)
+* [redgifs] Add extractor by [chio0hai](https://github.com/chio0hai)
+* [RedGifs] Add Search and User extractors by [Deer-Spangle](https://github.com/Deer-Spangle)
+* [rtrfm] Add extractor by [pabs3](https://github.com/pabs3)
+* [Streamff] Add extractor by [cntrl-s](https://github.com/cntrl-s)
+* [Stripchat] Add extractor by [zulaport](https://github.com/zulaport)
+* [Aljazeera] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [AmazonStoreIE] Fix regex to not match vdp urls by [Ashish0804](https://github.com/Ashish0804)
+* [ARDBetaMediathek] Handle new URLs
+* [bbc] Get all available formats by [nyuszika7h](https://github.com/nyuszika7h)
+* [Bilibili] Fix title extraction by [u-spec-png](https://github.com/u-spec-png)
+* [CBC Gem] Fix for shows that don't have all seasons by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [curiositystream] Add more metadata
+* [CuriosityStream] Fix series
+* [DiscoveryPlus] Rewrite extractors by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [HotStar] Set language field from tags by [Ashish0804](https://github.com/Ashish0804)
+* [instagram, cleanup] Refactor extractors
+* [Instagram] Display more login errors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [itv] Fix extractor by [staubichsauger](https://github.com/staubichsauger), [pukkandan](https://github.com/pukkandan)
+* [mediaklikk] Expand valid URL
+* [MTV] Improve mgid extraction by [Sipherdrakon](https://github.com/Sipherdrakon), [kikuyan](https://github.com/kikuyan)
+* [nexx] Better error message for unsupported format
+* [NovaEmbed] Fix extractor by [pukkandan](https://github.com/pukkandan), [std-move](https://github.com/std-move)
+* [PatreonUser] Do not capture RSS URLs
+* [Reddit] Add support for 1080p videos by [xenova](https://github.com/xenova)
+* [RoosterTeethSeries] Fix for multiple pages by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [sbs] Fix for movies and livestreams
+* [Senate.gov] Add SenateGovIE and fix SenateISVPIE by [Grabien](https://github.com/Grabien), [pukkandan](https://github.com/pukkandan)
+* [soundcloud:search] Fix pagination
+* [tiktok:user] Set `webpage_url` correctly
+* [Tokentube] Fix description by [u-spec-png](https://github.com/u-spec-png)
+* [trovo] Fix extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [tv2] Expand valid URL
+* [Tvplayhome] Fix extractor by [pukkandan](https://github.com/pukkandan), [18928172992817182](https://github.com/18928172992817182)
+* [twitch:vod] Add chapters by [mpeter50](https://github.com/mpeter50)
+* [twitch:vod] Extract live status by [DEvmIb](https://github.com/DEvmIb)
+* [VidLii] Add 720p support by [mrpapersonic](https://github.com/mrpapersonic)
+* [vimeo] Add fallback for config URL
+* [vimeo] Sort http formats higher
+* [WDR] Expand valid URL
+* [willow] Add extractor by [aarubui](https://github.com/aarubui)
+* [xvideos] Detect embed URLs by [4a1e2y5](https://github.com/4a1e2y5)
+* [xvideos] Fix extractor by [Yakabuff](https://github.com/Yakabuff)
+* [youtube, cleanup] Reorganize Tab and Search extractor inheritances
+* [youtube:search_url] Add playlist/channel support
+* [youtube] Add `default` player client by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Add storyboard formats
+* [youtube] Decrypt n-sig for URLs with `ratebypass`
+* [youtube] Minor improvement to format sorting
+* [cleanup] Add deprecation warnings
+* [cleanup] Refactor `JSInterpreter._seperate`
+* [Cleanup] Remove some unnecessary groups in regexes by [Ashish0804](https://github.com/Ashish0804)
+* [cleanup] Misc cleanup
+
+
+### 2021.11.10.1
+
+* Temporarily disable macOS build
+
+### 2021.11.10
+
+* [youtube] **Fix throttling by decrypting n-sig**
+* Merging extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai)
+ * [extractor] Add `_search_nextjs_data`
+ * [tvp] Fix extractors
+ * [tvp] Add TVPStreamIE
+ * [wppilot] Add extractors
+ * [polskieradio] Add extractors
+ * [radiokapital] Add extractors
+ * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), [sdomi](https://github.com/sdomi)
+* Separate `--check-all-formats` from `--check-formats`
+* Approximate filesize from bitrate
+* Don't create console in `windows_enable_vt_mode`
+* Fix bug in `--load-infojson` of playlists
+* [minicurses] Add colors to `-F` and standardize color-printing code
+* [outtmpl] Add type `link` for internet shortcut files
+* [outtmpl] Add alternate forms for `q` and `j`
+* [outtmpl] Do not traverse `None`
+* [fragment] Fix progress display in fragmented downloads
+* [downloader/ffmpeg] Fix vtt download with ffmpeg
+* [ffmpeg] Detect presence of setts and libavformat version
+* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan)
+* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal)
+* [FormatSort] `eac3` is better than `ac3`
+* [FormatSort] Fix some fields' defaults
+* [generic] Detect more json_ld
+* [generic] parse jwplayer with only the json URL
+* [extractor] Add keyword automatically to SearchIE descriptions
+* [extractor] Fix some errors being converted to `ExtractorError`
+* [utils] Add `join_nonempty`
+* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804)
+* [utils] Create `DownloadCancelled` exception
+* [utils] Parse `vp09` as vp9
+* [utils] Sanitize URL when determining protocol
+* [test/download] Fallback test to `bv`
+* [docs] Minor documentation improvements
+* [cleanup] Improvements to error and debug messages
+* [cleanup] Minor fixes and cleanup
+* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo)
+* [MLSSoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png)
+* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra)
+* [roosterteeth] Add series extractor
+* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8)
+* [youtube] Fix sorting for some videos
+* [youtube] Populate `thumbnail` with the best "known" thumbnail
+* [youtube] Refactor itag processing
+* [youtube] Remove unnecessary no-playlist warning
+* [youtube:tab] Add Invidious list for playlists/channels by [rhendric](https://github.com/rhendric)
+* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png)
+* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger)
+* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi)
+* [crunchyroll] Add extractor-args `language` and `hardsub`
+* [DiscoveryPlus] Allow language codes in URL
+* [imdb] Fix thumbnail by [ozburo](https://github.com/ozburo)
+* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png)
+* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png)
+* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png)
+* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn)
+* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8)
+* [la7] Fix extractor by [nixxo](https://github.com/nixxo)
+* [linkedin] Don't login multiple times
+* [mtv] Fix some videos by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png)
+* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf)
+* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [piksel] Fix sorting
+* [twitter] Do not sort by codec
+* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger)
+* [vimeo] Fix ondemand videos and direct URLs with hash
+* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us)
+* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804)
+* [VRT] Fix login by [pgaig](https://github.com/pgaig)
+* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h)
+* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h)
+* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+
+
+### 2021.10.22
+
+* [build] Improvements
+ * Build standalone macOS packages by [smplayer-dev](https://github.com/smplayer-dev)
+ * Release windows exe built with `py2exe`
+ * Enable lazy-extractors in releases
+ * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental)
+ * Clean up error reporting in update
+ * Refactor `pyinst.py`, misc cleanup and improve docs
+* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804)
+* [downloader] **Fix slow progress hooks**
+ * This was causing HLS/DASH downloads to be extremely slow in some situations
+* [downloader/ffmpeg] Improve simultaneous download and merge
+* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key
+* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp
+* [utils] Allow duration strings in `--match-filter`
+* Add HDR information to formats
+* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro)
+* Calculate more fields for merged formats
+* Do not verify thumbnail URLs unless `--check-formats` is specified
+* Don't create console for subprocesses on Windows
+* Fix `--restrict-filename` when used with default template
+* Fix `check_formats` output being written to stdout when `-qv`
+* Fix bug in storyboards
+* Fix conflict between `id` and `ext` in format selection
+* Fix verbose header not showing custom configs
+* Load archive only after printing verbose header
+* Make `duration_string` and `resolution` available in `--match-filter`
+* Re-implement deprecated option `--id`
+* Reduce default `--socket-timeout`
+* Write verbose header to logger
+* [outtmpl] Fix bug in expanding environment variables
+* [cookies] Local State should be opened as utf-8
+* [extractor,utils] Detect more codecs/mimetypes
+* [extractor] Detect `EXT-X-KEY` Apple FairPlay
+* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi)
+* [http] Retry on socket timeout and show the last encountered error
+* [fragment] Print error message when skipping fragment
+* [aria2c] Fix `--skip-unavailable-fragment`
+* [SponsorBlock] Obey `extractor-retries` and `sleep-requests`
+* [Merger] Do not add `aac_adtstoasc` to non-hls audio
+* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari)
+* [devscripts/run_tests] Use markers to filter tests by [sulyi](https://github.com/sulyi)
+* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h)
+* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman)
+* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [crunchyroll] Add season to flat-playlist
+* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code
+* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [hidive] Fix typo
+* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Raise appropriate error for DRM
+* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png)
+* [instagram] Show appropriate error when login is needed
+* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai)
+* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
+* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [SkyNewsAU] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47)
+* [tiktok] Fix typo and update tests
+* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804)
+* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182)
+* [vimeo] Fix embedded `player.vimeo`
+* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add auto-translated subtitles
+* [youtube] Expose different formats with same itag
+* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz)
+* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
+* [cleanup] Remove broken youtube login code
+* [cleanup] Standardize timestamp formatting code
+* [cleanup] Generalize `getcomments` implementation for extractors
+* [cleanup] Simplify search extractors code
+* [cleanup] misc
+
+
### 2021.10.10
* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor`
@@ -96,7 +1697,7 @@
* Add new option `--netrc-location`
* [outtmpl] Allow alternate fields using `,`
-* [outtmpl] Add format type `B` to treat the value as bytes (eg: to limit the filename to a certain number of bytes)
+* [outtmpl] Add format type `B` to treat the value as bytes, e.g. to limit the filename to a certain number of bytes
* Separate the options `--ignore-errors` and `--no-abort-on-error`
* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao)
* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao)
@@ -194,7 +1795,7 @@
* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari)
* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao)
-* [build] Provide `--onedir` zip for windows by [pukkandan](https://github.com/pukkandan)
+* [build] Provide `--onedir` zip for windows
* [cleanup,docs] Add deprecation warning in docs for some counter intuitive behaviour
* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden)
* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi)
@@ -486,7 +2087,7 @@
* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments)
- * Add extractor option `skip` for `youtube`. Eg: `--extractor-args youtube:skip=hls,dash`
+ * Add extractor option `skip` for `youtube`, e.g. `--extractor-args youtube:skip=hls,dash`
* Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
@@ -591,7 +2192,7 @@
* [utils] Generalize `traverse_dict` to `traverse_obj`
* [downloader/ffmpeg] Hide FFmpeg banner unless in verbose mode by [fstirlitz](https://github.com/fstirlitz)
* [build] Release `yt-dlp.tar.gz`
-* [build,update] Add GNU-style SHA512 and prepare updater for simlar SHA256 by [nihil-admirari](https://github.com/nihil-admirari)
+* [build,update] Add GNU-style SHA512 and prepare updater for similar SHA256 by [nihil-admirari](https://github.com/nihil-admirari)
* [pyinst] Show Python version in exe metadata by [nihil-admirari](https://github.com/nihil-admirari)
* [docs] Improve documentation of dependencies
* [cleanup] Mark unused files
@@ -1187,7 +2788,7 @@
* **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details
* **Format Selection:** See [Format Selection](README.md#format-selection) for details
* New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*`
- * Changed video format sorting to show video only files and video+audio files together.
+ * Changed video format sorting to show video only files and video+audio files together
* Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
* Added `b`,`w`,`v`,`a` as alias for `best`, `worst`, `video` and `audio` respectively
* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
@@ -1205,7 +2806,7 @@
* Cleaned up the fork for public use
-**PS**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)
+**Note**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)
### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc)
* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan)
@@ -1230,8 +2831,110 @@
* [spreaker] fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan)
* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk)
* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [ITV] BTCC URL update by [WolfganP](https://github.com/WolfganP)
* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv)
* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan)
* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan)
* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon)
+
+
+### Changelog of [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) till release 2020.11.11-3
+
+**Note**: This was constructed from the merge commit messages and may not be entirely accurate
+
+* [bandcamp] fix failing test. remove subclass hack by [insaneracist](https://github.com/insaneracist)
+* [bandcamp] restore album downloads by [insaneracist](https://github.com/insaneracist)
+* [francetv] fix extractor by [Surkal](https://github.com/Surkal)
+* [gdcvault] fix extractor by [blackjack4494](https://github.com/blackjack4494)
+* [hotstar] Move to API v1 by [theincognito-inc](https://github.com/theincognito-inc)
+* [hrfernsehen] add extractor by [blocktrron](https://github.com/blocktrron)
+* [kakao] new apis by [blackjack4494](https://github.com/blackjack4494)
+* [la7] fix missing protocol by [nixxo](https://github.com/nixxo)
+* [mailru] removed escaped braces, use urljoin, added tests by [nixxo](https://github.com/nixxo)
+* [MTV/Nick] universal mgid extractor + fix nick.de feed by [blackjack4494](https://github.com/blackjack4494)
+* [mtv] Fix a missing match_id by [nixxo](https://github.com/nixxo)
+* [Mtv] updated extractor logic & more by [blackjack4494](https://github.com/blackjack4494)
+* [ndr] support Daserste ndr by [blackjack4494](https://github.com/blackjack4494)
+* [Netzkino] Only use video id to find metadata by [TobiX](https://github.com/TobiX)
+* [newgrounds] fix: video download by [insaneracist](https://github.com/insaneracist)
+* [nitter] Add new extractor by [B0pol](https://github.com/B0pol)
+* [soundcloud] Resolve audio/x-wav by [tfvlrue](https://github.com/tfvlrue)
+* [soundcloud] sets pattern and tests by [blackjack4494](https://github.com/blackjack4494)
+* [SouthparkDE/MTV] another mgid extraction (mtv_base) feed url updated by [blackjack4494](https://github.com/blackjack4494)
+* [StoryFire] Add new extractor by [sgstair](https://github.com/sgstair)
+* [twitch] by [geauxlo](https://github.com/geauxlo)
+* [videa] Adapt to updates by [adrianheine](https://github.com/adrianheine)
+* [Viki] subtitles, formats by [blackjack4494](https://github.com/blackjack4494)
+* [vlive] fix extractor for revamped website by [exwm](https://github.com/exwm)
+* [xtube] fix extractor by [insaneracist](https://github.com/insaneracist)
+* [youtube] Convert subs when download is skipped by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] Fix age gate detection by [random-nick](https://github.com/random-nick)
+* [youtube] fix yt-only playback when age restricted/gated - requires cookies by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] fix: extract artist metadata from ytInitialData by [insaneracist](https://github.com/insaneracist)
+* [youtube] fix: extract mix playlist ids from ytInitialData by [insaneracist](https://github.com/insaneracist)
+* [youtube] fix: mix playlist title by [insaneracist](https://github.com/insaneracist)
+* [youtube] fix: Youtube Music playlists by [insaneracist](https://github.com/insaneracist)
+* [Youtube] Fixed problem with new youtube player by [peet1993](https://github.com/peet1993)
+* [zoom] Fix URL parsing for URLs containing /share/ and dots by [Romern](https://github.com/Romern)
+* [zoom] new extractor by [insaneracist](https://github.com/insaneracist)
+* abc by [adrianheine](https://github.com/adrianheine)
+* Added Comcast_SSO fix by [merval](https://github.com/merval)
+* Added DRM logic to brightcove by [merval](https://github.com/merval)
+* Added regex for ABC.com site by [kucksdorfs](https://github.com/kucksdorfs)
+* alura by [hugohaa](https://github.com/hugohaa)
+* Arbitrary merges by [fstirlitz](https://github.com/fstirlitz)
+* ard.py_add_playlist_support by [martin54](https://github.com/martin54)
+* Bugfix/youtube/chapters fix extractor by [gschizas](https://github.com/gschizas)
+* bugfix_youtube_like_extraction by [RedpointsBots](https://github.com/RedpointsBots)
+* Create build workflow by [blackjack4494](https://github.com/blackjack4494)
+* deezer by [LucBerge](https://github.com/LucBerge)
+* Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
+* Don't install tests by [l29ah](https://github.com/l29ah)
+* Don't try to embed/convert json subtitles generated by YouTube livechat by [pukkandan](https://github.com/pukkandan)
+* Doodstream by [sxvghd](https://github.com/sxvghd)
+* duboku by [lkho](https://github.com/lkho)
+* elonet by [tpikonen](https://github.com/tpikonen)
+* ext/remuxe-video by [Zocker1999NET](https://github.com/Zocker1999NET)
+* fall-back to the old way to fetch subtitles, if needed by [RobinD42](https://github.com/RobinD42)
+* feature_subscriber_count by [RedpointsBots](https://github.com/RedpointsBots)
+* Fix external downloader when there is no http_header by [pukkandan](https://github.com/pukkandan)
+* Fix issue triggered by tubeup by [nsapa](https://github.com/nsapa)
+* Fix YoutubePlaylistsIE by [ZenulAbidin](https://github.com/ZenulAbidin)
+* fix-mitele by [DjMoren](https://github.com/DjMoren)
+* fix/google-drive-cookie-issue by [legraphista](https://github.com/legraphista)
+* fix_tiktok by [mervel-mervel](https://github.com/mervel-mervel)
+* Fixed problem with JS player URL by [peet1993](https://github.com/peet1993)
+* fixYTSearch by [xarantolus](https://github.com/xarantolus)
+* FliegendeWurst-3sat-zdf-merger-bugfix-feature
+* gilou-bandcamp_update
+* implement ThisVid extractor by [rigstot](https://github.com/rigstot)
+* JensTimmerman-patch-1 by [JensTimmerman](https://github.com/JensTimmerman)
+* Keep download archive in memory for better performance by [jbruchon](https://github.com/jbruchon)
+* la7-fix by [iamleot](https://github.com/iamleot)
+* magenta by [adrianheine](https://github.com/adrianheine)
+* Merge 26564 from [adrianheine](https://github.com/adrianheine)
+* Merge code from [ddland](https://github.com/ddland)
+* Merge code from [nixxo](https://github.com/nixxo)
+* Merge code from [ssaqua](https://github.com/ssaqua)
+* Merge code from [zubearc](https://github.com/zubearc)
+* mkvthumbnail by [MrDoritos](https://github.com/MrDoritos)
+* myvideo_ge by [fonkap](https://github.com/fonkap)
+* naver by [SeonjaeHyeon](https://github.com/SeonjaeHyeon)
+* ondemandkorea by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* rai-update by [iamleot](https://github.com/iamleot)
+* RFC: youtube: Polymer UI and JSON endpoints for playlists by [wlritchi](https://github.com/wlritchi)
+* rutv by [adrianheine](https://github.com/adrianheine)
+* Sc extractor web auth by [blackjack4494](https://github.com/blackjack4494)
+* Switch from binary search tree to Python sets by [jbruchon](https://github.com/jbruchon)
+* tiktok by [skyme5](https://github.com/skyme5)
+* tvnow by [TinyToweringTree](https://github.com/TinyToweringTree)
+* twitch-fix by [lel-amri](https://github.com/lel-amri)
+* Twitter shortener by [blackjack4494](https://github.com/blackjack4494)
+* Update README.md by [JensTimmerman](https://github.com/JensTimmerman)
+* Update to reflect website changes by [amigatomte](https://github.com/amigatomte)
+* use webarchive to fix a dead link in README by [B0pol](https://github.com/B0pol)
+* Viki the second by [blackjack4494](https://github.com/blackjack4494)
+* wdr-subtitles by [mrtnmtth](https://github.com/mrtnmtth)
+* Webpfix by [alexmerkel](https://github.com/alexmerkel)
+* Youtube live chat by [siikamiika](https://github.com/siikamiika)
diff --git a/Makefile b/Makefile
index b54e8ad..a395a0e 100644
--- a/Makefile
+++ b/Makefile
@@ -97,13 +97,14 @@ hypervideo.tar.gz: all
--exclude '*.pyo' \
--exclude '*~' \
--exclude '__pycache__' \
+ --exclude '.pytest_cache' \
--exclude '.git' \
-- \
bin README.md Changelog.md LICENSE \
CONTRIBUTING.md CONTRIBUTORS AUTHORS \
Makefile MANIFEST.in README.md completions \
setup.py setup.cfg hypervideo hypervideo_dl requirements.txt \
- devscripts test tox.ini pytest.ini
+ devscripts test
AUTHORS: .mailmap
git shortlog -s -n | cut -f2 | sort > AUTHORS
diff --git a/README.md b/README.md
index 7e5b600..3548389 100644
--- a/README.md
+++ b/README.md
@@ -1,466 +1,1036 @@
-hypervideo - A fork of youtube-dl without nonfree parts
-
-- [INSTALLATION](#installation)
-- [DESCRIPTION](#description)
-- [OPTIONS](#options)
-- [CONFIGURATION](#configuration)
-- [OUTPUT TEMPLATE](#output-template)
-- [FORMAT SELECTION](#format-selection)
-- [VIDEO SELECTION](#video-selection)
-- [FAQ](#faq)
-- [DEVELOPER INSTRUCTIONS](#developer-instructions)
-- [EMBEDDING HYPERVIDEO](#embedding-hypervideo)
-- [BUGS](#bugs)
-- [COPYRIGHT](#copyright)
+hypervideo - A fork of yt-dlp without nonfree parts
-# INSTALLATION
+<!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE -->
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+* [NEW FEATURES](#new-features)
+ * [Differences in default behavior](#differences-in-default-behavior)
+* [INSTALLATION](#installation)
+ * [Detailed instructions](https://github.com/yt-dlp/yt-dlp/wiki/Installation)
+ * [Release Files](#release-files)
+ * [Dependencies](#dependencies)
+ * [Compile](#compile)
+* [USAGE AND OPTIONS](#usage-and-options)
+ * [General Options](#general-options)
+ * [Network Options](#network-options)
+ * [Geo-restriction](#geo-restriction)
+ * [Video Selection](#video-selection)
+ * [Download Options](#download-options)
+ * [Filesystem Options](#filesystem-options)
+ * [Thumbnail Options](#thumbnail-options)
+ * [Internet Shortcut Options](#internet-shortcut-options)
+ * [Verbosity and Simulation Options](#verbosity-and-simulation-options)
+ * [Workarounds](#workarounds)
+ * [Video Format Options](#video-format-options)
+ * [Subtitle Options](#subtitle-options)
+ * [Authentication Options](#authentication-options)
+ * [Post-processing Options](#post-processing-options)
+ * [SponsorBlock Options](#sponsorblock-options)
+ * [Extractor Options](#extractor-options)
+* [CONFIGURATION](#configuration)
+ * [Configuration file encoding](#configuration-file-encoding)
+ * [Authentication with .netrc file](#authentication-with-netrc-file)
+ * [Notes about environment variables](#notes-about-environment-variables)
+* [OUTPUT TEMPLATE](#output-template)
+ * [Output template examples](#output-template-examples)
+* [FORMAT SELECTION](#format-selection)
+ * [Filtering Formats](#filtering-formats)
+ * [Sorting Formats](#sorting-formats)
+ * [Format Selection examples](#format-selection-examples)
+* [MODIFYING METADATA](#modifying-metadata)
+ * [Modifying metadata examples](#modifying-metadata-examples)
+* [EXTRACTOR ARGUMENTS](#extractor-arguments)
+* [PLUGINS](#plugins)
+* [EMBEDDING HYPERVIDEO](#embedding-hypervideo)
+ * [Embedding examples](#embedding-examples)
+* [DEPRECATED OPTIONS](#deprecated-options)
+* [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp)
+ * [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
+ * [Developer Instructions](CONTRIBUTING.md#developer-instructions)
+* [WIKI](https://github.com/yt-dlp/yt-dlp/wiki)
+ * [FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ)
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+
+# NEW FEATURES
+
+* Merged with **youtube-dl v2021.12.17+ [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)** <!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+
+* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
+
+* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will now be preferred instead of simply using a larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
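+
+  For example, an illustrative invocation preferring the best formats at or below 1080p:
+
+  ```console
+  $ hypervideo -S "res:1080" URL
+  ```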
+
+* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
+
+* **YouTube improvements**:
+ * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
+ * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
+ * Supports some (but not all) age-gated content without cookies
+ * Download livestreams from the start using `--live-from-start` (*experimental*)
+ * `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
+ * Channel URLs download all uploads of the channel, including shorts and live
+
+* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
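+
+  For example, an illustrative invocation reading cookies from the default Firefox profile:
+
+  ```console
+  $ hypervideo --cookies-from-browser firefox URL
+  ```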
+
+* **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections`
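+
+  For example, illustrative invocations for a timestamp range (the leading `*` marks a time range rather than a chapter regex) and for chapters whose titles match a regex:
+
+  ```console
+  $ hypervideo --download-sections "*10:15-15:00" URL
+  $ hypervideo --download-sections "intro" URL
+  ```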
+
+* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
+
+* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
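+
+  For example, an illustrative invocation using 4 threads:
+
+  ```console
+  $ hypervideo -N 4 URL
+  ```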
+
+* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
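+
+  For example, an illustrative invocation routing only DASH and HLS downloads through aria2c:
+
+  ```console
+  $ hypervideo --downloader "dash,m3u8:aria2c" URL
+  ```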
+
+* **New and fixed extractors**: Many new extractors have been added and a lot of existing ones have been fixed. See the [changelog](Changelog.md) or the [list of supported sites](supportedsites.md)
+
+* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN etc.
+
+* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
+
+* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
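+
+  For example, an illustrative invocation downloading intermediary files to `/tmp` and finished files to `~/Videos`:
+
+  ```console
+  $ hypervideo -P "~/Videos" -P "temp:/tmp" URL
+  ```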
+
+* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [CONFIGURATION](#configuration) for details
+
+* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
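+
+  For example, an illustrative template using date-time formatting to render `upload_date` as YYYY-MM-DD:
+
+  ```console
+  $ hypervideo -o "%(upload_date>%Y-%m-%d)s - %(title)s [%(id)s].%(ext)s" URL
+  ```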
+
+* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
+
+* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
+
+* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
+
+See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes
+
+Features marked with a **\*** have been back-ported to youtube-dl
+
+### Differences in default behavior
+
+Some of hypervideo's default options are different from those of youtube-dl and youtube-dlc:
+
+* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`) no longer work. See [removed options](#Removed) for details
+* `avconv` is not supported as an alternative to `ffmpeg`
+* hypervideo stores config files in slightly different locations from youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
+* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before hypervideo was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
+* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
+* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
+* Unlike youtube-dlc, hypervideo does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
+* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
+* When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files
+* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this
+* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, the `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
+* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
+* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
+* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
+* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
+* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
+* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/hypervideo_dl/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
+* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
+* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
+* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
+* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
+* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
+* hypervideo's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
-A fork of [youtube-dl](https://ytdl-org.github.io/youtube-dl/) without nonfree parts
+For ease of use, a few more compat options are available:
-Hypervideo is distributed for [Hyperbola GNU/Linux-libre](https://www.hyperbola.info/).
+* `--compat-options all`: Use all compat options (Do NOT use)
+* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
+* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
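+
+For example, an illustrative invocation mimicking youtube-dl's default behavior:
+
+```console
+$ hypervideo --compat-options youtube-dl URL
+```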
-To install on Hyperbola run:
+
+# INSTALLATION
```console
# pacman -S hypervideo
```
-# DESCRIPTION
-**hypervideo** A fork of youtube-dl without nonfree parts
-
- $ hypervideo [OPTIONS] URL [URL...]
-
-# OPTIONS
- -h, --help Print this help text and exit
- --version Print program version and exit
- -i, --ignore-errors Continue on download errors, for
- example to skip unavailable videos in a
- playlist
- --abort-on-error Abort downloading of further videos (in
- the playlist or the command line) if an
- error occurs
- --dump-user-agent Display the current browser
- identification
- --list-extractors List all supported extractors
- --extractor-descriptions Output descriptions of all supported
- extractors
- --force-generic-extractor Force extraction to use the generic
- extractor
- --default-search PREFIX Use this prefix for unqualified URLs.
- For example "gvsearch2:" downloads two
- videos from google videos for
- hypervideo "large apple". Use the value
- "auto" to let hypervideo guess
- ("auto_warning" to emit a warning when
- guessing). "error" just throws an
- error. The default value "fixup_error"
- repairs broken URLs, but emits an error
- if this is not possible instead of
- searching.
- --ignore-config Do not read configuration files. When
- given in the global configuration file
- /etc/hypervideo.conf: Do not read the
- user configuration in
- ~/.config/hypervideo/config
- (%APPDATA%/hypervideo/config.txt on
- Windows)
- --config-location PATH Location of the configuration file;
- either the path to the config or its
- containing directory.
- --flat-playlist Do not extract the videos of a
- playlist, only list them.
- --mark-watched Mark videos watched (YouTube only)
- --no-mark-watched Do not mark videos watched (YouTube
- only)
- --no-color Do not emit color codes in output
+## DEPENDENCIES
+Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
+
+While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
+
+### Strongly recommended
+
+* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
+
+ <!-- TODO: ffmpeg has merged this patch. Remove this note once there is new release -->
+ **Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside hypervideo. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
+
+### Networking
+* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
+* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
+* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
+
+### Metadata
+
+* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
+* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For `--embed-thumbnail` in `mp4`/`m4a` files when `mutagen`/`ffmpeg` cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
+* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattr`) on **Linux**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively
+
+### Misc
+
+* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
+* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
+* [**secretstorage**](https://github.com/mitya57/secretstorage) - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
+* Any external downloader that you want to use with `--downloader`
+
+### Deprecated
+
+* [**avconv** and **avprobe**](https://www.libav.org) - Now **deprecated** alternative to ffmpeg. License [depends on the build](https://libav.org/legal)
+* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
+* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
+* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp`/`mms` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
+
+To use or redistribute the dependencies, you must agree to their respective licensing terms.
+
+The standalone release binaries are built with the Python interpreter and the packages marked with **\*** included.
+
+If you do not have the necessary dependencies for a task you are attempting, hypervideo will warn you. All the currently available dependencies are visible at the top of the `--verbose` output
+
+
+## COMPILE
+
+### Standalone PyInstaller Builds
+To build the standalone executable, you must have Python and `pyinstaller` (plus any of hypervideo's [optional dependencies](#dependencies) if needed). Once you have all the necessary dependencies installed, simply run `pyinst.py`. The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used.
+
+ python3 -m pip install -U pyinstaller -r requirements.txt
+ python3 devscripts/make_lazy_extractors.py
+ python3 pyinst.py
+
+On some systems, you may need to use `py` or `python` instead of `python3`.
+
+`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
+
+Note that pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
+
+**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
+
+### Platform-independent Binary (UNIX)
+You will need the build tools `python` (3.7+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
+
+After installing these, simply run `make`.
+
+You can also run `make hypervideo` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this)
+
+### Standalone Py2Exe Builds (Windows)
+
+While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
+
+If you wish to build it anyway, install Python and py2exe, and then simply run `setup.py py2exe`
+
+ py -m pip install -U py2exe -r requirements.txt
+ py devscripts/make_lazy_extractors.py
+ py setup.py py2exe
+
+### Related scripts
+
+* **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable
+* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
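+
+For example, an illustrative run (assuming a POSIX shell) with lazy extractor loading forcefully disabled:
+
+```console
+$ YTDLP_NO_LAZY_EXTRACTORS=1 hypervideo URL
+```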
+
+You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release
+
+# USAGE AND OPTIONS
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+ hypervideo [OPTIONS] [--] URL [URL...]
+
+`Ctrl+F` is your friend :D
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+<!-- Auto generated -->
+## General Options:
+ -h, --help Print this help text and exit
+ --version Print program version and exit
+ -i, --ignore-errors Ignore download and postprocessing errors.
+ The download will be considered successful
+ even if the postprocessing fails
+ --no-abort-on-error Continue with next video on download errors;
+ e.g. to skip unavailable videos in a
+ playlist (default)
+ --abort-on-error Abort downloading of further videos if an
+ error occurs (Alias: --no-ignore-errors)
+ --dump-user-agent Display the current user-agent and exit
+ --list-extractors List all supported extractors and exit
+ --extractor-descriptions Output descriptions of all supported
+ extractors and exit
+ --use-extractors NAMES Extractor names to use separated by commas.
+ You can also use regexes, "all", "default"
+ and "end" (end URL matching); e.g. --ies
+ "holodex.*,end,youtube". Prefix the name
+ with a "-" to exclude it, e.g. --ies
+ default,-generic. Use --list-extractors for
+ a list of extractor names. (Alias: --ies)
+ --default-search PREFIX Use this prefix for unqualified URLs. E.g.
+ "gvsearch2:python" downloads two videos from
+ google videos for the search term "python".
+ Use the value "auto" to let hypervideo guess
+ ("auto_warning" to emit a warning when
+ guessing). "error" just throws an error. The
+ default value "fixup_error" repairs broken
+ URLs, but emits an error if this is not
+ possible instead of searching
+ --ignore-config Don't load any more configuration files
+ except those given by --config-locations.
+ For backward compatibility, if this option
+ is found inside the system configuration
+ file, the user configuration is not loaded.
+ (Alias: --no-config)
+ --no-config-locations Do not load any custom configuration files
+ (default). When given inside a configuration
+ file, ignore all previous --config-locations
+ defined in the current file
+ --config-locations PATH Location of the main configuration file;
+ either the path to the config or its
+ containing directory ("-" for stdin). Can be
+ used multiple times and inside other
+ configuration files
+ --flat-playlist Do not extract the videos of a playlist,
+ only list them
+ --no-flat-playlist Extract the videos of a playlist
+ --live-from-start Download livestreams from the start.
+ Currently only supported for YouTube
+ (Experimental)
+ --no-live-from-start Download livestreams from the current time
+ (default)
+ --wait-for-video MIN[-MAX] Wait for scheduled streams to become
+ available. Pass the minimum number of
+ seconds (or range) to wait between retries
+ --no-wait-for-video Do not wait for scheduled streams (default)
+ --mark-watched Mark videos watched (even with --simulate)
+ --no-mark-watched Do not mark videos watched (default)
+ --no-colors Do not emit color codes in output (Alias:
+ --no-colours)
+ --compat-options OPTS Options that can help keep compatibility
+ with youtube-dl or youtube-dlc
+ configurations by reverting some of the
+ changes made in hypervideo. See "Differences
+ in default behavior" for details
+ --alias ALIASES OPTIONS Create aliases for an option string. Unless
+ an alias starts with a dash "-", it is
+ prefixed with "--". Arguments are parsed
+ according to the Python string formatting
+ mini-language. E.g. --alias get-audio,-X
+ "-S=aext:{0},abr -x --audio-format {0}"
+ creates options "--get-audio" and "-X" that
+                                     take an argument (ARG0) and expand to
+ "-S=aext:ARG0,abr -x --audio-format ARG0".
+ All defined aliases are listed in the --help
+ output. Alias options can trigger more
+ aliases; so be careful to avoid defining
+ recursive options. As a safety measure, each
+ alias may be triggered a maximum of 100
+ times. This option can be used multiple times
## Network Options:
- --proxy URL Use the specified HTTP/HTTPS/SOCKS
- proxy. To enable SOCKS proxy, specify a
- proper scheme. For example
- socks5://127.0.0.1:1080/. Pass in an
- empty string (--proxy "") for direct
- connection
- --socket-timeout SECONDS Time to wait before giving up, in
- seconds
- --source-address IP Client-side IP address to bind to
- -4, --force-ipv4 Make all connections via IPv4
- -6, --force-ipv6 Make all connections via IPv6
-
-## Geo Restriction:
- --geo-verification-proxy URL Use this proxy to verify the IP address
- for some geo-restricted sites. The
- default proxy specified by --proxy (or
- none, if the option is not present) is
- used for the actual downloading.
- --geo-bypass Bypass geographic restriction via
- faking X-Forwarded-For HTTP header
- --no-geo-bypass Do not bypass geographic restriction
- via faking X-Forwarded-For HTTP header
- --geo-bypass-country CODE Force bypass geographic restriction
- with explicitly provided two-letter ISO
- 3166-2 country code
- --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction
- with explicitly provided IP block in
- CIDR notation
+ --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To
+ enable SOCKS proxy, specify a proper scheme,
+ e.g. socks5://user:pass@127.0.0.1:1080/.
+ Pass in an empty string (--proxy "") for
+ direct connection
+ --socket-timeout SECONDS Time to wait before giving up, in seconds
+ --source-address IP Client-side IP address to bind to
+ -4, --force-ipv4 Make all connections via IPv4
+ -6, --force-ipv6 Make all connections via IPv6
+
+## Geo-restriction:
+ --geo-verification-proxy URL Use this proxy to verify the IP address for
+ some geo-restricted sites. The default proxy
+ specified by --proxy (or none, if the option
+ is not present) is used for the actual
+ downloading
+ --geo-bypass Bypass geographic restriction via faking
+ X-Forwarded-For HTTP header (default)
+ --no-geo-bypass Do not bypass geographic restriction via
+ faking X-Forwarded-For HTTP header
+ --geo-bypass-country CODE Force bypass geographic restriction with
+ explicitly provided two-letter ISO 3166-2
+ country code
+ --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
+ explicitly provided IP block in CIDR notation
## Video Selection:
- --playlist-start NUMBER Playlist video to start at (default is
- 1)
- --playlist-end NUMBER Playlist video to end at (default is
- last)
- --playlist-items ITEM_SPEC Playlist video items to download.
- Specify indices of the videos in the
- playlist separated by commas like: "--
- playlist-items 1,2,5,8" if you want to
- download videos indexed 1, 2, 5, 8 in
- the playlist. You can specify range: "
- --playlist-items 1-3,7,10-13", it will
- download the videos at index 1, 2, 3,
- 7, 10, 11, 12 and 13.
- --match-title REGEX Download only matching titles (regex or
- caseless sub-string)
- --reject-title REGEX Skip download for matching titles
- (regex or caseless sub-string)
- --max-downloads NUMBER Abort after downloading NUMBER files
- --min-filesize SIZE Do not download any videos smaller than
- SIZE (e.g. 50k or 44.6m)
- --max-filesize SIZE Do not download any videos larger than
- SIZE (e.g. 50k or 44.6m)
- --date DATE Download only videos uploaded in this
- date
- --datebefore DATE Download only videos uploaded on or
- before this date (i.e. inclusive)
- --dateafter DATE Download only videos uploaded on or
- after this date (i.e. inclusive)
- --min-views COUNT Do not download any videos with less
- than COUNT views
- --max-views COUNT Do not download any videos with more
- than COUNT views
- --match-filter FILTER Generic video filter. Specify any key
- (see the "OUTPUT TEMPLATE" for a list
- of available keys) to match if the key
- is present, !key to check if the key is
- not present, key > NUMBER (like
- "comment_count > 12", also works with
- >=, <, <=, !=, =) to compare against a
- number, key = 'LITERAL' (like "uploader
- = 'Mike Smith'", also works with !=) to
- match against a string literal and & to
- require multiple matches. Values which
- are not known are excluded unless you
- put a question mark (?) after the
- operator. For example, to only match
- videos that have been liked more than
- 100 times and disliked less than 50
- times (or the dislike functionality is
- not available at the given service),
- but who also have a description, use
- --match-filter "like_count > 100 &
- dislike_count <? 50 & description" .
- --no-playlist Download only the video, if the URL
- refers to a video and a playlist.
- --yes-playlist Download the playlist, if the URL
- refers to a video and a playlist.
- --age-limit YEARS Download only videos suitable for the
- given age
- --download-archive FILE Download only videos not listed in the
- archive file. Record the IDs of all
- downloaded videos in it.
- --include-ads Download advertisements as well
- (experimental)
+ -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the videos
+ to download. You can specify a range using
+ "[START]:[STOP][:STEP]". For backward
+ compatibility, START-STOP is also supported.
+ Use negative indices to count from the right
+ and negative STEP to download in reverse
+ order. E.g. "-I 1:3,7,-5::2" used on a
+ playlist of size 15 will download the videos
+ at index 1,2,3,7,11,13,15
+ --min-filesize SIZE Abort download if filesize is smaller than
+ SIZE, e.g. 50k or 44.6M
+  --max-filesize SIZE             Abort download if filesize is larger than
+ SIZE, e.g. 50k or 44.6M
+ --date DATE Download only videos uploaded on this date.
+ The date can be "YYYYMMDD" or in the format
+ [now|today|yesterday][-N[day|week|month|year
+ ]]. E.g. --date today-2weeks
+ --datebefore DATE Download only videos uploaded on or before
+ this date. The date formats accepted are
+ the same as --date
+ --dateafter DATE Download only videos uploaded on or after
+ this date. The date formats accepted are
+ the same as --date
+ --match-filters FILTER Generic video filter. Any "OUTPUT TEMPLATE"
+ field can be compared with a number or a
+ string using the operators defined in
+ "Filtering Formats". You can also simply
+ specify a field to match if the field is
+ present, use "!field" to check if the field
+ is not present, and "&" to check multiple
+ conditions. Use a "\" to escape "&" or
+ quotes if needed. If used multiple times,
+ the filter matches if at least one of the
+ conditions is met. E.g. --match-filters
+ !is_live --match-filters "like_count>?100 &
+ description~='(?i)\bcats \& dogs\b'" matches
+ only videos that are not live OR those that
+ have a like count of more than 100 (or the
+ like field is not available) and also have a
+ description containing the phrase "cats &
+ dogs" (caseless). Use "--match-filters -" to
+ interactively ask whether to download each
+ video
+ --no-match-filter Do not use generic video filter (default)
+ --no-playlist Download only the video, if the URL refers
+ to a video and a playlist
+ --yes-playlist Download the playlist, if the URL refers to
+ a video and a playlist
+ --age-limit YEARS Download only videos suitable for the given
+ age
+ --download-archive FILE Download only videos not listed in the
+ archive file. Record the IDs of all
+ downloaded videos in it
+ --no-download-archive Do not use archive file (default)
+ --max-downloads NUMBER Abort after downloading NUMBER files
+ --break-on-existing Stop the download process when encountering
+ a file that is in the archive
+ --break-on-reject Stop the download process when encountering
+ a file that has been filtered out
+ --break-per-input --break-on-existing, --break-on-reject,
+ --max-downloads, and autonumber reset per
+ input URL
+ --no-break-per-input --break-on-existing and similar options
+ terminate the entire download queue
+ --skip-playlist-after-errors N Number of allowed failures until the rest of
+ the playlist is skipped
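+
+For instance, combining the selection options above (the URL is a
+placeholder), the following downloads every other video among the first
+ten playlist entries while skipping live streams:
+
+```
+hypervideo -I 1:10:2 --match-filters "!is_live" "https://example.com/playlist"
+```
+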
## Download Options:
- -r, --limit-rate RATE Maximum download rate in bytes per
- second (e.g. 50K or 4.2M)
- -R, --retries RETRIES Number of retries (default is 10), or
- "infinite".
- --fragment-retries RETRIES Number of retries for a fragment
- (default is 10), or "infinite" (DASH,
- hlsnative and ISM)
- --skip-unavailable-fragments Skip unavailable fragments (DASH,
- hlsnative and ISM)
- --abort-on-unavailable-fragment Abort downloading when some fragment is
- not available
- --keep-fragments Keep downloaded fragments on disk after
- downloading is finished; fragments are
- erased by default
- --buffer-size SIZE Size of download buffer (e.g. 1024 or
- 16K) (default is 1024)
- --no-resize-buffer Do not automatically adjust the buffer
- size. By default, the buffer size is
- automatically resized from an initial
- value of SIZE.
- --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
- downloading (e.g. 10485760 or 10M)
- (default is disabled). May be useful
- for bypassing bandwidth throttling
- imposed by a webserver (experimental)
- --playlist-reverse Download playlist videos in reverse
- order
- --playlist-random Download playlist videos in random
- order
- --xattr-set-filesize Set file xattribute ytdl.filesize with
- expected file size
- --hls-prefer-native Use the native HLS downloader instead
- of ffmpeg
- --hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
- downloader
- --hls-use-mpegts Use the mpegts container for HLS
- videos, allowing to play the video
- while downloading (some players may not
- be able to play it)
- --external-downloader COMMAND Use the specified external downloader.
- Currently supports aria2c,avconv,axel,c
- url,ffmpeg,httpie,wget
- --external-downloader-args ARGS Give these arguments to the external
- downloader
+ -N, --concurrent-fragments N Number of fragments of a dash/hlsnative
+ video that should be downloaded concurrently
+ (default is 1)
+ -r, --limit-rate RATE Maximum download rate in bytes per second,
+ e.g. 50K or 4.2M
+ --throttled-rate RATE Minimum download rate in bytes per second
+ below which throttling is assumed and the
+ video data is re-extracted, e.g. 100K
+ -R, --retries RETRIES Number of retries (default is 10), or
+ "infinite"
+ --file-access-retries RETRIES Number of times to retry on file access
+ error (default is 3), or "infinite"
+ --fragment-retries RETRIES Number of retries for a fragment (default is
+ 10), or "infinite" (DASH, hlsnative and ISM)
+ --retry-sleep [TYPE:]EXPR Time to sleep between retries in seconds
+ (optionally) prefixed by the type of retry
+ (http (default), fragment, file_access,
+ extractor) to apply the sleep to. EXPR can
+ be a number, linear=START[:END[:STEP=1]] or
+ exp=START[:END[:BASE=2]]. This option can be
+ used multiple times to set the sleep for the
+ different retry types, e.g. --retry-sleep
+ linear=1::2 --retry-sleep fragment:exp=1:20
+ --skip-unavailable-fragments Skip unavailable fragments for DASH,
+ hlsnative and ISM downloads (default)
+ (Alias: --no-abort-on-unavailable-fragment)
+ --abort-on-unavailable-fragment
+ Abort download if a fragment is unavailable
+ (Alias: --no-skip-unavailable-fragments)
+ --keep-fragments Keep downloaded fragments on disk after
+ downloading is finished
+ --no-keep-fragments Delete downloaded fragments after
+ downloading is finished (default)
+ --buffer-size SIZE Size of download buffer, e.g. 1024 or 16K
+ (default is 1024)
+ --resize-buffer The buffer size is automatically resized
+ from an initial value of --buffer-size
+ (default)
+ --no-resize-buffer Do not automatically adjust the buffer size
+ --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
+ downloading, e.g. 10485760 or 10M (default
+ is disabled). May be useful for bypassing
+ bandwidth throttling imposed by a webserver
+ (experimental)
+ --playlist-random Download playlist videos in random order
+ --lazy-playlist Process entries in the playlist as they are
+ received. This disables n_entries,
+ --playlist-random and --playlist-reverse
+ --no-lazy-playlist Process videos in the playlist only after
+ the entire playlist is parsed (default)
+ --xattr-set-filesize Set file xattribute ytdl.filesize with
+ expected file size
+ --hls-use-mpegts Use the mpegts container for HLS videos;
+ allowing some players to play the video
+ while downloading, and reducing the chance
+ of file corruption if the download is
+ interrupted. This is enabled by default for
+ live streams
+ --no-hls-use-mpegts Do not use the mpegts container for HLS
+ videos. This is the default when not
+ downloading live streams
+ --download-sections REGEX Download only chapters whose title matches
+ the given regular expression. Time ranges
+ prefixed by a "*" can also be used in place
+ of chapters to download the specified range.
+ Needs ffmpeg. This option can be used
+ multiple times to download multiple
+ sections, e.g. --download-sections
+ "*10:15-inf" --download-sections "intro"
+ --downloader [PROTO:]NAME Name or path of the external downloader to
+ use (optionally) prefixed by the protocols
+ (http, ftp, m3u8, dash, rtsp, rtmp, mms) to
+ use it for. Currently supports native,
+ aria2c, avconv, axel, curl, ffmpeg, httpie,
+ wget. You can use this option multiple times
+ to set different downloaders for different
+ protocols. E.g. --downloader aria2c
+ --downloader "dash,m3u8:native" will use
+ aria2c for http/ftp downloads, and the
+ native downloader for dash/m3u8 downloads
+ (Alias: --external-downloader)
+ --downloader-args NAME:ARGS Give these arguments to the external
+ downloader. Specify the downloader name and
+ the arguments separated by a colon ":". For
+ ffmpeg, arguments can be passed to different
+ positions using the same syntax as
+ --postprocessor-args. You can use this
+ option multiple times to give different
+ arguments to different downloaders (Alias:
+ --external-downloader-args)
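+
+As an illustration (placeholder URL), the following downloads four fragments
+concurrently, caps the overall rate, and sleeps exponentially between
+fragment retries, using only the options documented above:
+
+```
+hypervideo -N 4 -r 2M --retry-sleep fragment:exp=1:20 "https://example.com/video"
+```
+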
## Filesystem Options:
- -a, --batch-file FILE File containing URLs to download ('-'
- for stdin), one URL per line. Lines
- starting with '#', ';' or ']' are
- considered as comments and ignored.
- --id Use only video ID in file name
- -o, --output TEMPLATE Output filename template, see the
- "OUTPUT TEMPLATE" for all the info
- --output-na-placeholder PLACEHOLDER Placeholder value for unavailable meta
- fields in output filename template
- (default is "NA")
- --autonumber-start NUMBER Specify the start value for
- %(autonumber)s (default is 1)
- --restrict-filenames Restrict filenames to only ASCII
- characters, and avoid "&" and spaces in
- filenames
- -w, --no-overwrites Do not overwrite files
- -c, --continue Force resume of partially downloaded
- files. By default, hypervideo will
- resume downloads if possible.
- --no-continue Do not resume partially downloaded
- files (restart from beginning)
- --no-part Do not use .part files - write directly
- into output file
- --no-mtime Do not use the Last-modified header to
- set the file modification time
- --write-description Write video description to a
- .description file
- --write-info-json Write video metadata to a .info.json
- file
- --write-annotations Write video annotations to a
- .annotations.xml file
- --load-info-json FILE JSON file containing the video
- information (created with the "--write-
- info-json" option)
- --cookies FILE File to read cookies from and dump
- cookie jar in
- --cache-dir DIR Location in the filesystem where
- hypervideo can store some downloaded
- information permanently. By default
- $XDG_CACHE_HOME/hypervideo or
- ~/.cache/hypervideo . At the moment,
- only YouTube player files (for videos
- with obfuscated signatures) are cached,
- but that may change.
- --no-cache-dir Disable filesystem caching
- --rm-cache-dir Delete all filesystem cache files
+ -a, --batch-file FILE File containing URLs to download ("-" for
+ stdin), one URL per line. Lines starting
+ with "#", ";" or "]" are considered as
+ comments and ignored
+ --no-batch-file Do not read URLs from batch file (default)
+ -P, --paths [TYPES:]PATH The paths where the files should be
+ downloaded. Specify the type of file and the
+ path separated by a colon ":". All the same
+ TYPES as --output are supported.
+ Additionally, you can also provide "home"
+ (default) and "temp" paths. All intermediary
+ files are first downloaded to the temp path
+ and then the final files are moved over to
+ the home path after download is finished.
+ This option is ignored if --output is an
+ absolute path
+ -o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT
+ TEMPLATE" for details
+ --output-na-placeholder TEXT Placeholder for unavailable fields in
+ "OUTPUT TEMPLATE" (default: "NA")
+ --restrict-filenames Restrict filenames to only ASCII characters,
+ and avoid "&" and spaces in filenames
+ --no-restrict-filenames Allow Unicode characters, "&" and spaces in
+ filenames (default)
+ --windows-filenames Force filenames to be Windows-compatible
+ --no-windows-filenames Make filenames Windows-compatible only if
+ using Windows (default)
+ --trim-filenames LENGTH Limit the filename length (excluding
+ extension) to the specified number of
+ characters
+ -w, --no-overwrites Do not overwrite any files
+ --force-overwrites Overwrite all video and metadata files. This
+ option includes --no-continue
+ --no-force-overwrites Do not overwrite the video, but overwrite
+ related files (default)
+ -c, --continue Resume partially downloaded files/fragments
+ (default)
+ --no-continue Do not resume partially downloaded
+ fragments. If the file is not fragmented,
+ restart download of the entire file
+ --part Use .part files instead of writing directly
+ into output file (default)
+ --no-part Do not use .part files - write directly into
+ output file
+ --mtime Use the Last-modified header to set the file
+ modification time (default)
+ --no-mtime Do not use the Last-modified header to set
+ the file modification time
+ --write-description Write video description to a .description file
+ --no-write-description Do not write video description (default)
+ --write-info-json Write video metadata to a .info.json file
+ (this may contain personal information)
+ --no-write-info-json Do not write video metadata (default)
+ --write-playlist-metafiles Write playlist metadata in addition to the
+ video metadata when using --write-info-json,
+ --write-description etc. (default)
+ --no-write-playlist-metafiles Do not write playlist metadata when using
+ --write-info-json, --write-description etc.
+ --clean-info-json Remove some private fields such as filenames
+ from the infojson. Note that it could still
+ contain some personal information (default)
+ --no-clean-info-json Write all fields to the infojson
+ --write-comments Retrieve video comments to be placed in the
+ infojson. The comments are fetched even
+ without this option if the extraction is
+ known to be quick (Alias: --get-comments)
+ --no-write-comments Do not retrieve video comments unless the
+ extraction is known to be quick (Alias:
+ --no-get-comments)
+ --load-info-json FILE JSON file containing the video information
+ (created with the "--write-info-json" option)
+ --cookies FILE Netscape formatted file to read cookies from
+ and dump cookie jar in
+ --no-cookies Do not read/dump cookies from/to file
+ (default)
+ --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]
+ The name of the browser to load cookies
+ from. Currently supported browsers are:
+ brave, chrome, chromium, edge, firefox,
+ opera, safari, vivaldi. Optionally, the
+ KEYRING used for decrypting Chromium cookies
+ on Linux, the name/path of the PROFILE to
+ load cookies from, and the CONTAINER name
+ (if Firefox) ("none" for no container) can
+ be given with their respective separators.
+ By default, all containers of the most
+ recently accessed profile are used.
+ Currently supported keyrings are: basictext,
+ gnomekeyring, kwallet
+ --no-cookies-from-browser Do not load cookies from browser (default)
+ --cache-dir DIR Location in the filesystem where hypervideo
+ can store some downloaded information (such
+ as client ids and signatures) permanently.
+ By default ${XDG_CACHE_HOME}/hypervideo
+ --no-cache-dir Disable filesystem caching
+ --rm-cache-dir Delete all filesystem cache files
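+
+For example, a sketch of -P and -o together (directory names are arbitrary):
+intermediary files are written to the temp path and the finished file is
+then moved to the home path:
+
+```
+hypervideo -P "~/Videos" -P "temp:/tmp/hypervideo" -o "%(title)s.%(ext)s" "https://example.com/video"
+```
+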
## Thumbnail Options:
- --write-thumbnail Write thumbnail image to disk
- --write-all-thumbnails Write all thumbnail image formats to
- disk
- --list-thumbnails Simulate and list all available
- thumbnail formats
-
-## Verbosity / Simulation Options:
- -q, --quiet Activate quiet mode
- --no-warnings Ignore warnings
- -s, --simulate Do not download the video and do not
- write anything to disk
- --skip-download Do not download the video
- -g, --get-url Simulate, quiet but print URL
- -e, --get-title Simulate, quiet but print title
- --get-id Simulate, quiet but print id
- --get-thumbnail Simulate, quiet but print thumbnail URL
- --get-description Simulate, quiet but print video
- description
- --get-duration Simulate, quiet but print video length
- --get-filename Simulate, quiet but print output
- filename
- --get-format Simulate, quiet but print output format
- -j, --dump-json Simulate, quiet but print JSON
- information. See the "OUTPUT TEMPLATE"
- for a description of available keys.
- -J, --dump-single-json Simulate, quiet but print JSON
- information for each command-line
- argument. If the URL refers to a
- playlist, dump the whole playlist
- information in a single line.
- --print-json Be quiet and print the video
- information as JSON (video is still
- being downloaded).
- --newline Output progress bar as new lines
- --no-progress Do not print progress bar
- --console-title Display progress in console titlebar
- -v, --verbose Print various debugging information
- --dump-pages Print downloaded pages encoded using
- base64 to debug problems (very verbose)
- --write-pages Write downloaded intermediary pages to
- files in the current directory to debug
- problems
- --print-traffic Display sent and read HTTP traffic
- -C, --call-home Contact the hypervideo server for
- debugging
- --no-call-home Do NOT contact the hypervideo server
- for debugging
+ --write-thumbnail Write thumbnail image to disk
+ --no-write-thumbnail Do not write thumbnail image to disk (default)
+ --write-all-thumbnails Write all thumbnail image formats to disk
+ --list-thumbnails List available thumbnails of each video.
+ Simulate unless --no-simulate is used
+
+## Internet Shortcut Options:
+ --write-link Write an internet shortcut file, depending
+ on the current platform (.url, .webloc or
+ .desktop). The URL may be cached by the OS
+ --write-url-link Write a .url Windows internet shortcut. The
+ OS caches the URL based on the file path
+ --write-webloc-link Write a .webloc macOS internet shortcut
+ --write-desktop-link Write a .desktop Linux internet shortcut
+
+## Verbosity and Simulation Options:
+ -q, --quiet Activate quiet mode. If used with --verbose,
+ print the log to stderr
+ --no-warnings Ignore warnings
+ -s, --simulate Do not download the video and do not write
+ anything to disk
+ --no-simulate Download the video even if printing/listing
+ options are used
+ --ignore-no-formats-error Ignore "No video formats" error. Useful for
+ extracting metadata even if the videos are
+ not actually available for download
+ (experimental)
+ --no-ignore-no-formats-error Throw error when no downloadable video
+ formats are found (default)
+ --skip-download Do not download the video but write all
+ related files (Alias: --no-download)
+ -O, --print [WHEN:]TEMPLATE Field name or output template to print to
+ screen, optionally prefixed with when to
+ print it, separated by a ":". Supported
+ values of "WHEN" are the same as that of
+ --use-postprocessor, and "video" (default).
+ Implies --quiet. Implies --simulate unless
+ --no-simulate or later stages of WHEN are
+ used. This option can be used multiple times
+ --print-to-file [WHEN:]TEMPLATE FILE
+ Append given template to the file. The
+ values of WHEN and TEMPLATE are the same as
+ those of --print. FILE uses the same syntax
+ as the output template. This option can be
+ used multiple times
+ -j, --dump-json Quiet, but print JSON information for each
+ video. Simulate unless --no-simulate is
+ used. See "OUTPUT TEMPLATE" for a
+ description of available keys
+ -J, --dump-single-json Quiet, but print JSON information for each
+ URL or infojson passed. Simulate unless
+ --no-simulate is used. If the URL refers to
+ a playlist, the whole playlist information
+ is dumped in a single line
+ --force-write-archive Force download archive entries to be written
+ as long as no errors occur, even if -s or
+ another simulation option is used (Alias:
+ --force-download-archive)
+ --newline Output progress bar as new lines
+ --no-progress Do not print progress bar
+ --progress Show progress bar, even if in quiet mode
+ --console-title Display progress in console titlebar
+ --progress-template [TYPES:]TEMPLATE
+ Template for progress outputs, optionally
+ prefixed with one of "download:" (default),
+ "download-title:" (the console title),
+ "postprocess:", or "postprocess-title:".
+ The video's fields are accessible under the
+ "info" key and the progress attributes are
+ accessible under "progress" key. E.g.
+ --console-title --progress-template
+ "download-title:%(info.id)s-%(progress.eta)s"
+ -v, --verbose Print various debugging information
+ --dump-pages Print downloaded pages encoded using base64
+ to debug problems (very verbose)
+ --write-pages Write downloaded intermediary pages to files
+ in the current directory to debug problems
+ --print-traffic Display sent and read HTTP traffic
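+
+E.g. a minimal sketch of --print (placeholder URL) that lists the title and
+duration of each video without downloading anything:
+
+```
+hypervideo --print "%(title)s - %(duration_string)s" "https://example.com/playlist"
+```
+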
## Workarounds:
- --encoding ENCODING Force the specified encoding
- (experimental)
- --no-check-certificate Suppress HTTPS certificate validation
- --prefer-insecure Use an unencrypted connection to
- retrieve information about the video.
- (Currently supported only for YouTube)
- --user-agent UA Specify a custom user agent
- --referer URL Specify a custom referer, use if the
- video access is restricted to one
- domain
- --add-header FIELD:VALUE Specify a custom HTTP header and its
- value, separated by a colon ':'. You
- can use this option multiple times
- --bidi-workaround Work around terminals that lack
- bidirectional text support. Requires
- bidiv or fribidi executable in PATH
- --sleep-interval SECONDS Number of seconds to sleep before each
- download when used alone or a lower
- bound of a range for randomized sleep
- before each download (minimum possible
- number of seconds to sleep) when used
- along with --max-sleep-interval.
- --max-sleep-interval SECONDS Upper bound of a range for randomized
- sleep before each download (maximum
- possible number of seconds to sleep).
- Must only be used along with --min-
- sleep-interval.
+ --encoding ENCODING Force the specified encoding (experimental)
+ --legacy-server-connect Explicitly allow HTTPS connection to servers
+ that do not support RFC 5746 secure
+ renegotiation
+ --no-check-certificates Suppress HTTPS certificate validation
+ --prefer-insecure Use an unencrypted connection to retrieve
+ information about the video (Currently
+ supported only for YouTube)
+ --add-header FIELD:VALUE Specify a custom HTTP header and its value,
+ separated by a colon ":". You can use this
+ option multiple times
+ --bidi-workaround Work around terminals that lack
+ bidirectional text support. Requires bidiv
+ or fribidi executable in PATH
+ --sleep-requests SECONDS Number of seconds to sleep between requests
+ during data extraction
+ --sleep-interval SECONDS Number of seconds to sleep before each
+ download. This is the minimum time to sleep
+ when used along with --max-sleep-interval
+ (Alias: --min-sleep-interval)
+ --max-sleep-interval SECONDS Maximum number of seconds to sleep. Can only
+ be used along with --min-sleep-interval
+ --sleep-subtitles SECONDS Number of seconds to sleep before each
+ subtitle download
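+
+For instance, to be gentle with a server (placeholder URL), sleep between
+extraction requests and randomize the sleep before each download:
+
+```
+hypervideo --sleep-requests 1 --sleep-interval 5 --max-sleep-interval 30 "https://example.com/playlist"
+```
+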
## Video Format Options:
- -f, --format FORMAT Video format code, see the "FORMAT
- SELECTION" for all the info
- --all-formats Download all available video formats
- --prefer-free-formats Prefer free video formats unless a
- specific one is requested
- -F, --list-formats List all available formats of requested
- videos
- --youtube-skip-dash-manifest Do not download the DASH manifests and
- related data on YouTube videos
- --merge-output-format FORMAT If a merge is required (e.g.
- bestvideo+bestaudio), output to given
- container format. One of mkv, mp4, ogg,
- webm, flv. Ignored if no merge is
- required
+ -f, --format FORMAT Video format code, see "FORMAT SELECTION"
+ for more details
+ -S, --format-sort SORTORDER Sort the formats by the fields given, see
+ "Sorting Formats" for more details
+ --format-sort-force Force user specified sort order to have
+ precedence over all fields, see "Sorting
+ Formats" for more details (Alias: --S-force)
+ --no-format-sort-force Some fields have precedence over the user
+ specified sort order (default)
+ --video-multistreams Allow multiple video streams to be merged
+ into a single file
+ --no-video-multistreams Only one video stream is downloaded for each
+ output file (default)
+ --audio-multistreams Allow multiple audio streams to be merged
+ into a single file
+ --no-audio-multistreams Only one audio stream is downloaded for each
+ output file (default)
+ --prefer-free-formats Prefer video formats with free containers
+ over non-free ones of the same quality. Use
+ with "-S ext" to strictly prefer free containers
+ irrespective of quality
+ --no-prefer-free-formats Don't give any special preference to free
+ containers (default)
+ --check-formats Make sure formats are selected only from
+ those that are actually downloadable
+ --check-all-formats Check all formats for whether they are
+ actually downloadable
+ --no-check-formats Do not check that the formats are actually
+ downloadable
+ -F, --list-formats List available formats of each video.
+ Simulate unless --no-simulate is used
+ --merge-output-format FORMAT Containers that may be used when merging
+ formats, separated by "/", e.g. "mp4/mkv".
+ Ignored if no merge is required. (currently
+ supported: avi, flv, mkv, mov, mp4, webm)
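+
+As a sketch (assuming the sort-field syntax described in "Sorting Formats"),
+the following caps resolution at 1080p, prefers free containers, and allows
+merging into mkv or webm:
+
+```
+hypervideo -S "res:1080" --prefer-free-formats --merge-output-format "mkv/webm" "https://example.com/video"
+```
+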
## Subtitle Options:
- --write-sub Write subtitle file
- --write-auto-sub Write automatically generated subtitle
- file (YouTube only)
- --all-subs Download all the available subtitles of
- the video
- --list-subs List all available subtitles for the
- video
- --sub-format FORMAT Subtitle format, accepts formats
- preference, for example: "srt" or
- "ass/srt/best"
- --sub-lang LANGS Languages of the subtitles to download
- (optional) separated by commas, use
- --list-subs for available language tags
+ --write-subs Write subtitle file
+ --no-write-subs Do not write subtitle file (default)
+ --write-auto-subs Write automatically generated subtitle file
+ (Alias: --write-automatic-subs)
+ --no-write-auto-subs Do not write auto-generated subtitles
+ (default) (Alias: --no-write-automatic-subs)
+ --list-subs List available subtitles of each video.
+ Simulate unless --no-simulate is used
+ --sub-format FORMAT Subtitle format; accepts formats preference,
+ e.g. "srt" or "ass/srt/best"
+ --sub-langs LANGS Languages of the subtitles to download (can
+ be regex) or "all" separated by commas, e.g.
+ --sub-langs "en.*,ja". You can prefix the
+ language code with a "-" to exclude it from
+ the requested languages, e.g. --sub-langs
+ all,-live_chat. Use --list-subs for a list
+ of available language tags
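+
+E.g. a sketch (placeholder URL) that writes English subtitles, preferring
+srt, while excluding live chat:
+
+```
+hypervideo --write-subs --sub-langs "en.*,-live_chat" --sub-format "srt/best" "https://example.com/video"
+```
+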
## Authentication Options:
- -u, --username USERNAME Login with this account ID
- -p, --password PASSWORD Account password. If this option is
- left out, hypervideo will ask
- interactively.
- -2, --twofactor TWOFACTOR Two-factor authentication code
- -n, --netrc Use .netrc authentication data
- --video-password PASSWORD Video password (vimeo, youku)
-
-## Adobe Pass Options:
- --ap-mso MSO Adobe Pass multiple-system operator (TV
- provider) identifier, use --ap-list-mso
- for a list of available MSOs
- --ap-username USERNAME Multiple-system operator account login
- --ap-password PASSWORD Multiple-system operator account
- password. If this option is left out,
- hypervideo will ask interactively.
- --ap-list-mso List all supported multiple-system
- operators
-
-## Post-processing Options:
- -x, --extract-audio Convert video files to audio-only files
- (requires ffmpeg/avconv and
- ffprobe/avprobe)
- --audio-format FORMAT Specify audio format: "best", "aac",
- "flac", "mp3", "m4a", "opus", "vorbis",
- or "wav"; "best" by default; No effect
- without -x
- --audio-quality QUALITY Specify ffmpeg/avconv audio quality,
- insert a value between 0 (better) and 9
- (worse) for VBR or a specific bitrate
- like 128K (default 5)
- --recode-video FORMAT Encode the video to another format if
- necessary (currently supported:
- mp4|flv|ogg|webm|mkv|avi)
- --postprocessor-args ARGS Give these arguments to the
- postprocessor
- -k, --keep-video Keep the video file on disk after the
- post-processing; the video is erased by
- default
- --no-post-overwrites Do not overwrite post-processed files;
- the post-processed files are
- overwritten by default
- --embed-subs Embed subtitles in the video (only for
- mp4, webm and mkv videos)
- --embed-thumbnail Embed thumbnail in the audio as cover
- art
- --add-metadata Write metadata to the video file
- --metadata-from-title FORMAT Parse additional metadata like song
- title / artist from the video title.
- The format syntax is the same as
- --output. Regular expression with named
- capture groups may also be used. The
- parsed parameters replace existing
- values. Example: --metadata-from-title
- "%(artist)s - %(title)s" matches a
- title like "Coldplay - Paradise".
- Example (regex): --metadata-from-title
- "(?P<artist>.+?) - (?P<title>.+)"
- --xattrs Write metadata to the video file's
- xattrs (using dublin core and xdg
- standards)
- --fixup POLICY Automatically correct known faults of
- the file. One of never (do nothing),
- warn (only emit a warning),
- detect_or_warn (the default; fix file
- if we can, warn otherwise)
- --prefer-avconv Prefer avconv over ffmpeg for running
- the postprocessors
- --prefer-ffmpeg Prefer ffmpeg over avconv for running
- the postprocessors (default)
- --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
- either the path to the binary or its
- containing directory.
- --exec CMD Execute a command on the file after
- downloading and post-processing,
- similar to find's -exec syntax.
- Example: --exec 'adb push {}
- /sdcard/Music/ && rm {}'
- --convert-subs FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt|lrc)
+ -u, --username USERNAME Login with this account ID
+ -p, --password PASSWORD Account password. If this option is left
+ out, hypervideo will ask interactively
+ -2, --twofactor TWOFACTOR Two-factor authentication code
+ -n, --netrc Use .netrc authentication data
+ --netrc-location PATH Location of .netrc authentication data;
+ either the path or its containing directory.
+ Defaults to ~/.netrc
+ --video-password PASSWORD Video password (vimeo, youku)
+ --ap-mso MSO Adobe Pass multiple-system operator (TV
+ provider) identifier, use --ap-list-mso for
+ a list of available MSOs
+ --ap-username USERNAME Multiple-system operator account login
+ --ap-password PASSWORD Multiple-system operator account password.
+ If this option is left out, hypervideo will
+ ask interactively
+ --ap-list-mso List all supported multiple-system operators
+ --client-certificate CERTFILE Path to client certificate file in PEM
+ format. May include the private key
+ --client-certificate-key KEYFILE
+ Path to private key file for client
+ certificate
+ --client-certificate-password PASSWORD
+ Password for client certificate private key,
+ if encrypted. If not provided, and the key
+ is encrypted, hypervideo will ask
+ interactively
+
+## Post-Processing Options:
+ -x, --extract-audio Convert video files to audio-only files
+ (requires ffmpeg and ffprobe)
+ --audio-format FORMAT Format to convert the audio to when -x is
+ used. (currently supported: best (default),
+ aac, alac, flac, m4a, mp3, opus, vorbis,
+ wav). You can specify multiple rules using
+ a syntax similar to --remux-video
+ --audio-quality QUALITY Specify ffmpeg audio quality to use when
+ converting the audio with -x. Insert a value
+ between 0 (best) and 10 (worst) for VBR or a
+ specific bitrate like 128K (default 5)
+ --remux-video FORMAT Remux the video into another container if
+ necessary (currently supported: avi, flv,
+ mkv, mov, mp4, webm, aac, aiff, alac, flac,
+ m4a, mka, mp3, ogg, opus, vorbis, wav). If
+ target container does not support the
+ video/audio codec, remuxing will fail. You
+ can specify multiple rules; e.g.
+ "aac>m4a/mov>mp4/mkv" will remux aac to m4a,
+ mov to mp4 and anything else to mkv
+ --recode-video FORMAT Re-encode the video into another format if
+ necessary. The syntax and supported formats
+ are the same as --remux-video
+ --postprocessor-args NAME:ARGS Give these arguments to the postprocessors.
+ Specify the postprocessor/executable name
+ and the arguments separated by a colon ":"
+ to give the argument to the specified
+ postprocessor/executable. Supported PP are:
+ Merger, ModifyChapters, SplitChapters,
+ ExtractAudio, VideoRemuxer, VideoConvertor,
+ Metadata, EmbedSubtitle, EmbedThumbnail,
+ SubtitlesConvertor, ThumbnailsConvertor,
+ FixupStretched, FixupM4a, FixupM3u8,
+ FixupTimestamp and FixupDuration. The
+ supported executables are: AtomicParsley,
+ FFmpeg and FFprobe. You can also specify
+ "PP+EXE:ARGS" to give the arguments to the
+ specified executable only when being used by
+ the specified postprocessor. Additionally,
+ for ffmpeg/ffprobe, "_i"/"_o" can be
+ appended to the prefix optionally followed
+ by a number to pass the argument before the
+ specified input/output file, e.g. --ppa
+ "Merger+ffmpeg_i1:-v quiet". You can use
+ this option multiple times to give different
+ arguments to different postprocessors.
+ (Alias: --ppa)
+ -k, --keep-video Keep the intermediate video file on disk
+ after post-processing
+ --no-keep-video Delete the intermediate video file after
+ post-processing (default)
+ --post-overwrites Overwrite post-processed files (default)
+ --no-post-overwrites Do not overwrite post-processed files
+ --embed-subs Embed subtitles in the video (only for mp4,
+ webm and mkv videos)
+ --no-embed-subs Do not embed subtitles (default)
+ --embed-thumbnail Embed thumbnail in the video as cover art
+ --no-embed-thumbnail Do not embed thumbnail (default)
+ --embed-metadata Embed metadata to the video file. Also
+ embeds chapters/infojson if present unless
+ --no-embed-chapters/--no-embed-info-json are
+ used (Alias: --add-metadata)
+ --no-embed-metadata Do not add metadata to file (default)
+ (Alias: --no-add-metadata)
+ --embed-chapters Add chapter markers to the video file
+ (Alias: --add-chapters)
+ --no-embed-chapters Do not add chapter markers (default) (Alias:
+ --no-add-chapters)
+ --embed-info-json Embed the infojson as an attachment to
+ mkv/mka video files
+ --no-embed-info-json Do not embed the infojson as an attachment
+ to the video file
+ --parse-metadata FROM:TO Parse additional metadata like title/artist
+ from other fields; see "MODIFYING METADATA"
+ for details
+ --replace-in-metadata FIELDS REGEX REPLACE
+ Replace text in a metadata field using the
+ given regex. This option can be used
+ multiple times
+ --xattrs Write metadata to the video file's xattrs
+ (using Dublin Core and XDG standards)
+ --concat-playlist POLICY Concatenate videos in a playlist. One of
+ "never", "always", or "multi_video"
+ (default; only when the videos form a single
+ show). All the video files must have the
+ same codecs and number of streams to be
+ concatenated. The "pl_video:" prefix can be
+ used with "--paths" and "--output" to set
+ the output filename for the concatenated
+ files. See "OUTPUT TEMPLATE" for details
+ --fixup POLICY Automatically correct known faults of the
+ file. One of never (do nothing), warn (only
+ emit a warning), detect_or_warn (the
+ default; fix file if we can, warn
+ otherwise), force (try fixing even if file
+ already exists)
+ --ffmpeg-location PATH Location of the ffmpeg binary; either the
+ path to the binary or its containing directory
+ --exec [WHEN:]CMD Execute a command, optionally prefixed with
+ when to execute it (after_move if
+ unspecified), separated by a ":". Supported
+ values of "WHEN" are the same as that of
+ --use-postprocessor. Same syntax as the
+ output template can be used to pass any
+ field as arguments to the command. After
+ download, an additional field "filepath"
+ that contains the final path of the
+ downloaded file is also available, and if no
+ fields are passed, %(filepath)q is appended
+ to the end of the command. This option can
+ be used multiple times
+ --no-exec Remove any previously defined --exec
+ --convert-subs FORMAT Convert the subtitles to another format
+ (currently supported: ass, lrc, srt, vtt)
+ (Alias: --convert-subtitles)
+ --convert-thumbnails FORMAT Convert the thumbnails to another format
+ (currently supported: jpg, png, webp). You
+ can specify multiple rules using a syntax
+ similar to --remux-video
+ --split-chapters Split video into multiple files based on
+ internal chapters. The "chapter:" prefix can
+ be used with "--paths" and "--output" to set
+ the output filename for the split files. See
+ "OUTPUT TEMPLATE" for details
+ --no-split-chapters Do not split video based on chapters (default)
+ --remove-chapters REGEX Remove chapters whose title matches the
+ given regular expression. The syntax is the
+ same as --download-sections. This option can
+ be used multiple times
+ --no-remove-chapters Do not remove any chapters from the file
+ (default)
+ --force-keyframes-at-cuts Force keyframes at cuts when
+ downloading/splitting/removing sections.
+ This is slow due to needing a re-encode, but
+ the resulting video may have fewer artifacts
+ around the cuts
+ --no-force-keyframes-at-cuts Do not force keyframes around the chapters
+ when cutting/splitting (default)
+ --use-postprocessor NAME[:ARGS]
+ The (case sensitive) name of plugin
+ postprocessors to be enabled, and
+ (optionally) arguments to be passed to it,
+ separated by a colon ":". ARGS are a
+ semicolon ";" delimited list of NAME=VALUE.
+ The "when" argument determines when the
+ postprocessor is invoked. It can be one of
+ "pre_process" (after video extraction),
+ "after_filter" (after video passes filter),
+ "before_dl" (before each video download),
+ "post_process" (after each video download;
+ default), "after_move" (after moving video
+ file to its final location), "after_video"
+ (after downloading and processing all
+ formats of a video), or "playlist" (at end
+ of playlist). This option can be used
+ multiple times to add different postprocessors
+
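+For example, a common audio-extraction pipeline built from the options above
+(placeholder URL): extract mp3 audio, embed the thumbnail as cover art, and
+embed metadata:
+
+```
+hypervideo -x --audio-format mp3 --embed-thumbnail --embed-metadata "https://example.com/video"
+```
+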
+## SponsorBlock Options:
+Make chapter entries for, or remove, various segments (sponsor,
+ introductions, etc.) from downloaded YouTube videos using the
+ SponsorBlock API (https://sponsor.ajay.app)
+
+ --sponsorblock-mark CATS SponsorBlock categories to create chapters
+ for, separated by commas. Available
+ categories are sponsor, intro, outro,
+ selfpromo, preview, filler, interaction,
+ music_offtopic, poi_highlight, chapter, all
+ and default (=all). You can prefix the
+ category with a "-" to exclude it. See [1]
+ for description of the categories. E.g.
+ --sponsorblock-mark all,-preview
+ [1] https://wiki.sponsor.ajay.app/w/Segment_Categories
+ --sponsorblock-remove CATS SponsorBlock categories to be removed from
+ the video file, separated by commas. If a
+ category is present in both mark and remove,
+ remove takes precedence. The syntax and
+ available categories are the same as for
+ --sponsorblock-mark except that "default"
+ refers to "all,-filler" and poi_highlight,
+ chapter are not available
+ --sponsorblock-chapter-title TEMPLATE
+ An output template for the title of the
+ SponsorBlock chapters created by
+ --sponsorblock-mark. The only available
+ fields are start_time, end_time, category,
+ categories, name, category_names. Defaults
+ to "[SponsorBlock]: %(category_names)l"
+ --no-sponsorblock Disable both --sponsorblock-mark and
+ --sponsorblock-remove
+ --sponsorblock-api URL SponsorBlock API location, defaults to
+ https://sponsor.ajay.app
+
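+For instance, a sketch on a hypothetical YouTube URL that marks all segment
+categories as chapters while cutting sponsor segments out of the file:
+
+```
+hypervideo --sponsorblock-mark all --sponsorblock-remove sponsor "https://www.youtube.com/watch?v=PLACEHOLDER"
+```
+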
+## Extractor Options:
+ --extractor-retries RETRIES Number of retries for known extractor errors
+ (default is 3), or "infinite"
+ --allow-dynamic-mpd Process dynamic DASH manifests (default)
+ (Alias: --no-ignore-dynamic-mpd)
+ --ignore-dynamic-mpd Do not process dynamic DASH manifests
+ (Alias: --no-allow-dynamic-mpd)
+ --hls-split-discontinuity Split HLS playlists to different formats at
+ discontinuities such as ad breaks
+ --no-hls-split-discontinuity Do not split HLS playlists to different
+ formats at discontinuities such as ad breaks
+ (default)
+ --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor.
+ See "EXTRACTOR ARGUMENTS" for details. You
+ can use this option multiple times to give
+ arguments for different extractors
# CONFIGURATION
-You can configure hypervideo by placing any supported command line option to a configuration file. On GNU+Linux and macOS, the system wide configuration file is located at `/etc/hypervideo.conf` and the user wide configuration file at `~/.config/hypervideo/config`. Note that by default configuration file may not exist so you may need to create it yourself.
-
-For example, with the following configuration file hypervideo will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
-
+You can configure hypervideo by placing any supported command line option in a configuration file. The configuration is loaded from the following locations:
+
+1. **Main Configuration**:
+ * The file given by `--config-location`
+1. **Portable Configuration**: (Recommended for portable installations)
+ * If using a binary, `hypervideo.conf` in the same directory as the binary
+ * If running from source-code, `hypervideo.conf` in the parent directory of `hypervideo`
+1. **Home Configuration**:
+ * `hypervideo.conf` in the home path given by `-P`
+ * If `-P` is not given, the current directory is searched
+1. **User Configuration**:
+ * `${XDG_CONFIG_HOME}/hypervideo/config` (recommended on Linux/macOS)
+ * `${XDG_CONFIG_HOME}/hypervideo.conf`
+ * `${APPDATA}/hypervideo/config` (recommended on Windows)
+ * `${APPDATA}/hypervideo/config.txt`
+ * `~/hypervideo.conf`
+ * `~/hypervideo.conf.txt`
+
+ See also: [Notes about environment variables](#notes-about-environment-variables)
+1. **System Configuration**:
+ * `/etc/hypervideo.conf`
+
+E.g. with the following configuration file hypervideo will always extract the audio, not copy the mtime, use a proxy and save all videos under the `YouTube` directory in your home directory:
```
# Lines starting with # are comments
@@ -473,101 +1043,148 @@ For example, with the following configuration file hypervideo will always extrac
# Use this proxy
--proxy 127.0.0.1:3128
-# Save all videos under Movies directory in your home directory
--o ~/Movies/%(title)s.%(ext)s
+# Save all videos under YouTube directory in your home directory
+-o ~/YouTube/%(title)s.%(ext)s
```
-Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
+Note that options in a configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.
+
+You can use `--ignore-config` if you want to disable all configuration files for a particular hypervideo run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
-You can use `--ignore-config` if you want to disable the configuration file for a particular hypervideo run.
+### Configuration file encoding
-You can also use `--config-location` if you want to use custom configuration file for a particular hypervideo run.
+The configuration files are decoded according to the UTF BOM if present, and in the system locale's encoding otherwise.
+
+If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM.
### Authentication with `.netrc` file
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every hypervideo execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every hypervideo execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
```
-touch $HOME/.netrc
-chmod a-rwx,u+rw $HOME/.netrc
+touch ${HOME}/.netrc
+chmod a-rwx,u+rw ${HOME}/.netrc
```
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
```
-machine <extractor> login <login> password <password>
+machine <extractor> login <username> password <password>
```
-For example:
+E.g.
```
machine youtube login myaccount@gmail.com password my_youtube_password
machine twitch login my_twitch_account_name password my_twitch_password
```
To activate authentication with the `.netrc` file you should pass `--netrc` to hypervideo or place it in the [configuration file](#configuration).
-On Windows you may also need to setup the `%HOME%` environment variable manually. For example:
-```
-set HOME=%USERPROFILE%
-```
+The default location of the .netrc file is `~` (see below).
+
+### Notes about environment variables
+* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows, but are always shown as `${VARIABLE}` in this documentation
+* hypervideo also allows using UNIX-style variables on Windows for path-like options, e.g. `--output`, `--config-location`
+* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache`
+* On Windows, `~` points to `${HOME}` if present, or to `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise
+* On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming`
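+
+E.g. a sketch exercising the notes above: a UNIX-style variable in a
+path-like option, which hypervideo also accepts on Windows (the directory
+and URL are arbitrary placeholders):
+
+```
+hypervideo -P "${USERPROFILE}/Videos" -o "%(title)s.%(ext)s" "https://example.com/video"
+```
+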
# OUTPUT TEMPLATE
-The `-o` option allows users to indicate a template for the output file names.
+The `-o` option is used to indicate a template for the output file names while the `-P` option is used to specify the path each type of file should be saved to.
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
**tl;dr:** [navigate me to examples](#output-template-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `hypervideo -o funny_video.flv "https://some/video"` (hard-coding the file extension like this is _not_ recommended and could break some post-processing).
+
+It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), e.g. `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
+
+The field names themselves (the part inside the parenthesis) can also have some special formatting:
+
+1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; e.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that the fields that become available using this method are not listed below; use `-j` to see such fields
+
+1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
+
+1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
-The basic usage is not to set any template arguments when downloading a single file, like in `hypervideo -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
+1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
+
+1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
+
+1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
+
+1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, hypervideo additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
+
+1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
+
+To summarize, the general syntax for a field is:
+```
+%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
+```
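+
+E.g. a template exercising several of these features at once (the field
+values come from whatever the extractor provides): strftime formatting with
+an alternative field, plus a literal default:
+
+```
+hypervideo -o "%(release_date>%Y,upload_date>%Y|Unknown)s - %(uploader|Unknown)s - %(title)s.%(ext)s" "https://example.com/video"
+```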
+
+Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
+
+<a id="outtmpl-postprocess-note"></a>
+
+Note: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete.
+
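+E.g. a sketch that prints the final path of each file once all
+post-processing has finished (placeholder URL):
+
+```
+hypervideo --print after_move:filepath "https://example.com/video"
+```
+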
+The available fields are:
- `id` (string): Video identifier
- `title` (string): Video title
- - `url` (string): Video URL
+ - `fulltitle` (string): Video title ignoring live timestamp and generic title
- `ext` (string): Video filename extension
- `alt_title` (string): A secondary title of the video
+ - `description` (string): The description of the video
- `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader
- `license` (string): License name the video is licensed under
- `creator` (string): The creator of the video
- - `release_date` (string): The date (YYYYMMDD) when the video was released
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
- - `upload_date` (string): Video upload date (YYYYMMDD)
+ - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
+ - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
+ - `release_date` (string): The date (YYYYMMDD) when the video was released in UTC
+ - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
+ - `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
- `uploader_id` (string): Nickname or id of the video uploader
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
+ - `channel_follower_count` (numeric): Number of followers of the channel
- `location` (string): Physical location where the video was filmed
- `duration` (numeric): Length of the video in seconds
+ - `duration_string` (string): Length of the video (HH:mm:ss)
- `view_count` (numeric): How many users have watched the video on the platform
+ - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform
- `like_count` (numeric): Number of positive ratings of the video
- `dislike_count` (numeric): Number of negative ratings of the video
- `repost_count` (numeric): Number of reposts of the video
- `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage
- - `comment_count` (numeric): Number of comments on the video
+ - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `age_limit` (numeric): Age restriction for the video (years)
+ - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `was_live` (boolean): Whether this video was originally a live stream
+ - `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
+ - `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- - `format` (string): A human-readable description of the format
- - `format_id` (string): Format code specified by `--format`
- - `format_note` (string): Additional info about the format
- - `width` (numeric): Width of the video
- - `height` (numeric): Height of the video
- - `resolution` (string): Textual description of width and height
- - `tbr` (numeric): Average bitrate of audio and video in KBit/s
- - `abr` (numeric): Average audio bitrate in KBit/s
- - `acodec` (string): Name of the audio codec in use
- - `asr` (numeric): Audio sampling rate in Hertz
- - `vbr` (numeric): Average video bitrate in KBit/s
- - `fps` (numeric): Frame rate
- - `vcodec` (string): Name of the video codec in use
- - `container` (string): Name of the container format
- - `filesize` (numeric): The number of bytes, if known in advance
- - `filesize_approx` (numeric): An estimate for the number of bytes
- - `protocol` (string): The protocol that will be used for the actual download
- `extractor` (string): Name of the extractor
- `extractor_key` (string): Key name of the extractor
- - `epoch` (numeric): Unix epoch when creating the file
+ - `epoch` (numeric): Unix epoch of when the information extraction was completed
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
- - `playlist` (string): Name or id of the playlist that contains the video
- - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
- - `playlist_id` (string): Playlist identifier
- - `playlist_title` (string): Playlist title
+ - `video_autonumber` (numeric): Number that will be increased with each video
+ - `n_entries` (numeric): Total number of extracted items in the playlist
+ - `playlist_id` (string): Identifier of the playlist that contains the video
+ - `playlist_title` (string): Name of the playlist that contains the video
+ - `playlist` (string): `playlist_id` or `playlist_title`
+ - `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
+ - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
+ - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
- `playlist_uploader` (string): Full name of the playlist uploader
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
+ - `webpage_url` (string): A URL to the video webpage which, if given to hypervideo, should allow you to get the same result again
+ - `webpage_url_basename` (string): The basename of the webpage URL
+ - `webpage_url_domain` (string): The domain of the webpage URL
+ - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
+
+All the fields in [Filtering Formats](#filtering-formats) can also be used
Available for the video that belongs to some logical chapter or section:
@@ -598,46 +1215,85 @@ Available for the media that is a track or a part of a music album:
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
- `release_year` (numeric): Year (YYYY) when the album was released
-Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
+Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
-For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `hypervideo test video` and id `BaW_jenozKcj`, this will result in a `hypervideo test video-BaW_jenozKcj.mp4` file created in the current directory.
+ - `section_title` (string): Title of the chapter
+ - `section_number` (numeric): Number of the chapter within the file
+ - `section_start` (numeric): Start time of the chapter in seconds
+ - `section_end` (numeric): End time of the chapter in seconds
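+
+For example, when using `--split-chapters`, these fields can name each resulting file (a sketch; the `chapter:` prefix selects the output template used for the split files):
+
+```bash
+# Split the video along its internal chapters and name each piece
+# "<video title> - <section number> <section title>.<ext>"
+$ hypervideo --split-chapters -o "chapter:%(title)s - %(section_number)02d %(section_title)s.%(ext)s"
+```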
-For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
+Available only when used in `--print`:
-Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
+ - `urls` (string): The URLs of all requested formats, one in each line
+ - `filename` (string): Name of the video file. Note that the [actual filename may differ](#outtmpl-postprocess-note)
+ - `formats_table` (table): The video format table as printed by `--list-formats`
+ - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
+ - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
+ - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`
-To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
-The current default template is `%(title)s-%(id)s.%(ext)s`.
+Available only in `--sponsorblock-chapter-title`:
-In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+ - `start_time` (numeric): Start time of the chapter in seconds
+ - `end_time` (numeric): End time of the chapter in seconds
+ - `categories` (list): The [SponsorBlock categories](https://wiki.sponsor.ajay.app/w/Types#Category) the chapter belongs to
+ - `category` (string): The smallest SponsorBlock category the chapter belongs to
+ - `category_names` (list): Friendly names of the categories
+ - `name` (string): Friendly name of the smallest category
+ - `type` (string): The [SponsorBlock action type](https://wiki.sponsor.ajay.app/w/Types#Action_Type) of the chapter
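+
+For instance, a minimal sketch (it assumes the companion `--sponsorblock-mark` option behaves as in upstream yt-dlp):
+
+```bash
+# Mark all SponsorBlock categories and title each marked chapter by its category
+$ hypervideo --sponsorblock-mark all --sponsorblock-chapter-title "[SponsorBlock]: %(category)s"
+```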
-#### Output template and Windows batch files
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `hypervideo test video` and id `BaW_jenozKc`, this will result in a `hypervideo test video-BaW_jenozKc.mp4` file created in the current directory.
-If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
+Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
-#### Output template examples
+**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
+
+For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
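+
+For instance, a quick sketch to preview the padding (`--print` should skip the actual download):
+
+```bash
+# Print the zero-padded view count of the example video
+$ hypervideo --print "%(view_count)05d" BaW_jenozKc
+```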
+
+Output templates can also contain arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
+
+To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
-Note that on Windows you may need to use double quotes instead of single.
+The current default template is `%(title)s [%(id)s].%(ext)s`.
+
+In some cases, you don't want special characters such as 中, spaces, or &, for example when transferring the downloaded filename to a Windows system or passing it through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
+
+#### Output template examples
```bash
-$ hypervideo --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
-hypervideo test video ''_ä↭𝕐.mp4 # All kinds of weird characters
+$ hypervideo --print filename -o "test video.%(ext)s" BaW_jenozKc
+test video.webm # Literal name with correct extension
-$ hypervideo --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
-hypervideo_test_video_.mp4 # A simple file name
+$ hypervideo --print filename -o "%(title)s.%(ext)s" BaW_jenozKc
+youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters
+
+$ hypervideo --print filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+youtube-dl_test_video_.webm # Restricted file name
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
-$ hypervideo -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
+$ hypervideo -o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Download YouTube playlist videos in separate directories according to their uploaded year
+$ hypervideo -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Prefix playlist index with " - " separator, but only if it is available
+$ hypervideo -o '%(playlist_index|)s%(playlist_index& - |)s%(title)s.%(ext)s' BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
-$ hypervideo -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists
+$ hypervideo -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists"
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
-$ hypervideo -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
+$ hypervideo -u user -p password -P "~/MyVideos" -o "%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s" "https://www.udemy.com/java-tutorial"
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
-$ hypervideo -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
+$ hypervideo -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" "https://videomore.ru/kino_v_detalayah/5_sezon/367617"
+
+# Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext"
+# and put all temporary files in "C:\MyVideos\tmp"
+$ hypervideo -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenoz --write-subs
+
+# Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext"
+$ hypervideo -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs
# Stream the video being downloaded to stdout
$ hypervideo -o - BaW_jenozKc
@@ -645,697 +1301,527 @@ $ hypervideo -o - BaW_jenozKc
# FORMAT SELECTION
-By default hypervideo tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, hypervideo will guess it for you by **default**.
+By default, hypervideo tries to download the best available quality if you **don't** pass any options.
+This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audio streams are enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use hypervideo to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
-But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
+**Deprecation warning**: Latest versions of hypervideo can stream multiple formats to the stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b` similar to normal downloads. If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options.
-The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
+The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
**tl;dr:** [navigate me to examples](#format-selection-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
-The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
+The simplest case is requesting a specific format; e.g. with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
+You can use `-f -` to interactively provide the format selector *for each video*
+
You can also use special names to select particular edge case formats:
- - `best`: Select the best quality format represented by a single file with video and audio.
- - `worst`: Select the worst quality format represented by a single file with video and audio.
- - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
- - `worstvideo`: Select the worst quality video-only format. May not be available.
- - `bestaudio`: Select the best quality audio only-format. May not be available.
- - `worstaudio`: Select the worst quality audio only-format. May not be available.
+ - `all`: Select **all formats** separately
+ - `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
+ - `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (i.e. `vcodec!=none or acodec!=none`)
+ - `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
+ - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
+ - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
+ - `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]`
+ - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` ([Do not use!](https://github.com/yt-dlp/yt-dlp/issues/979#issuecomment-919629354))
+ - `w*`, `worst*`: Select the worst quality format that contains either a video or an audio
+ - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
+ - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]`
+ - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]`
+ - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
+ - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
-For example, to download the worst quality video-only format you can use `-f worstvideo`.
+For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.
-If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
+You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
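+
+E.g. a sketch of this selector inside a full format expression:
+
+```bash
+# Download the 2nd best video-containing stream merged with the best audio,
+# falling back to the best combined format
+$ hypervideo -f "bv*.2+ba/b"
+```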
+
+If you want to download multiple videos, and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
+You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed); e.g. `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.
+
+**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will instead be added to limit formats to a single audio/video stream.
+
+Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download only `best` while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
+
+## Filtering Formats
+
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
- `filesize`: The number of bytes, if known in advance
+ - `filesize_approx`: An estimate for the number of bytes
- `width`: Width of the video, if known
- `height`: Height of the video, if known
+ - `aspect_ratio`: Aspect ratio of the video, if known
- `tbr`: Average bitrate of audio and video in KBit/s
- `abr`: Average audio bitrate in KBit/s
- `vbr`: Average video bitrate in KBit/s
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate
+ - `audio_channels`: The number of audio channels
+ - `stretched_ratio`: `width:height` of the video's pixels, if not square
-Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
+Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and the following string meta fields:
+ - `url`: Video URL
- `ext`: File extension
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
- `container`: Name of the container format
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- - `format_id`: A short description of the format
- `language`: Language code
+ - `dynamic_range`: The dynamic range of the video
+ - `format_id`: A short description of the format
+ - `format`: A human-readable description of the format
+ - `format_note`: Additional info about the format
+ - `resolution`: Textual description of width and height
+
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
+
+Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
+
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
+
+Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
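+
+Putting the above filters together, a hedged sketch:
+
+```bash
+# Up to 1080p video (or video of unknown height) paired with m4a audio,
+# falling back to the best combined format
+$ hypervideo -f "bv*[height<=?1080]+ba[ext=m4a]/b"
+```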
+
+## Sorting Formats
+
+You can change the criteria for being considered the `best` by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`.
+
+The available fields are:
+
+ - `hasvid`: Gives priority to formats that have a video stream
+ - `hasaud`: Gives priority to formats that have an audio stream
+ - `ie_pref`: The format preference
+ - `lang`: The language preference
+ - `quality`: The quality of the format
+ - `source`: The preference of the source
+ - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
+ - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
+ - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other)
+ - `codec`: Equivalent to `vcodec,acodec`
+ - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred.
+ - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
+ - `ext`: Equivalent to `vext,aext`
+ - `filesize`: Exact filesize, if known in advance
+ - `fs_approx`: Approximate filesize calculated from the manifests
+ - `size`: Exact filesize if available, otherwise approximate filesize
+ - `height`: Height of video
+ - `width`: Width of video
+ - `res`: Video resolution, calculated as the smallest dimension.
+ - `fps`: Framerate of video
+ - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
+ - `channels`: The number of audio channels
+ - `tbr`: Total average bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `br`: Equivalent to using `tbr,vbr,abr`
+ - `asr`: Audio sample rate in Hz
-Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
-
-Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
-
-Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
+**Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.
-You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv.
+All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the value nearest to the one provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
-Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
+The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
-Since the end of April 2015 and version 2015.04.26, hypervideo uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/ytdl-org/youtube-dl/issues/5447), [#5456](https://github.com/ytdl-org/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
-If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run hypervideo.
+If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
-#### Format selection examples
+**Tip**: You can use `-v -F` to see how the formats have been sorted (worst to best).
-Note that on Windows you may need to use double quotes instead of single.
+## Format Selection examples
```bash
-# Download best mp4 format available or any other best if no mp4 available
-$ hypervideo -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
-
-# Download best format available but no better than 480p
-$ hypervideo -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
-
-# Download best video only format but no bigger than 50 MB
-$ hypervideo -f 'best[filesize<50M]'
-
-# Download best format available via direct link over HTTP/HTTPS protocol
-$ hypervideo -f '(bestvideo+bestaudio/best)[protocol^=http]'
-
-# Download the best video format and the best audio format without merging them
-$ hypervideo -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s'
-```
-Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name.
-
+# Download and merge the best video-only format and the best audio-only format,
+# or download the best combined format if video-only format is not available
+$ hypervideo -f "bv+ba/b"
-# VIDEO SELECTION
+# Download best format that contains video,
+# and if it doesn't already have an audio stream, merge it with best audio-only format
+$ hypervideo -f "bv*+ba/b"
-Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`. They accept dates in two formats:
+# Same as above
+$ hypervideo
- - Absolute dates: Dates in the format `YYYYMMDD`.
- - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?`
+# Download the best video-only format and the best audio-only format without merging them
+# For this case, an output template should be used since
+# by default, bestvideo and bestaudio will have the same file name.
+$ hypervideo -f "bv,ba" -o "%(title)s.f%(format_id)s.%(ext)s"
-Examples:
+# Download and merge the best format that has a video stream,
+# and all audio-only formats into one file
+$ hypervideo -f "bv*+mergeall[vcodec=none]" --audio-multistreams
-```bash
-# Download only the videos uploaded in the last 6 months
-$ hypervideo --dateafter now-6months
+# Download and merge the best format that has a video stream,
+# and the best 2 audio-only formats into one file
+$ hypervideo -f "bv*+ba+ba.2" --audio-multistreams
-# Download only the videos uploaded on January 1, 1970
-$ hypervideo --date 19700101
-$ # Download only the videos uploaded in the 200x decade
-$ hypervideo --dateafter 20000101 --datebefore 20091231
-```
+# The following examples show the old method (without -S) of format selection
+# and how to use -S to achieve a similar but (generally) better result
-# FAQ
+# Download the worst video available (old method)
+$ hypervideo -f "wv*+wa/w"
-### How do I update hypervideo?
+# Download the best video available but with the smallest resolution
+$ hypervideo -S "+res"
-If you have used pacman, a simple `doas pacman -Syu hypervideo` is sufficient to update.
+# Download the smallest video available
+$ hypervideo -S "+size,+br"
-As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
-```console
-$ doas pacman -Rs hypervideo
-```
-### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
+# Download the best mp4 video available, or the best video if no mp4 available
+$ hypervideo -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b"
-YouTube changed their playlist format in March 2014 and later on, so you'll need at least hypervideo to download all YouTube videos.
+# Download the best video with the best extension
+# (For video, mp4 > mov > webm > flv. For audio, m4a > aac > mp3 ...)
+$ hypervideo -S "ext"
-### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
-Make sure you are not using `-o` with any of these options `-t`, `--title`, `--id`, `-A` or `--auto-number` set in command line or in a configuration file. Remove the latter if any.
-### Do I always have to pass `-citw`?
+# Download the best video available but no better than 480p,
+# or the worst video if there is no video under 480p
+$ hypervideo -f "bv*[height<=480]+ba/b[height<=480] / wv*+ba/w"
-By default, hypervideo intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
+# Download the best video available with the largest height but no better than 480p,
+# or the best video with the smallest resolution if there is no video under 480p
+$ hypervideo -S "height:480"
-### Can you please put the `-b` option back?
+# Download the best video available with the largest resolution but no better than 480p,
+# or the best video with the smallest resolution if there is no video under 480p
+# Resolution is determined by using the smallest dimension.
+# So this works correctly for vertical videos as well
+$ hypervideo -S "res:480"
-Most people asking this question are not aware that hypervideo now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and hypervideo will try to download it.
-### I get HTTP error 402 when trying to download a video. What's this?
-Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/ytdl-org/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart hypervideo.
+# Download the best video (that also has audio) but no bigger than 50 MB,
+# or the worst video (that also has audio) if there is no video under 50 MB
+$ hypervideo -f "b[filesize<50M] / w"
-### Do I need any other programs?
+# Download largest video (that also has audio) but no bigger than 50 MB,
+# or the smallest video (that also has audio) if there is no video under 50 MB
+$ hypervideo -f "b" -S "filesize:50M"
-hypervideo works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. hypervideo will detect whether avconv/ffmpeg is present and automatically pick the best option.
+# Download best video (that also has audio) that is closest in size to 50 MB
+$ hypervideo -f "b" -S "filesize~50M"
-Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
-### I have downloaded a video but how can I play it?
-Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
+# Download best video available via direct link over HTTP/HTTPS protocol,
+# or the best video available via any protocol if there is no such video
+$ hypervideo -f "(bv*+ba/b)[protocol^=http][protocol!*=dash] / (bv*+ba/b)"
-### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
+# Download best video available via the best protocol
+# (https/ftps > http/ftp > m3u8_native > m3u8 > http_dash_segments ...)
+$ hypervideo -S "proto"
-It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by hypervideo. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
-It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
-Please bear in mind that some URL protocols are **not** supported by browsers out of the box, including RTMP. If you are using `-g`, your own downloader must support these as well.
+# Download the best video with either h264 or h265 codec,
+# or the best video if there is no such video
+$ hypervideo -f "(bv*[vcodec~='^((he|a)vc|h26[45])']+ba) / (bv*+ba/b)"
-If you want to play the video on a machine that is not running hypervideo, you can relay the video content from the machine that runs hypervideo. You can use `-o -` to let hypervideo stream a video to stdout, or simply allow the player to download the files written by hypervideo in turn.
+# Download the best video with best codec no better than h264,
+# or the best video with worst codec if there is no such video
+$ hypervideo -S "codec:h264"
-### ERROR: no fmt_url_map or conn information found in video info
+# Download the best video with worst codec no worse than h264,
+# or the best video with best codec if there is no such video
+$ hypervideo -S "+codec:h264"
-YouTube has switched to a new video info format in July 2011 which is not supported by old versions of hypervideo. See [above](#how-do-i-update-hypervideo) for how to update hypervideo.
-### ERROR: unable to download video
-YouTube requires an additional signature since September 2012 which is not supported by old versions of hypervideo. See [above](#how-do-i-update-hypervideo) for how to hypervideo.
+# More complex examples
-### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command`
+# Download the best video no better than 720p preferring framerate greater than 30,
+# or the worst video (still preferring framerate greater than 30) if there is no such video
+$ hypervideo -f "((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)"
-That's actually the output from your shell. Since ampersand is one of the special shell characters it's interpreted by the shell preventing you from passing the whole URL to hypervideo. To disable your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach will work depends on your shell).
+# Download the video with the largest resolution no better than 720p,
+# or the video with the smallest resolution available if there is no such video,
+# preferring larger framerate for formats with the same resolution
+$ hypervideo -S "res:720,fps"
-For example if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with following command:
-```console
-$ hypervideo 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'
-```
-
-or
-```console
-$ hypervideo https://www.youtube.com/watch?t=4\&v=BaW_jenozKc
+# Download the video with smallest resolution no worse than 480p,
+# or the video with the largest resolution available if there is no such video,
+# preferring better codec and then larger total bitrate for the same resolution
+$ hypervideo -S "+res:480,codec,br"
```
-### ExtractorError: Could not find JS function u'OF'
-
-In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of hypervideo. See [above](#how-do-i-update-hypervideo) for how to update hypervideo.
-
-### HTTP Error 429: Too Many Requests or 402: Payment Required
-
-These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block meaning that you can gain access again after solving CAPTCHA. Just open a browser and solve a CAPTCHA the service suggests you and after that [pass cookies](#how-do-i-pass-cookies-to-hypervideo) to hypervideo. Note that if your machine has multiple external IPs then you should also pass exactly the same IP you've used for solving CAPTCHA with [`--source-address`](#network-options). Also you may need to pass a `User-Agent` HTTP header of your browser with [`--user-agent`](#workarounds).
-
-If this is not the case (no CAPTCHA suggested to solve by the service) then you can contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
-
-### SyntaxError: Non-ASCII character
-
-The error
-
- File "hypervideo", line 2
- SyntaxError: Non-ASCII character '\x93' ...
-
-means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.
-
-### What is this binary file? Where has the code gone?
-
-Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) hypervideo is packed as an executable zipfile, simply unzip it (might need renaming to `hypervideo.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make hypervideo`.
-
-### The exe throws an error due to missing `MSVCR100.dll`
-
-To run the exe you need to install first the [Microsoft Visual C++ 2010 Service Pack 1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe).
-
-### On Windows, how should I set up ffmpeg and hypervideo? Where should I put the exe files?
-
-If you put hypervideo and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
-
-To make a different directory work - either for ffmpeg, or for hypervideo, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
-
-From then on, after restarting your shell, you will be able to access both hypervideo and ffmpeg (and hypervideo will be able to find ffmpeg) by simply typing `hypervideo` or `ffmpeg`, no matter what directory you're in.
-
-### How do I put downloads into a specific folder?
-
-Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
-
-### How do I download a video starting with a `-`?
-
-Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
-
- $ hypervideo -- -wNyEUrxzFU
- $ hypervideo "https://www.youtube.com/watch?v=-wNyEUrxzFU"
-
-### How do I pass cookies to hypervideo?
-
-Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
-
-In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
-
-Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
-
-Passing cookies to hypervideo is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
-
-### How do I stream directly to media player?
-
-You will first need to tell hypervideo to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
-
- $ hypervideo -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
-
-### How do I download only new videos from a playlist?
-
-Use download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
-
-For example, at first,
-
- $ hypervideo --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
-
-will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
-
- $ hypervideo --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
-
-### Should I add `--hls-prefer-native` into my config?
-
-When hypervideo detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/hypervideo each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
-
-When hypervideo knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, hypervideo will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of hypervideo, with improvements of the built-in downloader and/or ffmpeg.
-
-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://ytdl-org.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
-
-If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
-
-### Can you add support for this anime video site, or site which shows current movies for free?
-
-As a matter of policy (as well as legality), hypervideo does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to hypervideo.
-
-A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into hypervideo. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
-
-Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
-
-### How can I speed up work on my issue?
-
-(Also known as: Help, my important issue not being solved!) The hypervideo core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
-
-First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the hypervideo project has grown too large to use personal email as an effective communication channel.
-
-Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the hypervideo developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
-
-If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
-
-Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in hypervideo version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
-
-### How can I detect whether a given URL is supported by hypervideo?
-
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and hypervideo reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+# MODIFYING METADATA
-It is *not* possible to detect whether a URL is supported or not. That's because hypervideo contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
+The metadata obtained by the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata`
-If you want to find out whether a given URL is supported, simply call hypervideo with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run hypervideo on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
+`--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.
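+
+For example (a sketch; `FIELDS` is a comma-separated list of field names):
+
+```bash
+# Replace all spaces and "_" in title and uploader with a dash
+$ hypervideo --replace-in-metadata "title,uploader" "[ _]" "-"
+```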
-# Why do I need to go through that much red tape when filing bugs?
+The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
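+
+For example (a sketch; it assumes the title follows an "Artist - Title" pattern):
+
+```bash
+# Interpret the video title as "Artist - Title"
+$ hypervideo --parse-metadata "title:%(artist)s - %(title)s"
+```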
-Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in hypervideo but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to hypervideo.
+Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
-hypervideo is an free software project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `hypervideo -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of hypervideo is current.
+This option also has a few special uses:
-# DEVELOPER INSTRUCTIONS
+* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first Vimeo video found in the description
-Most users do not need to build hypervideo and can [download the builds](https://git.conocimientoslibres.ga/software/hypervideo.git) or get them from their distribution.
+* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to the `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values (see the sketch below).
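+
+A sketch of the `meta_` prefix (`(?s)` lets `.` match newlines so multi-line descriptions are kept):
+
+```bash
+# Embed the full description as the "comment" field of the file's metadata
+$ hypervideo --parse-metadata "description:(?s)(?P<meta_comment>.+)" --embed-metadata
+```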
-To run hypervideo as a developer, you don't need to build anything either. Simply execute
+**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes.
- $ python -m hypervideo_dl
+For reference, these are the fields hypervideo adds by default to the file metadata:
-To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
+Metadata fields | From
+:--------------------------|:------------------------------------------------
+`title` | `track` or `title`
+`date` | `upload_date`
+`description`, `synopsis` | `description`
+`purl`, `comment` | `webpage_url`
+`track` | `track_number`
+`artist` | `artist`, `creator`, `uploader` or `uploader_id`
+`genre` | `genre`
+`album` | `album`
+`album_artist` | `album_artist`
+`disc` | `disc_number`
+`show` | `series`
+`season_number` | `season_number`
+`episode_id` | `episode` or `episode_id`
+`episode_sort` | `episode_number`
+`language` of each stream | the format's `language`
- $ python -m unittest discover
- $ python test/test_download.py
- $ nosetests
+**Note**: The file format may not support some of these fields
-See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
-If you want to create a build of hypervideo yourself, you'll need
+## Modifying metadata examples
-* python
-* make (only GNU make is supported)
-* pandoc
-* zip
-* nosetests
-
-### Adding support for a new site
-
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. hypervideo does **not support** such sites thus pull requests adding support for them **will be rejected**.
-
-After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
-
-1. [Fork this repository](https://git.conocimientoslibres.ga/software/hypervideo.git)
-2. Check out the source code with:
-
- $ git clone https://git.conocimientoslibres.ga/software/hypervideo.git
-
-3. Start a new git branch with
-
- $ cd hypervideo
- $ git checkout -b yourextractor
-
-4. Start with this simple template and save it to `hypervideo_dl/extractor/yourextractor.py`:
-
- ```python
- # coding: utf-8
- from __future__ import unicode_literals
-
- from .common import InfoExtractor
-
-
- class YourExtractorIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'https://yourextractor.com/watch/42',
- 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
- 'info_dict': {
- 'id': '42',
- 'ext': 'mp4',
- 'title': 'Video title goes here',
- 'thumbnail': r're:^https?://.*\.jpg$',
- # TODO more properties, either as:
- # * A value
- # * MD5 checksum; start the string with md5:
- # * A regular expression; start the string with re:
- # * Any Python type (for example int or float)
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- # TODO more code goes here, for example ...
- title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': self._og_search_description(webpage),
- 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
- # TODO more properties (see hypervideo_dl/extractor/common.py)
- }
- ```
-5. Add an import in [`hypervideo_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`hypervideo_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [hypervideo coding conventions](#hypervideo-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
-
- $ flake8 hypervideo_dl/extractor/yourextractor.py
-
-9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by hypervideo, namely 2.6, 2.7, and 3.2+.
-10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
-
- $ git add hypervideo_dl/extractor/extractors.py
- $ git add hypervideo_dl/extractor/yourextractor.py
- $ git commit -m '[yourextractor] Add new extractor'
- $ git push origin yourextractor
-
-11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
-
-In any case, thank you very much for your contributions!
-
-## hypervideo coding conventions
-
-This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
-
-Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old hypervideo versions working. Even though this breakage issue is easily fixed by emitting a new version of hypervideo with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
-
-### Mandatory and optional metafields
-
-For extraction to work hypervideo relies on metadata your extractor extracts and provides to hypervideo expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
-
- - `id` (media identifier)
- - `title` (media title)
- - `url` (media download URL) or `formats`
-
-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention hypervideo also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
-
-[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/hypervideo_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
-
-#### Example
+```bash
+# Interpret the title as "Artist - Title"
+$ hypervideo --parse-metadata "title:%(artist)s - %(title)s"
-Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
+# Regex example
+$ hypervideo --parse-metadata "description:Artist - (?P<artist>.+)"
-```python
-meta = self._download_json(url, video_id)
-```
+# Set title as "Series name S01E05"
+$ hypervideo --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"
-Assume at this point `meta`'s layout is:
+# Prioritize uploader as the "artist" field in video metadata
+$ hypervideo --parse-metadata "%(uploader|)s:%(meta_artist)s" --embed-metadata
-```python
-{
- ...
- "summary": "some fancy summary text",
- ...
-}
-```
+# Set "comment" field in video metadata using description instead of webpage_url,
+# handling multiple lines correctly
+$ hypervideo --parse-metadata "description:(?s)(?P<meta_comment>.+)" --embed-metadata
-Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
+# Do not set any "synopsis" in the video metadata
+$ hypervideo --parse-metadata ":(?P<meta_synopsis>)"
-```python
-description = meta.get('summary') # correct
-```
+# Remove "formats" field from the infojson by setting it to an empty string
+$ hypervideo --parse-metadata ":(?P<formats>)" -j
-and not like:
+# Replace all spaces and "_" in title and uploader with a `-`
+$ hypervideo --replace-in-metadata "title,uploader" "[ _]" "-"
-```python
-description = meta['summary'] # incorrect
```
-The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
+# EXTRACTOR ARGUMENTS
-Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
+Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player_client=android_embedded,web;include_incomplete_formats" --extractor-args "funimation:version=uncut"`
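+
+As a complete (illustrative) command line, using only arguments documented below; arguments keyed to an extractor that does not match the URL are simply ignored:
+
+```bash
+# Pass arguments to two different extractors in a single invocation
+$ hypervideo --extractor-args "youtube:player_client=android,web;comment_sort=top" \
+             --extractor-args "funimation:version=uncut" \
+             "https://www.youtube.com/watch?v=BaW_jenozKc"
+```
+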
-```python
-description = self._search_regex(
- r'<span[^>]+id="title"[^>]*>([^<]+)<',
- webpage, 'description', fatal=False)
-```
+The following extractors use this feature:
-With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
+#### youtube
+* `lang`: Language code to prefer translated metadata of this language (case-sensitive). By default, the video's primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/hypervideo_dl/extractor/youtube.py#L381-L390) for the list of supported content language codes
+* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
+* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` URLs. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
+* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
+* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
+ * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
+* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8)
+* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
+* `innertube_key`: Innertube API key to use for all API requests
-You can also pass `default=<some fallback value>`, for example:
+#### youtubetab (YouTube playlists, channels, feeds, etc.)
+* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
+* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
-```python
-description = self._search_regex(
- r'<span[^>]+id="title"[^>]*>([^<]+)<',
- webpage, 'description', default=None)
-```
+#### generic
+* `fragment_query`: Pass through any query parameters in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
-On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
+#### funimation
+* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
+* `version`: The video version to extract - `uncut` or `simulcast`
-### Provide fallbacks
+#### crunchyrollbeta (Crunchyroll)
+* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
+* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
-When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
+#### vikichannel
+* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
-#### Example
+#### niconico
+* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**
-Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
+#### youtubewebarchive
+* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`
-```python
-title = meta['title']
-```
+#### gamejolt
+* `comment_sort`: `hot` (default), `you` (cookies needed), `top`, `new` - choose comment sorting mode (on GameJolt's side)
-If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
+#### hotstar
+* `res`: Resolution to ignore - one or more of `sd`, `hd`, `fhd`
+* `vcodec`: Video codec to ignore - one or more of `h264`, `h265`, `dvh265`
+* `dr`: Dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
-Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
+#### tiktok
+* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
+* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`
-```python
-title = meta.get('title') or self._og_search_title(webpage)
-```
+#### rokfinchannel
+* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
-This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
+#### twitter
+* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided
-### Regular expressions
+NOTE: These options may be changed/removed in the future without concern for backward compatibility
-#### Don't capture groups you don't use
+<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
-Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
-##### Example
+# PLUGINS
-Don't capture id attribute name here since you can't use it for anything anyway.
+Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`, where `<root-dir>` is the directory of the binary (`<root-dir>/hypervideo`), or the root directory of the module if you are running directly from source code (`<root-dir>/hypervideo_dl/__main__.py`). Plugins are currently not supported for the `pip` version
-Correct:
+Plugins can be of `<type>`s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for them. Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
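+
+As a minimal sketch of a postprocessor plugin (the file path follows the layout above; the class and its behavior are purely illustrative):
+
+```python
+# <root-dir>/ytdlp_plugins/postprocessor/__init__.py
+from hypervideo_dl.postprocessor import PostProcessor
+
+
+class ExamplePluginPP(PostProcessor):
+    def run(self, info):
+        # Print a line for each processed video; return no files to delete
+        self.to_screen(f'Processed {info.get("id")}')
+        return [], info
+```
+
+Assuming the plugin is picked up under its class name minus the `PP` suffix (an assumption here), it could then be enabled with `--use-postprocessor ExamplePlugin`.
+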
-```python
-r'(?:id|ID)=(?P<id>\d+)'
-```
+See [ytdlp_plugins](ytdlp_plugins) for example plugins.
-Incorrect:
-```python
-r'(id|ID)=(?P<id>\d+)'
-```
+Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code
+If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability
-#### Make regular expressions relaxed and flexible
+See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)
-When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
-##### Example
-Say you need to extract `title` from the following HTML code:
+# EMBEDDING HYPERVIDEO
-```html
-<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
-```
+hypervideo makes the best effort to be a good command-line program, and thus should be callable from any programming language.
-The code for that task should look similar to:
+Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc. to create console output that you can reliably reproduce and parse.
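+
+For example (illustrative commands), both of these produce output with a stable, parseable shape:
+
+```bash
+# Dump the full info dict as a single JSON document, without downloading
+$ hypervideo -J "https://www.youtube.com/watch?v=BaW_jenozKc" > info.json
+
+# Print only the fields you need, one per line
+$ hypervideo --print id --print duration "https://www.youtube.com/watch?v=BaW_jenozKc"
+```
+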
-```python
-title = self._search_regex(
- r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
-```
-
-Or even better:
+From a Python program, you can embed hypervideo in a more powerful fashion, like this:
```python
-title = self._search_regex(
- r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
- webpage, 'title', group='title')
-```
-
-Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
+from hypervideo_dl import YoutubeDL
-The code definitely should not look like:
-
-```python
-title = self._search_regex(
- r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
- webpage, 'title', group='title')
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+with YoutubeDL() as ydl:
+ ydl.download(URLS)
```
-### Long lines policy
+Most likely, you'll want to use various options. For a list of options available, have a look at [`hypervideo_dl/YoutubeDL.py`](hypervideo_dl/YoutubeDL.py#L180).
-There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
+**Tip**: If you are porting your code from youtube-dl to hypervideo, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be JSON-serializable, or even a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information)
-For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
+## Embedding examples
-Correct:
+#### Extracting information
```python
-'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
-```
-
-Incorrect:
-
-```python
-'https://www.youtube.com/watch?v=FqZTN594JQw&list='
-'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
-```
-
-### Inline values
-
-Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+import json
+import hypervideo_dl
-#### Example
+URL = 'https://www.youtube.com/watch?v=BaW_jenozKc'
-Correct:
+# ℹ️ See help(hypervideo_dl.YoutubeDL) for a list of available options and public functions
+ydl_opts = {}
+with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
+ info = ydl.extract_info(URL, download=False)
-```python
-title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+ # ℹ️ ydl.sanitize_info makes the info json-serializable
+ print(json.dumps(ydl.sanitize_info(info)))
```
-
-Incorrect:
+#### Download using an info-json
```python
-TITLE_RE = r'<title>([^<]+)</title>'
-# ...some lines of code...
-title = self._html_search_regex(TITLE_RE, webpage, 'title')
-```
-
-### Collapse fallbacks
-
-Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
+import hypervideo_dl
-#### Example
+INFO_FILE = 'path/to/video.info.json'
-Good:
+with hypervideo_dl.YoutubeDL() as ydl:
+ error_code = ydl.download_with_info_file(INFO_FILE)
-```python
-description = self._html_search_meta(
- ['og:description', 'description', 'twitter:description'],
- webpage, 'description', default=None)
+print('Some videos failed to download' if error_code
+ else 'All videos successfully downloaded')
```
-Unwieldy:
+#### Extract audio
```python
-description = (
- self._og_search_description(webpage, default=None)
- or self._html_search_meta('description', webpage, default=None)
- or self._html_search_meta('twitter:description', webpage, default=None))
-```
-
-Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
-
-### Trailing parentheses
-
-Always move trailing parentheses after the last argument.
-
-#### Example
+import hypervideo_dl
-Correct:
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
-```python
- lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
- list)
-```
-
-Incorrect:
+ydl_opts = {
+ 'format': 'm4a/bestaudio/best',
+ # ℹ️ See help(hypervideo_dl.postprocessor) for a list of available Postprocessors and their arguments
+ 'postprocessors': [{ # Extract audio using ffmpeg
+ 'key': 'FFmpegExtractAudio',
+ 'preferredcodec': 'm4a',
+ }]
+}
-```python
- lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
- list,
-)
+with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
+ error_code = ydl.download(URLS)
```
-### Use convenience conversion and parsing functions
-
-Wrap all extracted numeric data into safe functions from [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
-
-Use `url_or_none` for safe URL processing.
-
-Use `try_get` for safe metadata extraction from parsed JSON.
-
-Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
-
-Explore [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/hypervideo_dl/utils.py) for more useful convenience functions.
-
-#### More examples
+#### Filter videos
-##### Safely extract optional description from parsed JSON
```python
-description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
-```
-
-##### Safely extract more optional metadata
-```python
-video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
-description = video.get('summary')
-duration = float_or_none(video.get('durationMs'), scale=1000)
-view_count = int_or_none(video.get('views'))
-```
-
-# EMBEDDING HYPERVIDEO
+import hypervideo_dl
-hypervideo makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
-From a Python program, you can embed hypervideo in a more powerful fashion, like this:
+def longer_than_a_minute(info, *, incomplete):
+ """Download only videos longer than a minute (or with unknown duration)"""
+ duration = info.get('duration')
+ if duration and duration < 60:
+ return 'The video is too short'
-```python
-from __future__ import unicode_literals
-import hypervideo_dl
+ydl_opts = {
+ 'match_filter': longer_than_a_minute,
+}
-ydl_opts = {}
with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+ error_code = ydl.download(URLS)
```
-Most likely, you'll want to use various options. For a list of options available, have a look at [`hypervideo_dl/YoutubeDL.py`](https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/hypervideo_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
-
-Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
+#### Adding logger and progress hook
```python
-from __future__ import unicode_literals
import hypervideo_dl
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
-class MyLogger(object):
+class MyLogger:
def debug(self, msg):
+ # For compatibility with youtube-dl, both debug and info are passed into debug
+ # You can distinguish them by the prefix '[debug] '
+ if msg.startswith('[debug] '):
+ pass
+ else:
+ self.info(msg)
+
+ def info(self, msg):
pass
def warning(self, msg):
@@ -1345,103 +1831,208 @@ class MyLogger(object):
print(msg)
+# ℹ️ See "progress_hooks" in help(hypervideo_dl.YoutubeDL)
def my_hook(d):
if d['status'] == 'finished':
- print('Done downloading, now converting ...')
+ print('Done downloading, now post-processing ...')
ydl_opts = {
- 'format': 'bestaudio/best',
- 'postprocessors': [{
- 'key': 'FFmpegExtractAudio',
- 'preferredcodec': 'mp3',
- 'preferredquality': '192',
- }],
'logger': MyLogger(),
'progress_hooks': [my_hook],
}
-with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
-```
-# BUGS
-
-Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
-
-**Please include the full output of hypervideo when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
-
-```
-$ hypervideo -v <your command line>
-[debug] System config: []
-[debug] User config: []
-[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
-[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] hypervideo version 1.1.11
-[debug] Git HEAD: 135392e
-[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
-[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
-[debug] Proxy map: {}
-...
+with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
+ ydl.download(URLS)
```
-**Do not post screenshots of verbose logs; only plain text is acceptable.**
-
-The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
-
-Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
-
-### Is the description of the issue itself sufficient?
+#### Add a custom PostProcessor
-We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
-
-So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
-
-- What the problem is
-- How it could be fixed
-- How your proposed solution would look like
-
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
-
-For bug reports, this means that your report should contain the *complete* output of hypervideo when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
-
-If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
-
-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
-
-### Are you using the latest version?
-
-Before reporting any issue, type `doas pacman -Sy hypervideo`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
+```python
+import hypervideo_dl
-### Is the issue already documented?
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+# ℹ️ See help(hypervideo_dl.postprocessor.PostProcessor)
+class MyCustomPP(hypervideo_dl.postprocessor.PostProcessor):
+ def run(self, info):
+ self.to_screen('Doing stuff')
+ return [], info
-### Why are existing options not enough?
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+with hypervideo_dl.YoutubeDL() as ydl:
+ # ℹ️ "when" can take any value in hypervideo_dl.utils.POSTPROCESS_WHEN
+ ydl.add_post_processor(MyCustomPP(), when='pre_process')
+ ydl.download(URLS)
+```
-### Is there enough context in your bug report?
-People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
+#### Use a custom format selector
-We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+```python
+import hypervideo_dl
-### Does the issue involve one problem, and one problem only?
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
-Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+def format_selector(ctx):
+ """ Select the best video and the best audio that won't result in an mkv.
+ NOTE: This is just an example and does not handle all cases """
-In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of hypervideo that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+ # formats are already sorted worst to best
+ formats = ctx.get('formats')[::-1]
-### Is anyone going to need the feature?
+ # acodec='none' means there is no audio
+ best_video = next(f for f in formats
+ if f['vcodec'] != 'none' and f['acodec'] == 'none')
-Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+ # find compatible audio extension
+ audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
+ # vcodec='none' means there is no video
+ best_audio = next(f for f in formats if (
+ f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))
-### Is your question about hypervideo?
+ # These are the minimum required fields for a merged format
+ yield {
+ 'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
+ 'ext': best_video['ext'],
+ 'requested_formats': [best_video, best_audio],
+ # Must be + separated list of protocols
+ 'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
+ }
-It may sound strange, but some bug reports we receive are completely unrelated to hypervideo and relate to a different, or even the reporter's own, application. Please make sure that you are actually using hypervideo. If you are using a UI for hypervideo, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for hypervideo fails in some way you believe is related to hypervideo, by all means, go ahead and report the bug.
-# COPYRIGHT
+ydl_opts = {
+ 'format': format_selector,
+}
-hypervideo is released into the [CC0 1.0 Universal](LICENSE) by the copyright holders.
+with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
+ ydl.download(URLS)
+```
-This README file was originally written by [Daniel Bolton](https://github.com/dbbolton) and is likewise released into the public domain.
+<!-- MANPAGE: MOVE "NEW FEATURES" SECTION HERE -->
+
+# DEPRECATED OPTIONS
+
+These are all the deprecated options and their current alternatives to achieve the same effect
+
+#### Almost redundant options
+While these options are almost the same as their new counterparts, there are some differences that prevent them from being redundant
+
+ -j, --dump-json --print "%()j"
+ -F, --list-formats --print formats_table
+ --list-thumbnails --print thumbnails_table --print playlist:thumbnails_table
+ --list-subs --print automatic_captions_table --print subtitles_table
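+
+For example, the first pair above corresponds to (`URL` is a placeholder):
+
+```bash
+$ hypervideo -j URL              # deprecated spelling
+$ hypervideo --print "%()j" URL  # current near-equivalent
+```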
+
+#### Redundant options
+While these options are redundant, they are still expected to be used due to their ease of use
+
+ --get-description --print description
+ --get-duration --print duration_string
+ --get-filename --print filename
+ --get-format --print format
+ --get-id --print id
+ --get-thumbnail --print thumbnail
+ -e, --get-title --print title
+ -g, --get-url --print urls
+ --match-title REGEX --match-filter "title ~= (?i)REGEX"
+ --reject-title REGEX --match-filter "title !~= (?i)REGEX"
+ --min-views COUNT --match-filter "view_count >=? COUNT"
+ --max-views COUNT --match-filter "view_count <=? COUNT"
+ --user-agent UA --add-header "User-Agent:UA"
+ --referer URL --add-header "Referer:URL"
+ --playlist-start NUMBER -I NUMBER:
+ --playlist-end NUMBER -I :NUMBER
+ --playlist-reverse -I ::-1
+ --no-playlist-reverse Default
+
+
+#### Not recommended
+While these options still work, their use is not recommended since there are other alternatives to achieve the same result
+
+ --force-generic-extractor --ies generic,default
+ --exec-before-download CMD --exec "before_dl:CMD"
+ --no-exec-before-download --no-exec
+ --all-formats -f all
+ --all-subs --sub-langs all --write-subs
+ --print-json -j --no-simulate
+ --autonumber-size NUMBER Use string formatting, e.g. %(autonumber)03d
+ --autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s
+ --id -o "%(id)s.%(ext)s"
+ --metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT"
+ --hls-prefer-native --downloader "m3u8:native"
+ --hls-prefer-ffmpeg --downloader "m3u8:ffmpeg"
+ --list-formats-old --compat-options list-formats (Alias: --no-list-formats-as-table)
+ --list-formats-as-table --compat-options -list-formats [Default] (Alias: --no-list-formats-old)
+ --youtube-skip-dash-manifest --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest)
+ --youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
+ --youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest)
+ --youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest)
+
+
+#### Developer options
+These options are not intended to be used by the end-user
+
+ --test Download only part of video for testing extractors
+ --load-pages Load pages dumped by --write-pages
+ --youtube-print-sig-code For testing youtube signatures
+ --allow-unplayable-formats List unplayable formats also
+ --no-allow-unplayable-formats Default
+
+
+#### Old aliases
+These are aliases that are no longer documented for various reasons
+
+ --avconv-location --ffmpeg-location
+ --clean-infojson --clean-info-json
+ --cn-verification-proxy URL --geo-verification-proxy URL
+ --dump-headers --print-traffic
+ --dump-intermediate-pages --dump-pages
+ --force-write-download-archive --force-write-archive
+ --load-info --load-info-json
+ --no-clean-infojson --no-clean-info-json
+ --no-split-tracks --no-split-chapters
+ --no-write-srt --no-write-subs
+ --prefer-unsecure --prefer-insecure
+ --rate-limit RATE --limit-rate RATE
+ --split-tracks --split-chapters
+ --srt-lang LANGS --sub-langs LANGS
+ --trim-file-names LENGTH --trim-filenames LENGTH
+ --write-srt --write-subs
+ --yes-overwrites --force-overwrites
+
+#### Sponskrub Options
+Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of the `--sponsorblock` options
+
+ --sponskrub --sponsorblock-mark all
+ --no-sponskrub --no-sponsorblock
+ --sponskrub-cut --sponsorblock-remove all
+ --no-sponskrub-cut --sponsorblock-remove -all
+ --sponskrub-force Not applicable
+ --no-sponskrub-force Not applicable
+ --sponskrub-location Not applicable
+ --sponskrub-args Not applicable
+
+#### No longer supported
+These options may no longer work as intended
+
+ --prefer-avconv avconv is not officially supported by hypervideo (Alias: --no-prefer-ffmpeg)
+ --prefer-ffmpeg Default (Alias: --no-prefer-avconv)
+ -C, --call-home Not implemented
+ --no-call-home Default
+ --include-ads No longer supported
+ --no-include-ads Default
+ --write-annotations No supported site has annotations now
+ --no-write-annotations Default
+ --compat-options seperate-video-versions No longer needed
+
+#### Removed
+These options were deprecated since 2014 and have now been entirely removed
+
+ -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s"
+ -t, -l, --title, --literal -o "%(title)s-%(id)s.%(ext)s"
+
+# CONTRIBUTING
+See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions)
+
+# WIKI
+See the [Wiki](https://github.com/yt-dlp/yt-dlp/wiki) for more information
diff --git a/completions/zsh/_hypervideo b/completions/zsh/_hypervideo
index 0a8d491..f31f234 100644
--- a/completions/zsh/_hypervideo
+++ b/completions/zsh/_hypervideo
@@ -21,7 +21,7 @@ __hypervideo_dl() {
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
- _arguments '*: :(--help --version --ignore-errors --no-abort-on-error --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --no-config-locations --config-locations --flat-playlist --no-flat-playlist --live-from-start --no-live-from-start --wait-for-video --no-wait-for-video --mark-watched --no-mark-watched --no-colors --compat-options --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filters --no-match-filter --no-playlist --yes-playlist --age-limit --download-archive --no-download-archive --max-downloads --break-on-existing --break-on-reject --break-per-input --no-break-per-input --skip-playlist-after-errors --include-ads --no-include-ads --concurrent-fragments --limit-rate --throttled-rate --retries --file-access-retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --no-keep-fragments --buffer-size --resize-buffer --no-resize-buffer --http-chunk-size --test --playlist-reverse --no-playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --no-hls-use-mpegts --downloader --downloader-args --batch-file --no-batch-file --id --paths --output --output-na-placeholder --autonumber-size --autonumber-start --restrict-filenames --no-restrict-filenames --windows-filenames --no-windows-filenames --trim-filenames --no-overwrites --force-overwrites --no-force-overwrites --continue --no-continue --part --no-part --mtime --no-mtime --write-description --no-write-description --write-info-json --no-write-info-json --write-annotations --no-write-annotations --write-playlist-metafiles --no-write-playlist-metafiles --clean-info-json --no-clean-info-json --write-comments --no-write-comments --load-info-json --cookies --no-cookies --cookies-from-browser --no-cookies-from-browser --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --no-write-thumbnail --write-all-thumbnails --list-thumbnails --write-link --write-url-link --write-webloc-link --write-desktop-link --quiet --no-warnings --simulate --no-simulate --ignore-no-formats-error --no-ignore-no-formats-error --skip-download --print --print-to-file --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --force-write-archive --newline --no-progress --progress --console-title --progress-template --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --legacy-server-connect --no-check-certificates --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-requests --sleep-interval --max-sleep-interval --sleep-subtitles --format --format-sort --format-sort-force --no-format-sort-force --video-multistreams --no-video-multistreams --audio-multistreams --no-audio-multistreams --all-formats --prefer-free-formats --no-prefer-free-formats --check-formats --check-all-formats --no-check-formats --list-formats --list-formats-as-table --list-formats-old --merge-output-format --allow-unplayable-formats --no-allow-unplayable-formats --write-subs --no-write-subs --write-auto-subs 
--no-write-auto-subs --all-subs --list-subs --sub-format --sub-langs --username --password --twofactor --netrc --netrc-location --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --remux-video --recode-video --postprocessor-args --keep-video --no-keep-video --post-overwrites --no-post-overwrites --embed-subs --no-embed-subs --embed-thumbnail --no-embed-thumbnail --embed-metadata --no-embed-metadata --embed-chapters --no-embed-chapters --embed-info-json --no-embed-info-json --metadata-from-title --parse-metadata --replace-in-metadata --xattrs --concat-playlist --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --no-exec --exec-before-download --no-exec-before-download --convert-subs --convert-thumbnails --split-chapters --no-split-chapters --remove-chapters --no-remove-chapters --force-keyframes-at-cuts --no-force-keyframes-at-cuts --use-postprocessor --sponsorblock-mark --sponsorblock-remove --sponsorblock-chapter-title --no-sponsorblock --sponsorblock-api --sponskrub --no-sponskrub --sponskrub-cut --no-sponskrub-cut --sponskrub-force --no-sponskrub-force --sponskrub-location --sponskrub-args --extractor-retries --allow-dynamic-mpd --ignore-dynamic-mpd --hls-split-discontinuity --no-hls-split-discontinuity --extractor-args --youtube-include-dash-manifest --youtube-skip-dash-manifest --youtube-include-hls-manifest --youtube-skip-hls-manifest)'
+ _arguments '*: :(--help --version --ignore-errors --no-abort-on-error --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --use-extractors --force-generic-extractor --default-search --ignore-config --no-config-locations --config-locations --flat-playlist --no-flat-playlist --live-from-start --no-live-from-start --wait-for-video --no-wait-for-video --mark-watched --no-mark-watched --no-colors --compat-options --alias --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filters --no-match-filter --no-playlist --yes-playlist --age-limit --download-archive --no-download-archive --max-downloads --break-on-existing --break-on-reject --break-per-input --no-break-per-input --skip-playlist-after-errors --include-ads --no-include-ads --concurrent-fragments --limit-rate --throttled-rate --retries --file-access-retries --fragment-retries --retry-sleep --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --no-keep-fragments --buffer-size --resize-buffer --no-resize-buffer --http-chunk-size --test --playlist-reverse --no-playlist-reverse --playlist-random --lazy-playlist --no-lazy-playlist --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --no-hls-use-mpegts --download-sections --downloader --downloader-args --batch-file --no-batch-file --id --paths --output --output-na-placeholder --autonumber-size --autonumber-start --restrict-filenames --no-restrict-filenames --windows-filenames --no-windows-filenames --trim-filenames --no-overwrites --force-overwrites --no-force-overwrites --continue --no-continue --part --no-part --mtime --no-mtime --write-description --no-write-description --write-info-json --no-write-info-json --write-annotations --no-write-annotations --write-playlist-metafiles --no-write-playlist-metafiles --clean-info-json --no-clean-info-json --write-comments --no-write-comments --load-info-json --cookies --no-cookies --cookies-from-browser --no-cookies-from-browser --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --no-write-thumbnail --write-all-thumbnails --list-thumbnails --write-link --write-url-link --write-webloc-link --write-desktop-link --quiet --no-warnings --simulate --no-simulate --ignore-no-formats-error --no-ignore-no-formats-error --skip-download --print --print-to-file --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --force-write-archive --newline --no-progress --progress --console-title --progress-template --verbose --dump-pages --write-pages --load-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --legacy-server-connect --no-check-certificates --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-requests --sleep-interval --max-sleep-interval --sleep-subtitles --format --format-sort --format-sort-force --no-format-sort-force --video-multistreams --no-video-multistreams --audio-multistreams --no-audio-multistreams --all-formats --prefer-free-formats --no-prefer-free-formats --check-formats --check-all-formats --no-check-formats --list-formats --list-formats-as-table --list-formats-old --merge-output-format 
--allow-unplayable-formats --no-allow-unplayable-formats --write-subs --no-write-subs --write-auto-subs --no-write-auto-subs --all-subs --list-subs --sub-format --sub-langs --username --password --twofactor --netrc --netrc-location --video-password --ap-mso --ap-username --ap-password --ap-list-mso --client-certificate --client-certificate-key --client-certificate-password --extract-audio --audio-format --audio-quality --remux-video --recode-video --postprocessor-args --keep-video --no-keep-video --post-overwrites --no-post-overwrites --embed-subs --no-embed-subs --embed-thumbnail --no-embed-thumbnail --embed-metadata --no-embed-metadata --embed-chapters --no-embed-chapters --embed-info-json --no-embed-info-json --metadata-from-title --parse-metadata --replace-in-metadata --xattrs --concat-playlist --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --no-exec --exec-before-download --no-exec-before-download --convert-subs --convert-thumbnails --split-chapters --no-split-chapters --remove-chapters --no-remove-chapters --force-keyframes-at-cuts --no-force-keyframes-at-cuts --use-postprocessor --sponsorblock-mark --sponsorblock-remove --sponsorblock-chapter-title --no-sponsorblock --sponsorblock-api --sponskrub --no-sponskrub --sponskrub-cut --no-sponskrub-cut --sponskrub-force --no-sponskrub-force --sponskrub-location --sponskrub-args --extractor-retries --allow-dynamic-mpd --ignore-dynamic-mpd --hls-split-discontinuity --no-hls-split-discontinuity --extractor-args --youtube-include-dash-manifest --youtube-skip-dash-manifest --youtube-include-hls-manifest --youtube-skip-hls-manifest)'
fi
;;
esac
diff --git a/devscripts/__init__.py b/devscripts/__init__.py
new file mode 100644
index 0000000..750dbdc
--- /dev/null
+++ b/devscripts/__init__.py
@@ -0,0 +1 @@
+# Empty file needed to make devscripts.utils properly importable from outside
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index e0768d2..cef5414 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -1,11 +1,12 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
+# Allow direct execution
import os
-from os.path import dirname as dirn
import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
import hypervideo_dl
BASH_COMPLETION_FILE = "completions/bash/hypervideo"
@@ -26,5 +27,5 @@ def build_completion(opt_parser):
f.write(filled_template)
-parser = hypervideo_dl.parseOpts()[0]
+parser = hypervideo_dl.parseOpts(ignore_config_files=True)[0]
build_completion(parser)
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
deleted file mode 100644
index 2a8039e..0000000
--- a/devscripts/buildserver.py
+++ /dev/null
@@ -1,435 +0,0 @@
-# UNUSED
-
-#!/usr/bin/python3
-
-import argparse
-import ctypes
-import functools
-import shutil
-import subprocess
-import sys
-import tempfile
-import threading
-import traceback
-import os.path
-
-sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
-from hypervideo_dl.compat import (
- compat_input,
- compat_http_server,
- compat_str,
- compat_urlparse,
-)
-
-# These are not used outside of buildserver.py thus not in compat.py
-
-try:
- import winreg as compat_winreg
-except ImportError: # Python 2
- import _winreg as compat_winreg
-
-try:
- import socketserver as compat_socketserver
-except ImportError: # Python 2
- import SocketServer as compat_socketserver
-
-
-class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
- allow_reuse_address = True
-
-
-advapi32 = ctypes.windll.advapi32
-
-SC_MANAGER_ALL_ACCESS = 0xf003f
-SC_MANAGER_CREATE_SERVICE = 0x02
-SERVICE_WIN32_OWN_PROCESS = 0x10
-SERVICE_AUTO_START = 0x2
-SERVICE_ERROR_NORMAL = 0x1
-DELETE = 0x00010000
-SERVICE_STATUS_START_PENDING = 0x00000002
-SERVICE_STATUS_RUNNING = 0x00000004
-SERVICE_ACCEPT_STOP = 0x1
-
-SVCNAME = 'youtubedl_builder'
-
-LPTSTR = ctypes.c_wchar_p
-START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
-
-
-class SERVICE_TABLE_ENTRY(ctypes.Structure):
- _fields_ = [
- ('lpServiceName', LPTSTR),
- ('lpServiceProc', START_CALLBACK)
- ]
-
-
-HandlerEx = ctypes.WINFUNCTYPE(
- ctypes.c_int, # return
- ctypes.c_int, # dwControl
- ctypes.c_int, # dwEventType
- ctypes.c_void_p, # lpEventData,
- ctypes.c_void_p, # lpContext,
-)
-
-
-def _ctypes_array(c_type, py_array):
- ar = (c_type * len(py_array))()
- ar[:] = py_array
- return ar
-
-
-def win_OpenSCManager():
- res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
- if not res:
- raise Exception('Opening service manager failed - '
- 'are you running this as administrator?')
- return res
-
-
-def win_install_service(service_name, cmdline):
- manager = win_OpenSCManager()
- try:
- h = advapi32.CreateServiceW(
- manager, service_name, None,
- SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
- SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
- cmdline, None, None, None, None, None)
- if not h:
- raise OSError('Service creation failed: %s' % ctypes.FormatError())
-
- advapi32.CloseServiceHandle(h)
- finally:
- advapi32.CloseServiceHandle(manager)
-
-
-def win_uninstall_service(service_name):
- manager = win_OpenSCManager()
- try:
- h = advapi32.OpenServiceW(manager, service_name, DELETE)
- if not h:
- raise OSError('Could not find service %s: %s' % (
- service_name, ctypes.FormatError()))
-
- try:
- if not advapi32.DeleteService(h):
- raise OSError('Deletion failed: %s' % ctypes.FormatError())
- finally:
- advapi32.CloseServiceHandle(h)
- finally:
- advapi32.CloseServiceHandle(manager)
-
-
-def win_service_report_event(service_name, msg, is_error=True):
- with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
- f.write(msg + '\n')
-
- event_log = advapi32.RegisterEventSourceW(None, service_name)
- if not event_log:
- raise OSError('Could not report event: %s' % ctypes.FormatError())
-
- try:
- type_id = 0x0001 if is_error else 0x0004
- event_id = 0xc0000000 if is_error else 0x40000000
- lines = _ctypes_array(LPTSTR, [msg])
-
- if not advapi32.ReportEventW(
- event_log, type_id, 0, event_id, None, len(lines), 0,
- lines, None):
- raise OSError('Event reporting failed: %s' % ctypes.FormatError())
- finally:
- advapi32.DeregisterEventSource(event_log)
-
-
-def win_service_handler(stop_event, *args):
- try:
- raise ValueError('Handler called with args ' + repr(args))
- TODO
- except Exception as e:
- tb = traceback.format_exc()
- msg = str(e) + '\n' + tb
- win_service_report_event(service_name, msg, is_error=True)
- raise
-
-
-def win_service_set_status(handle, status_code):
- svcStatus = SERVICE_STATUS()
- svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
- svcStatus.dwCurrentState = status_code
- svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
-
- svcStatus.dwServiceSpecificExitCode = 0
-
- if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
- raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
-
-
-def win_service_main(service_name, real_main, argc, argv_raw):
- try:
- # args = [argv_raw[i].value for i in range(argc)]
- stop_event = threading.Event()
- handler = HandlerEx(functools.partial(stop_event, win_service_handler))
- h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
- if not h:
- raise OSError('Handler registration failed: %s' %
- ctypes.FormatError())
-
- TODO
- except Exception as e:
- tb = traceback.format_exc()
- msg = str(e) + '\n' + tb
- win_service_report_event(service_name, msg, is_error=True)
- raise
-
-
-def win_service_start(service_name, real_main):
- try:
- cb = START_CALLBACK(
- functools.partial(win_service_main, service_name, real_main))
- dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
- SERVICE_TABLE_ENTRY(
- service_name,
- cb
- ),
- SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
- ])
-
- if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
- raise OSError('ctypes start failed: %s' % ctypes.FormatError())
- except Exception as e:
- tb = traceback.format_exc()
- msg = str(e) + '\n' + tb
- win_service_report_event(service_name, msg, is_error=True)
- raise
-
-
-def main(args=None):
- parser = argparse.ArgumentParser()
- parser.add_argument('-i', '--install',
- action='store_const', dest='action', const='install',
- help='Launch at Windows startup')
- parser.add_argument('-u', '--uninstall',
- action='store_const', dest='action', const='uninstall',
- help='Remove Windows service')
- parser.add_argument('-s', '--service',
- action='store_const', dest='action', const='service',
- help='Run as a Windows service')
- parser.add_argument('-b', '--bind', metavar='<host:port>',
- action='store', default='0.0.0.0:8142',
- help='Bind to host:port (default %default)')
- options = parser.parse_args(args=args)
-
- if options.action == 'install':
- fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
- cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
- win_install_service(SVCNAME, cmdline)
- return
-
- if options.action == 'uninstall':
- win_uninstall_service(SVCNAME)
- return
-
- if options.action == 'service':
- win_service_start(SVCNAME, main)
- return
-
- host, port_str = options.bind.split(':')
- port = int(port_str)
-
- print('Listening on %s:%d' % (host, port))
- srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
- thr = threading.Thread(target=srv.serve_forever)
- thr.start()
- compat_input('Press ENTER to shut down')
- srv.shutdown()
- thr.join()
-
-
-def rmtree(path):
- for name in os.listdir(path):
- fname = os.path.join(path, name)
- if os.path.isdir(fname):
- rmtree(fname)
- else:
- os.chmod(fname, 0o666)
- os.remove(fname)
- os.rmdir(path)
-
-
-class BuildError(Exception):
- def __init__(self, output, code=500):
- self.output = output
- self.code = code
-
- def __str__(self):
- return self.output
-
-
-class HTTPError(BuildError):
- pass
-
-
-class PythonBuilder(object):
- def __init__(self, **kwargs):
- python_version = kwargs.pop('python', '3.4')
- python_path = None
- for node in ('Wow6432Node\\', ''):
- try:
- key = compat_winreg.OpenKey(
- compat_winreg.HKEY_LOCAL_MACHINE,
- r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
- try:
- python_path, _ = compat_winreg.QueryValueEx(key, '')
- finally:
- compat_winreg.CloseKey(key)
- break
- except Exception:
- pass
-
- if not python_path:
- raise BuildError('No such Python version: %s' % python_version)
-
- self.pythonPath = python_path
-
- super(PythonBuilder, self).__init__(**kwargs)
-
-
-class GITInfoBuilder(object):
- def __init__(self, **kwargs):
- try:
- self.user, self.repoName = kwargs['path'][:2]
- self.rev = kwargs.pop('rev')
- except ValueError:
- raise BuildError('Invalid path')
- except KeyError as e:
- raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
-
- path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
- if not os.path.exists(path):
- os.makedirs(path)
- self.basePath = tempfile.mkdtemp(dir=path)
- self.buildPath = os.path.join(self.basePath, 'build')
-
- super(GITInfoBuilder, self).__init__(**kwargs)
-
-
-class GITBuilder(GITInfoBuilder):
- def build(self):
- try:
- subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
- subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
- except subprocess.CalledProcessError as e:
- raise BuildError(e.output)
-
- super(GITBuilder, self).build()
-
-
-class YoutubeDLBuilder(object):
- authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org']
-
- def __init__(self, **kwargs):
- if self.repoName != 'hypervideo':
- raise BuildError('Invalid repository "%s"' % self.repoName)
- if self.user not in self.authorizedUsers:
- raise HTTPError('Unauthorized user "%s"' % self.user, 401)
-
- super(YoutubeDLBuilder, self).__init__(**kwargs)
-
- def build(self):
- try:
- proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
- proc.wait()
- #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
- # cwd=self.buildPath)
- except subprocess.CalledProcessError as e:
- raise BuildError(e.output)
-
- super(YoutubeDLBuilder, self).build()
-
-
-class DownloadBuilder(object):
- def __init__(self, **kwargs):
- self.handler = kwargs.pop('handler')
- self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
- self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
- if not self.srcPath.startswith(self.buildPath):
- raise HTTPError(self.srcPath, 401)
-
- super(DownloadBuilder, self).__init__(**kwargs)
-
- def build(self):
- if not os.path.exists(self.srcPath):
- raise HTTPError('No such file', 404)
- if os.path.isdir(self.srcPath):
- raise HTTPError('Is a directory: %s' % self.srcPath, 401)
-
- self.handler.send_response(200)
- self.handler.send_header('Content-Type', 'application/octet-stream')
- self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
- self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
- self.handler.end_headers()
-
- with open(self.srcPath, 'rb') as src:
- shutil.copyfileobj(src, self.handler.wfile)
-
- super(DownloadBuilder, self).build()
-
-
-class CleanupTempDir(object):
- def build(self):
- try:
- rmtree(self.basePath)
- except Exception as e:
- print('WARNING deleting "%s": %s' % (self.basePath, e))
-
- super(CleanupTempDir, self).build()
-
-
-class Null(object):
- def __init__(self, **kwargs):
- pass
-
- def start(self):
- pass
-
- def close(self):
- pass
-
- def build(self):
- pass
-
-
-class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
- pass
-
-
-class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
- actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
-
- def do_GET(self):
- path = compat_urlparse.urlparse(self.path)
- paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
- action, _, path = path.path.strip('/').partition('/')
- if path:
- path = path.split('/')
- if action in self.actionDict:
- try:
- builder = self.actionDict[action](path=path, handler=self, **paramDict)
- builder.start()
- try:
- builder.build()
- finally:
- builder.close()
- except BuildError as e:
- self.send_response(e.code)
- msg = compat_str(e).encode('UTF-8')
- self.send_header('Content-Type', 'text/plain; charset=UTF-8')
- self.send_header('Content-Length', len(msg))
- self.end_headers()
- self.wfile.write(msg)
- else:
- self.send_response(500, 'Unknown build method "%s"' % action)
- else:
- self.send_response(500, 'Malformed URL')
-
-if __name__ == '__main__':
- main()
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
index 7dd372f..fc72c30 100644
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -1,6 +1,4 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-
"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
if we are not 'age_limit' tagging some porn site
@@ -12,11 +10,14 @@ pass the list filename as the only argument
# Allow direct execution
import os
import sys
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import urllib.parse
+import urllib.request
+
from test.helper import gettestcases
-from hypervideo_dl.utils import compat_urllib_parse_urlparse
-from hypervideo_dl.utils import compat_urllib_request
if len(sys.argv) > 1:
METHOD = 'LIST'
@@ -27,9 +28,9 @@ else:
for test in gettestcases():
if METHOD == 'EURISTIC':
try:
- webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+ webpage = urllib.request.urlopen(test['url'], timeout=10).read()
except Exception:
- print('\nFail: {0}'.format(test['name']))
+ print('\nFail: {}'.format(test['name']))
continue
webpage = webpage.decode('utf8', 'replace')
@@ -37,9 +38,9 @@ for test in gettestcases():
RESULT = 'porn' in webpage.lower()
elif METHOD == 'LIST':
- domain = compat_urllib_parse_urlparse(test['url']).netloc
+ domain = urllib.parse.urlparse(test['url']).netloc
if not domain:
- print('\nFail: {0}'.format(test['name']))
+ print('\nFail: {}'.format(test['name']))
continue
domain = '.'.join(domain.split('.')[-2:])
@@ -47,11 +48,11 @@ for test in gettestcases():
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
or test['info_dict']['age_limit'] != 18):
- print('\nPotential missing age_limit check: {0}'.format(test['name']))
+ print('\nPotential missing age_limit check: {}'.format(test['name']))
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
and test['info_dict']['age_limit'] == 18):
- print('\nPotential false negative: {0}'.format(test['name']))
+ print('\nPotential false negative: {}'.format(test['name']))
else:
sys.stdout.write('.')
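
With the Python 2 shims gone, the two `compat_*` imports removed above map directly onto the standard library. A sketch of that mapping (the URL is hypothetical):

```python
import urllib.parse
import urllib.request

url = 'https://example.com/watch?v=123'     # hypothetical test URL
domain = urllib.parse.urlparse(url).netloc  # was compat_urllib_parse_urlparse(...)
request = urllib.request.Request(url)       # compat_urllib_request is urllib.request on py3
print(domain, request.full_url)
```
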
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 84ced2d..0b2b113 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -1,12 +1,14 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-import optparse
+# Allow direct execution
import os
-from os.path import dirname as dirn
import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import optparse
+
import hypervideo_dl
from hypervideo_dl.utils import shell_quote
@@ -46,5 +48,5 @@ def build_completion(opt_parser):
f.write(filled_template)
-parser = hypervideo_dl.parseOpts()[0]
+parser = hypervideo_dl.parseOpts(ignore_config_files=True)[0]
build_completion(parser)
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
index 09feeaa..f131e47 100644
--- a/devscripts/generate_aes_testdata.py
+++ b/devscripts/generate_aes_testdata.py
@@ -1,15 +1,17 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-
-import codecs
-import subprocess
+# Allow direct execution
import os
import sys
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from hypervideo_dl.utils import intlist_to_bytes
+
+import codecs
+import subprocess
+
from hypervideo_dl.aes import aes_encrypt, key_expansion
+from hypervideo_dl.utils import intlist_to_bytes
secret_msg = b'Secret message goes here'
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
index da89e07..c8815e0 100644
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -1,31 +1,38 @@
-# coding: utf-8
+import importlib
+import random
import re
-from ..utils import bug_reports_message, write_string
+from ..utils import (
+ age_restricted,
+ bug_reports_message,
+ classproperty,
+ write_string,
+)
+
+# These bloat the lazy_extractors, so allow them to pass through silently
+ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'}
+_WARNED = False
class LazyLoadMetaClass(type):
def __getattr__(cls, name):
- if '_real_class' not in cls.__dict__:
- write_string(
- f'WARNING: Falling back to normal extractor since lazy extractor '
- f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
- return getattr(cls._get_real_class(), name)
+ global _WARNED
+ if ('_real_class' not in cls.__dict__
+ and name not in ALLOWED_CLASSMETHODS and not _WARNED):
+ _WARNED = True
+ write_string('WARNING: Falling back to normal extractor since lazy extractor '
+ f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
+ return getattr(cls.real_class, name)
class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
- _module = None
- _WORKING = True
-
- @classmethod
- def _get_real_class(cls):
+ @classproperty
+ def real_class(cls):
if '_real_class' not in cls.__dict__:
- mod = __import__(cls._module, fromlist=(cls.__name__,))
- cls._real_class = getattr(mod, cls.__name__)
+ cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
return cls._real_class
def __new__(cls, *args, **kwargs):
- real_cls = cls._get_real_class()
- instance = real_cls.__new__(real_cls)
+ instance = cls.real_class.__new__(cls.real_class)
instance.__init__(*args, **kwargs)
return instance
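
For context, a stripped-down sketch (hypothetical names) of the pattern the template implements: class-level attribute access triggers a one-time import of the real extractor module, and the result is cached on the class. The real template additionally exposes the cache through `classproperty` and rate-limits the fallback warning with the module-level `_WARNED` flag:

```python
import importlib


class LazyMeta(type):
    def __getattr__(cls, name):
        # Only reached for attributes missing on the stub: import the real
        # class once, cache it on the stub, then delegate the lookup.
        if '_real_class' not in cls.__dict__:
            cls._real_class = getattr(
                importlib.import_module(cls._module), cls.__name__)
        return getattr(cls._real_class, name)


class FooIE(metaclass=LazyMeta):
    _module = 'mypkg.extractor.foo'  # hypothetical module path
```
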
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 8c5f107..e777730 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,7 +1,5 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-import io
import optparse
import re
@@ -16,7 +14,7 @@ def main():
infile, outfile = args
- with io.open(infile, encoding='utf-8') as inf:
+ with open(infile, encoding='utf-8') as inf:
readme = inf.read()
bug_text = re.search(
@@ -26,7 +24,7 @@ def main():
out = bug_text + dev_text
- with io.open(outfile, 'w', encoding='utf-8') as outf:
+ with open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out)
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 1e22620..69e1758 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,105 +1,128 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals, print_function
-from inspect import getsource
-import io
+# Allow direct execution
import os
-from os.path import dirname as dirn
+import shutil
import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
-
-lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'hypervideo_dl/extractor/lazy_extractors.py'
-if os.path.exists(lazy_extractors_filename):
- os.remove(lazy_extractors_filename)
-
-# Block plugins from loading
-plugins_dirname = 'ytdlp_plugins'
-plugins_blocked_dirname = 'ytdlp_plugins_blocked'
-if os.path.exists(plugins_dirname):
- os.rename(plugins_dirname, plugins_blocked_dirname)
-
-from hypervideo_dl.extractor import _ALL_CLASSES
-from hypervideo_dl.extractor.common import InfoExtractor, SearchInfoExtractor
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-if os.path.exists(plugins_blocked_dirname):
- os.rename(plugins_blocked_dirname, plugins_dirname)
-with open('devscripts/lazy_load_template.py', 'rt') as f:
- module_template = f.read()
-
-CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
-module_contents = [
- module_template,
- *[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
- '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
+from inspect import getsource
-ie_template = '''
+from devscripts.utils import get_filename_args, read_file, write_file
+
+NO_ATTR = object()
+STATIC_CLASS_PROPERTIES = [
+ 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching
+ '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions
+ 'age_limit', # Used for --age-limit (evaluated)
+ '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated)
+]
+CLASS_METHODS = [
+ 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching
+ 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation
+ 'description', # Used for --extractor-descriptions
+ 'is_suitable', # Used for --age-limit
+ 'supports_login', 'is_single_video', # Accessed in CLI only with instance
+]
+IE_TEMPLATE = '''
class {name}({bases}):
- _module = '{module}'
+ _module = {module!r}
'''
-
-
-def get_base_name(base):
- if base is InfoExtractor:
- return 'LazyLoadExtractor'
- elif base is SearchInfoExtractor:
- return 'LazyLoadSearchExtractor'
- else:
- return base.__name__
-
-
-def build_lazy_ie(ie, name):
- s = ie_template.format(
- name=name,
- bases=', '.join(map(get_base_name, ie.__bases__)),
- module=ie.__module__)
- valid_url = getattr(ie, '_VALID_URL', None)
- if not valid_url and hasattr(ie, '_make_valid_url'):
- valid_url = ie._make_valid_url()
- if valid_url:
- s += f' _VALID_URL = {valid_url!r}\n'
- if not ie._WORKING:
- s += ' _WORKING = False\n'
- if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
- s += f'\n{getsource(ie.suitable)}'
- return s
-
-
-# find the correct sorting and add the required base classes so that subclasses
-# can be correctly created
-classes = _ALL_CLASSES[:-1]
-ordered_cls = []
-while classes:
- for c in classes[:]:
- bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
- stop = False
- for b in bases:
- if b not in classes and b not in ordered_cls:
- if b.__name__ == 'GenericIE':
- exit()
- classes.insert(0, b)
- stop = True
- if stop:
- break
- if all(b in ordered_cls for b in bases):
- ordered_cls.append(c)
- classes.remove(c)
- break
-ordered_cls.append(_ALL_CLASSES[-1])
-
-names = []
-for ie in ordered_cls:
- name = ie.__name__
- src = build_lazy_ie(ie, name)
- module_contents.append(src)
- if ie in _ALL_CLASSES:
- names.append(name)
-
-module_contents.append(
- '\n_ALL_CLASSES = [{0}]'.format(', '.join(names)))
-
-module_src = '\n'.join(module_contents) + '\n'
-
-with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
- f.write(module_src)
+MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')
+
+
+def main():
+ lazy_extractors_filename = get_filename_args(default_outfile='hypervideo_dl/extractor/lazy_extractors.py')
+ if os.path.exists(lazy_extractors_filename):
+ os.remove(lazy_extractors_filename)
+
+ _ALL_CLASSES = get_all_ies() # Must be before import
+
+ from hypervideo_dl.extractor.common import InfoExtractor, SearchInfoExtractor
+
+ DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
+ module_src = '\n'.join((
+ MODULE_TEMPLATE,
+ ' _module = None',
+ *extra_ie_code(DummyInfoExtractor),
+ '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
+ *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
+ ))
+
+ write_file(lazy_extractors_filename, f'{module_src}\n')
+
+
+def get_all_ies():
+ PLUGINS_DIRNAME = 'ytdlp_plugins'
+ BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
+ if os.path.exists(PLUGINS_DIRNAME):
+ # os.rename cannot be used across filesystems (e.g. in Docker), hence shutil.move. See https://github.com/hypervideo/hypervideo/pull/4958
+ shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
+ try:
+ from hypervideo_dl.extractor.extractors import _ALL_CLASSES
+ finally:
+ if os.path.exists(BLOCKED_DIRNAME):
+ shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
+ return _ALL_CLASSES
+
+
+def extra_ie_code(ie, base=None):
+ for var in STATIC_CLASS_PROPERTIES:
+ val = getattr(ie, var)
+ if val != (getattr(base, var) if base else NO_ATTR):
+ yield f' {var} = {val!r}'
+ yield ''
+
+ for name in CLASS_METHODS:
+ f = getattr(ie, name)
+ if not base or f.__func__ != getattr(base, name).__func__:
+ yield getsource(f)
+
+
+def build_ies(ies, bases, attr_base):
+ names = []
+ for ie in sort_ies(ies, bases):
+ yield build_lazy_ie(ie, ie.__name__, attr_base)
+ if ie in ies:
+ names.append(ie.__name__)
+
+ yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
+
+
+def sort_ies(ies, ignored_bases):
+ """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
+ classes, returned_classes = ies[:-1], set()
+ assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
+ while classes:
+ for c in classes[:]:
+ bases = set(c.__bases__) - {object, *ignored_bases}
+ restart = False
+ for b in sorted(bases, key=lambda x: x.__name__):
+ if b not in classes and b not in returned_classes:
+ assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
+ classes.insert(0, b)
+ restart = True
+ if restart:
+ break
+ if bases <= returned_classes:
+ yield c
+ returned_classes.add(c)
+ classes.remove(c)
+ break
+ yield ies[-1]
+
+
+def build_lazy_ie(ie, name, attr_base):
+ bases = ', '.join({
+ 'InfoExtractor': 'LazyLoadExtractor',
+ 'SearchInfoExtractor': 'LazyLoadSearchExtractor',
+ }.get(base.__name__, base.__name__) for base in ie.__bases__)
+
+ s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
+ return s + '\n'.join(extra_ie_code(ie, attr_base))
+
+
+if __name__ == '__main__':
+ main()
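
`sort_ies` is a dependency-ordered emitter: a class is written to `lazy_extractors.py` only after every base it inherits from (other than the ignored ones) has been written, and missing bases are pulled into the work list on the fly. A self-contained sketch of the core ordering rule, without the base-insertion and GenericIE special-casing:

```python
def sort_by_bases(classes, ignored=()):
    """Yield classes so that every base precedes its subclasses."""
    emitted, pending = set(), list(classes)
    while pending:
        for cls in pending[:]:
            bases = set(cls.__bases__) - {object, *ignored}
            if bases <= emitted:  # all bases already yielded
                emitted.add(cls)
                pending.remove(cls)
                yield cls
                break
        else:  # no progress: some base is outside the input set
            raise RuntimeError('unresolvable base class')


class A: pass
class B(A): pass
class C(B, A): pass

print([c.__name__ for c in sort_by_bases([C, B, A])])  # ['A', 'B', 'C']
```
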
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 1a9a017..6adfca0 100755..100644
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -1,31 +1,83 @@
#!/usr/bin/env python3
-# hypervideo --help | make_readme.py
-# This must be run in a console of correct width
+"""
+hypervideo --help | make_readme.py
+This must be run in a console of the correct width
+"""
-from __future__ import unicode_literals
-
-import io
+# Allow direct execution
+import os
import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import functools
import re
+from devscripts.utils import read_file, write_file
+
README_FILE = 'README.md'
-helptext = sys.stdin.read()
-if isinstance(helptext, bytes):
- helptext = helptext.decode('utf-8')
+OPTIONS_START = 'General Options:'
+OPTIONS_END = 'CONFIGURATION'
+EPILOG_START = 'See full documentation'
+ALLOWED_OVERSHOOT = 2
+
+DISABLE_PATCH = object()
+
+
+def take_section(text, start=None, end=None, *, shift=0):
+ return text[
+ text.index(start) + shift if start else None:
+ text.index(end) + shift if end else None
+ ]
+
+
+def apply_patch(text, patch):
+ return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)
+
+
+options = take_section(sys.stdin.read(), f'\n {OPTIONS_START}', f'\n{EPILOG_START}', shift=1)
-with io.open(README_FILE, encoding='utf-8') as f:
- oldreadme = f.read()
+max_width = max(map(len, options.split('\n')))
+switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group())
+delim = f'\n{" " * switch_col_width}'
-header = oldreadme[:oldreadme.index('# OPTIONS')]
-footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
+PATCHES = (
+ ( # Standardize update message
+ r'(?m)^( -U, --update\s+).+(\n \s.+)*$',
+ r'\1Update this program to the latest version',
+ ),
+ ( # Headings
+ r'(?m)^ (\w.+\n)( (?=\w))?',
+ r'## \1'
+ ),
+ ( # Do not split URLs
+ rf'({delim[:-1]})? (?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s',
+ lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n'))
+ ),
+ ( # Do not split "words"
+ rf'(?m)({delim}\S+)+$',
+ lambda mobj: ''.join((delim, mobj.group(0).replace(delim, '')))
+ ),
+ ( # Allow overshooting last line
+ rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})',
+ lambda mobj: (mobj.group().replace(delim, ' ')
+ if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT
+ else mobj.group())
+ ),
+ ( # Avoid newline when a space is available between switch and description
+ DISABLE_PATCH, # This creates issues with prepare_manpage
+ r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim),
+ r'\1 '
+ ),
+)
-options = helptext[helptext.index(' General Options:') + 19:]
-options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
-options = '# OPTIONS\n' + options + '\n'
+readme = read_file(README_FILE)
-with io.open(README_FILE, 'w', encoding='utf-8') as f:
- f.write(header)
- f.write(options)
- f.write(footer)
+write_file(README_FILE, ''.join((
+ take_section(readme, end=f'## {OPTIONS_START}'),
+ functools.reduce(apply_patch, PATCHES, options),
+ take_section(readme, f'# {OPTIONS_END}'),
+)))
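
The README options block is now produced by threading the help text through `PATCHES` with `functools.reduce`; each entry is a `re.sub` argument pair, and `DISABLE_PATCH` as the first element turns an entry into a no-op while keeping it documented in place. A runnable miniature of that pipeline (the patch contents are hypothetical):

```python
import functools
import re

DISABLE_PATCH = object()


def apply_patch(text, patch):
    return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)


PATCHES = (
    (r'(?m)^foo', 'bar'),            # hypothetical rewrite
    (DISABLE_PATCH, r'unused', ''),  # kept for documentation, skipped at runtime
)

print(functools.reduce(apply_patch, PATCHES, 'foo\nfoo'))  # -> 'bar\nbar'
```
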
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 9bce04b..5ccc75d 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -1,47 +1,19 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-import io
-import optparse
+# Allow direct execution
import os
import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-# Import hypervideo_dl
-ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
-sys.path.insert(0, ROOT_DIR)
-import hypervideo_dl
+
+from devscripts.utils import get_filename_args, write_file
+from hypervideo_dl.extractor import list_extractor_classes
def main():
- parser = optparse.OptionParser(usage='%prog OUTFILE.md')
- options, args = parser.parse_args()
- if len(args) != 1:
- parser.error('Expected an output filename')
-
- outfile, = args
-
- def gen_ies_md(ies):
- for ie in ies:
- ie_md = '**{0}**'.format(ie.IE_NAME)
- if ie.IE_DESC is False:
- continue
- if ie.IE_DESC is not None:
- ie_md += ': {0}'.format(ie.IE_DESC)
- search_key = getattr(ie, 'SEARCH_KEY', None)
- if search_key is not None:
- ie_md += f'; "{ie.SEARCH_KEY}:" prefix'
- if not ie.working():
- ie_md += ' (Currently broken)'
- yield ie_md
-
- ies = sorted(hypervideo_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
- out = '# Supported sites\n' + ''.join(
- ' - ' + md + '\n'
- for md in gen_ies_md(ies))
-
- with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(out)
+ out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
+ write_file(get_filename_args(), f'# Supported sites\n{out}\n')
if __name__ == '__main__':
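
The whole generator now reduces to a join over `list_extractor_classes()`; extractors that set `IE_DESC = False` are deliberately excluded, matching the old `continue`. A sketch of invoking the same pieces from a source checkout (the output filename is hypothetical):

```python
from devscripts.utils import write_file
from hypervideo_dl.extractor import list_extractor_classes

entries = (ie.description() for ie in list_extractor_classes()
           if ie.IE_DESC is not False)  # IE_DESC = False hides an extractor
write_file('supportedsites.md', '# Supported sites\n' + '\n'.join(entries) + '\n')
```
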
diff --git a/devscripts/posix-locale.sh b/devscripts/posix-locale.sh
deleted file mode 100755
index 0aa7a59..0000000
--- a/devscripts/posix-locale.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-
-# source this file in your shell to get a POSIX locale (which will break many programs, but that's kind of the point)
-
-export LC_ALL=POSIX
-export LANG=POSIX
-export LANGUAGE=POSIX
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 8920df1..ef41d21 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -1,11 +1,22 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-import io
-import optparse
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
import os.path
import re
+from devscripts.utils import (
+ compose_functions,
+ get_filename_args,
+ read_file,
+ write_file,
+)
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
@@ -24,25 +35,6 @@ yt\-dlp \- A youtube-dl fork with additional features and patches
'''
-def main():
- parser = optparse.OptionParser(usage='%prog OUTFILE.md')
- options, args = parser.parse_args()
- if len(args) != 1:
- parser.error('Expected an output filename')
-
- outfile, = args
-
- with io.open(README_FILE, encoding='utf-8') as f:
- readme = f.read()
-
- readme = filter_excluded_sections(readme)
- readme = move_sections(readme)
- readme = filter_options(readme)
-
- with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(PREFIX + readme)
-
-
def filter_excluded_sections(readme):
EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->')
EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->')
@@ -94,5 +86,12 @@ def filter_options(readme):
return readme.replace(section, options, 1)
+TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options)
+
+
+def main():
+ write_file(get_filename_args(), PREFIX + TRANSFORM(read_file(README_FILE)))
+
+
if __name__ == '__main__':
main()
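
`TRANSFORM` is the three README filters composed left-to-right with `compose_functions` from `devscripts/utils.py` (quoted verbatim in the sketch below). The stand-in filters are hypothetical, since the real ones need the full README:

```python
import functools


def compose_functions(*functions):
    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)


# Hypothetical stand-ins for filter_excluded_sections/move_sections/filter_options:
transform = compose_functions(str.strip, str.upper)
print(transform('  readme body  '))  # 'README BODY' -- leftmost function runs first
```
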
diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat
index b8bb393..190d239 100644
--- a/devscripts/run_tests.bat
+++ b/devscripts/run_tests.bat
@@ -13,4 +13,5 @@ if ["%~1"]==[""] (
exit /b 1
)
+set PYTHONWARNINGS=error
pytest %test_set%
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
index c9a75ba..faa642e 100755
--- a/devscripts/run_tests.sh
+++ b/devscripts/run_tests.sh
@@ -1,14 +1,14 @@
-#!/bin/sh
+#!/usr/bin/env sh
-if [ -z $1 ]; then
+if [ -z "$1" ]; then
test_set='test'
-elif [ $1 = 'core' ]; then
+elif [ "$1" = 'core' ]; then
test_set="-m not download"
-elif [ $1 = 'download' ]; then
+elif [ "$1" = 'download' ]; then
test_set="-m download"
else
- echo 'Invalid test type "'$1'". Use "core" | "download"'
+ echo 'Invalid test type "'"$1"'". Use "core" | "download"'
exit 1
fi
-python3 -m pytest "$test_set"
+python3 -bb -Werror -m pytest "$test_set"
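
Besides the quoting fixes, the runner now invokes Python with `-bb` (implicit bytes/str mixing raises instead of silently comparing unequal) and `-Werror` (warnings become exceptions). A sketch of what each flag catches; save as a file and run it with `python3 -bb -Werror` (the filename is up to you):

```python
import warnings

# Under -bb this comparison raises BytesWarning as an error instead of
# quietly evaluating to False:
print(b'abc' == 'abc')

# Under -Werror this warning is promoted to an exception:
warnings.warn('deprecated helper', DeprecationWarning)
```
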
diff --git a/devscripts/set-variant.py b/devscripts/set-variant.py
new file mode 100644
index 0000000..c9c8561
--- /dev/null
+++ b/devscripts/set-variant.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import argparse
+import functools
+import re
+
+from devscripts.utils import compose_functions, read_file, write_file
+
+VERSION_FILE = 'hypervideo_dl/version.py'
+
+
+def parse_options():
+ parser = argparse.ArgumentParser(description='Set the build variant of the package')
+ parser.add_argument('variant', help='Name of the variant')
+ parser.add_argument('-M', '--update-message', default=None, help='Message to show in -U')
+ return parser.parse_args()
+
+
+def property_setter(name, value):
+ return functools.partial(re.sub, rf'(?m)^{name}\s*=\s*.+$', f'{name} = {value!r}')
+
+
+opts = parse_options()
+transform = compose_functions(
+ property_setter('VARIANT', opts.variant),
+ property_setter('UPDATE_HINT', opts.update_message)
+)
+
+write_file(VERSION_FILE, transform(read_file(VERSION_FILE)))
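
`property_setter` builds a deferred `re.sub` over a single module-level assignment, so the two setters compose cleanly before the file is ever read. A self-contained run against a hypothetical `version.py` body:

```python
import functools
import re


def property_setter(name, value):
    return functools.partial(re.sub, rf'(?m)^{name}\s*=\s*.+$', f'{name} = {value!r}')


src = "__version__ = '1.0'\nVARIANT = None\nUPDATE_HINT = None\n"
print(property_setter('VARIANT', 'pip')(src))
# __version__ = '1.0'
# VARIANT = 'pip'
# UPDATE_HINT = None
```
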
diff --git a/devscripts/utils.py b/devscripts/utils.py
new file mode 100644
index 0000000..3f67e62
--- /dev/null
+++ b/devscripts/utils.py
@@ -0,0 +1,35 @@
+import argparse
+import functools
+
+
+def read_file(fname):
+ with open(fname, encoding='utf-8') as f:
+ return f.read()
+
+
+def write_file(fname, content, mode='w'):
+ with open(fname, mode, encoding='utf-8') as f:
+ return f.write(content)
+
+
+# Get the version without importing the package
+def read_version(fname='hypervideo_dl/version.py'):
+ exec(compile(read_file(fname), fname, 'exec'))
+ return locals()['__version__']
+
+
+def get_filename_args(has_infile=False, default_outfile=None):
+ parser = argparse.ArgumentParser()
+ if has_infile:
+ parser.add_argument('infile', help='Input file')
+ kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
+ parser.add_argument('outfile', **kwargs, help='Output file')
+
+ opts = parser.parse_args()
+ if has_infile:
+ return opts.infile, opts.outfile
+ return opts.outfile
+
+
+def compose_functions(*functions):
+ return lambda x: functools.reduce(lambda y, f: f(y), functions, x)
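
How the other devscripts are expected to consume these helpers, as already visible in the hunks above (the filter functions and filenames here are hypothetical):

```python
from devscripts.utils import (
    compose_functions, get_filename_args, read_file, write_file)

# Replaces the old per-script optparse boilerplate:
# `python devscripts/some_script.py OUT.md` -> 'OUT.md'
outfile = get_filename_args(default_outfile='OUT.md')

transform = compose_functions(str.strip, str.upper)  # stand-in filters
write_file(outfile, transform(read_file('README.md')))
```
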
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index c8620a5..4012ae0 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -1,11 +1,12 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
+# Allow direct execution
import os
-from os.path import dirname as dirn
import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
import hypervideo_dl
ZSH_COMPLETION_FILE = "completions/zsh/_hypervideo"
@@ -45,5 +46,5 @@ def build_completion(opt_parser):
f.write(template)
-parser = hypervideo_dl.parseOpts()[0]
+parser = hypervideo_dl.parseOpts(ignore_config_files=True)[0]
build_completion(parser)
diff --git a/docs/.gitignore b/docs/.gitignore
deleted file mode 100644
index 69fa449..0000000
--- a/docs/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-_build/
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 805682f..0000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,177 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = _build
-
-# User-friendly check for sphinx-build
-ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
-$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
-endif
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
-
-help:
- @echo "Please use \`make <target>' where <target> is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " xml to make Docutils-native XML files"
- @echo " pseudoxml to make pseudoxml-XML files for display purposes"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
-
-clean:
- rm -rf $(BUILDDIR)/*
-
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-singlehtml:
- $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
- @echo
- @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/hypervideo.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/hypervideo.qhc"
-
-devhelp:
- $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
- @echo
- @echo "Build finished."
- @echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/hypervideo"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/hypervideo"
- @echo "# devhelp"
-
-epub:
- $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
- @echo
- @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make' in that directory to run these through (pdf)latex" \
- "(use \`make latexpdf' here to do that automatically)."
-
-latexpdf:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through pdflatex..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-latexpdfja:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through platex and dvipdfmx..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-text:
- $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
- @echo
- @echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-man:
- $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
- @echo
- @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-texinfo:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo
- @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
- @echo "Run \`make' in that directory to run these through makeinfo" \
- "(use \`make info' here to do that automatically)."
-
-info:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo "Running Texinfo files through makeinfo..."
- make -C $(BUILDDIR)/texinfo info
- @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-gettext:
- $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
- @echo
- @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
-
-xml:
- $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
- @echo
- @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-pseudoxml:
- $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
- @echo
- @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/conf.py b/docs/conf.py
deleted file mode 100644
index 4b48a85..0000000
--- a/docs/conf.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# coding: utf-8
-#
-# hypervideo documentation build configuration file, created by
-# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-import sys
-import os
-# Allows to import hypervideo_dl
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# -- General configuration ------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- 'sphinx.ext.autodoc',
-]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix of source filenames.
-source_suffix = '.rst'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'hypervideo'
-copyright = u'2014, Ricardo Garcia Gonzalez'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-from hypervideo_dl.version import __version__
-version = __version__
-# The full version, including alpha/beta/rc tags.
-release = version
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-exclude_patterns = ['_build']
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-html_theme = 'default'
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'hypervideodoc'
diff --git a/docs/index.rst b/docs/index.rst
deleted file mode 100644
index 93b8704..0000000
--- a/docs/index.rst
+++ /dev/null
@@ -1,23 +0,0 @@
-Welcome to hypervideo's documentation!
-======================================
-
-*hypervideo* is a command-line program to download videos from YouTube.com and more sites.
-It can also be used in Python code.
-
-Developer guide
----------------
-
-This section contains information for using *hypervideo* from Python programs.
-
-.. toctree::
- :maxdepth: 2
-
- module_guide
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
-
diff --git a/docs/module_guide.rst b/docs/module_guide.rst
deleted file mode 100644
index a27bc8f..0000000
--- a/docs/module_guide.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-Using the ``hypervideo_dl`` module
-===============================
-
-When using the ``hypervideo_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
-
-.. code-block:: python
-
- >>> from hypervideo_dl import YoutubeDL
- >>> ydl = YoutubeDL()
- >>> ydl.add_default_info_extractors()
-
-Extracting video information
-----------------------------
-
-You use the :meth:`YoutubeDL.extract_info` method for getting the video information, which returns a dictionary:
-
-.. code-block:: python
-
- >>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
- [youtube] Setting language
- [youtube] BaW_jenozKc: Downloading webpage
- [youtube] BaW_jenozKc: Downloading video info webpage
- [youtube] BaW_jenozKc: Extracting video information
- >>> info['title']
- 'hypervideo test video "\'/\\ä↭𝕐'
- >>> info['height'], info['width']
- (720, 1280)
-
-If you want to download or play the video you can get its url:
-
-.. code-block:: python
-
- >>> info['url']
- 'https://...'
-
-Extracting playlist information
--------------------------------
-
-The playlist information is extracted in a similar way, but the dictionary is a bit different:
-
-.. code-block:: python
-
- >>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False)
- [TED] open_source_open_world: Downloading playlist webpage
- ...
- >>> playlist['title']
- 'Open-source, open world'
-
-
-
-You can access the videos in the playlist with the ``entries`` field:
-
-.. code-block:: python
-
- >>> for video in playlist['entries']:
- ... print('Video #%d: %s' % (video['playlist_index'], video['title']))
-
- Video #1: How Arduino is open-sourcing imagination
- Video #2: The year open data went worldwide
- Video #3: Massive-scale online collaboration
- Video #4: The art of asking
- Video #5: How cognitive surplus will change the world
- Video #6: The birth of Wikipedia
- Video #7: Coding a better government
- Video #8: The era of open innovation
- Video #9: The currency of the new economy is trust
-
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
deleted file mode 100644
index ed0d5e9..0000000
--- a/docs/supportedsites.md
+++ /dev/null
@@ -1,1228 +0,0 @@
-# Supported sites
- - **1tv**: Первый канал
- - **20min**
- - **220.ro**
- - **23video**
- - **247sports**
- - **24video**
- - **3qsdn**: 3Q SDN
- - **3sat**
- - **4tube**
- - **56.com**
- - **5min**
- - **6play**
- - **7plus**
- - **8tracks**
- - **91porn**
- - **9c9media**
- - **9gag**
- - **9now.com.au**
- - **abc.net.au**
- - **abc.net.au:iview**
- - **abcnews**
- - **abcnews:video**
- - **abcotvs**: ABC Owned Television Stations
- - **abcotvs:clips**
- - **AcademicEarth:Course**
- - **acast**
- - **acast:channel**
- - **ADN**: Anime Digital Network
- - **AdobeConnect**
- - **adobetv**
- - **adobetv:channel**
- - **adobetv:embed**
- - **adobetv:show**
- - **adobetv:video**
- - **AdultSwim**
- - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
- - **aenetworks:collection**
- - **aenetworks:show**
- - **afreecatv**: afreecatv.com
- - **AirMozilla**
- - **AliExpressLive**
- - **AlJazeera**
- - **Allocine**
- - **AlphaPorno**
- - **Amara**
- - **AMCNetworks**
- - **AmericasTestKitchen**
- - **AmericasTestKitchenSeason**
- - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- - **AnimeOnDemand**
- - **Anvato**
- - **aol.com**: Yahoo screen and movies
- - **APA**
- - **Aparat**
- - **AppleConnect**
- - **AppleDaily**: 臺灣蘋果日報
- - **ApplePodcasts**
- - **appletrailers**
- - **appletrailers:section**
- - **archive.org**: archive.org videos
- - **ArcPublishing**
- - **ARD**
- - **ARD:mediathek**
- - **ARDBetaMediathek**
- - **Arkena**
- - **arte.sky.it**
- - **ArteTV**
- - **ArteTVEmbed**
- - **ArteTVPlaylist**
- - **AsianCrush**
- - **AsianCrushPlaylist**
- - **AtresPlayer**
- - **ATTTechChannel**
- - **ATVAt**
- - **AudiMedia**
- - **AudioBoom**
- - **audiomack**
- - **audiomack:album**
- - **AWAAN**
- - **awaan:live**
- - **awaan:season**
- - **awaan:video**
- - **AZMedien**: AZ Medien videos
- - **BaiduVideo**: 百度视频
- - **bandaichannel**
- - **Bandcamp**
- - **Bandcamp:album**
- - **Bandcamp:weekly**
- - **bangumi.bilibili.com**: BiliBili番剧
- - **bbc**: BBC
- - **bbc.co.uk**: BBC iPlayer
- - **bbc.co.uk:article**: BBC articles
- - **bbc.co.uk:iplayer:episodes**
- - **bbc.co.uk:iplayer:group**
- - **bbc.co.uk:playlist**
- - **BBVTV**
- - **Beatport**
- - **Beeg**
- - **BehindKink**
- - **Bellator**
- - **BellMedia**
- - **Bet**
- - **bfi:player**
- - **bfmtv**
- - **bfmtv:article**
- - **bfmtv:live**
- - **BibelTV**
- - **Bigflix**
- - **Bild**: Bild.de
- - **BiliBili**
- - **BilibiliAudio**
- - **BilibiliAudioAlbum**
- - **BiliBiliPlayer**
- - **BioBioChileTV**
- - **Biography**
- - **BIQLE**
- - **BitChute**
- - **BitChuteChannel**
- - **BleacherReport**
- - **BleacherReportCMS**
- - **Bloomberg**
- - **BokeCC**
- - **BongaCams**
- - **BostonGlobe**
- - **Box**
- - **Bpb**: Bundeszentrale für politische Bildung
- - **BR**: Bayerischer Rundfunk
- - **BravoTV**
- - **Break**
- - **brightcove:legacy**
- - **brightcove:new**
- - **BRMediathek**: Bayerischer Rundfunk Mediathek
- - **bt:article**: Bergens Tidende Articles
- - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- - **BusinessInsider**
- - **BuzzFeed**
- - **BYUtv**
- - **Camdemy**
- - **CamdemyFolder**
- - **CamModels**
- - **CamTube**
- - **CamWithHer**
- - **canalc2.tv**
- - **Canalplus**: mycanal.fr and piwiplus.fr
- - **Canvas**
- - **CanvasEen**: canvas.be and een.be
- - **CarambaTV**
- - **CarambaTVPage**
- - **CartoonNetwork**
- - **cbc.ca**
- - **cbc.ca:olympics**
- - **cbc.ca:player**
- - **cbc.ca:watch**
- - **cbc.ca:watch:video**
- - **CBS**
- - **CBSInteractive**
- - **CBSLocal**
- - **CBSLocalArticle**
- - **cbsnews**: CBS News
- - **cbsnews:embed**
- - **cbsnews:livevideo**: CBS News Live Videos
- - **cbssports**
- - **cbssports:embed**
- - **CCMA**
- - **CCTV**: 央视网
- - **CDA**
- - **CeskaTelevize**
- - **CeskaTelevizePorady**
- - **channel9**: Channel 9
- - **CharlieRose**
- - **Chaturbate**
- - **Chilloutzone**
- - **chirbit**
- - **chirbit:profile**
- - **cielotv.it**
- - **Cinchcast**
- - **Cinemax**
- - **CiscoLiveSearch**
- - **CiscoLiveSession**
- - **CJSW**
- - **cliphunter**
- - **Clippit**
- - **ClipRs**
- - **Clipsyndicate**
- - **CloserToTruth**
- - **CloudflareStream**
- - **Cloudy**
- - **Clubic**
- - **Clyp**
- - **cmt.com**
- - **CNBC**
- - **CNBCVideo**
- - **CNN**
- - **CNNArticle**
- - **CNNBlogs**
- - **ComedyCentral**
- - **ComedyCentralTV**
- - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- - **CONtv**
- - **Corus**
- - **Coub**
- - **Cracked**
- - **Crackle**
- - **CrooksAndLiars**
- - **crunchyroll**
- - **crunchyroll:playlist**
- - **CSpan**: C-SPAN
- - **CtsNews**: 華視新聞
- - **CTV**
- - **CTVNews**
- - **cu.ntv.co.jp**: Nippon Television Network
- - **Culturebox**
- - **CultureUnplugged**
- - **curiositystream**
- - **curiositystream:collection**
- - **CWTV**
- - **DagelijkseKost**: dagelijksekost.een.be
- - **DailyMail**
- - **dailymotion**
- - **dailymotion:playlist**
- - **dailymotion:user**
- - **daum.net**
- - **daum.net:clip**
- - **daum.net:playlist**
- - **daum.net:user**
- - **DBTV**
- - **DctpTv**
- - **DeezerPlaylist**
- - **defense.gouv.fr**
- - **democracynow**
- - **DHM**: Filmarchiv - Deutsches Historisches Museum
- - **Digg**
- - **DigitallySpeaking**
- - **Digiteka**
- - **Discovery**
- - **DiscoveryGo**
- - **DiscoveryGoPlaylist**
- - **DiscoveryNetworksDe**
- - **DiscoveryPlus**
- - **DiscoveryVR**
- - **Disney**
- - **dlive:stream**
- - **dlive:vod**
- - **Dotsub**
- - **DouyuShow**
- - **DouyuTV**: 斗鱼
- - **DPlay**
- - **DRBonanza**
- - **Dropbox**
- - **DrTuber**
- - **drtv**
- - **drtv:live**
- - **DTube**
- - **Dumpert**
- - **dvtv**: http://video.aktualne.cz/
- - **dw**
- - **dw:article**
- - **EaglePlatform**
- - **EbaumsWorld**
- - **EchoMsk**
- - **egghead:course**: egghead.io course
- - **egghead:lesson**: egghead.io lesson
- - **ehftv**
- - **eHow**
- - **EinsUndEinsTV**
- - **Einthusan**
- - **eitb.tv**
- - **EllenTube**
- - **EllenTubePlaylist**
- - **EllenTubeVideo**
- - **ElPais**: El País
- - **Embedly**
- - **EMPFlix**
- - **Engadget**
- - **Eporner**
- - **EroProfile**
- - **Escapist**
- - **ESPN**
- - **ESPNArticle**
- - **EsriVideo**
- - **Europa**
- - **EWETV**
- - **ExpoTV**
- - **Expressen**
- - **ExtremeTube**
- - **EyedoTV**
- - **facebook**
- - **FacebookPluginsVideo**
- - **faz.net**
- - **fc2**
- - **fc2:embed**
- - **Fczenit**
- - **filmon**
- - **filmon:channel**
- - **Filmweb**
- - **FiveThirtyEight**
- - **FiveTV**
- - **Flickr**
- - **Folketinget**: Folketinget (ft.dk; Danish parliament)
- - **FootyRoom**
- - **Formula1**
- - **FOX**
- - **FOX9**
- - **FOX9News**
- - **Foxgay**
- - **foxnews**: Fox News and Fox Business Video
- - **foxnews:article**
- - **FoxSports**
- - **france2.fr:generation-what**
- - **FranceCulture**
- - **FranceInter**
- - **FranceTV**
- - **FranceTVEmbed**
- - **francetvinfo.fr**
- - **FranceTVJeunesse**
- - **FranceTVSite**
- - **Freesound**
- - **freespeech.org**
- - **FreshLive**
- - **FrontendMasters**
- - **FrontendMastersCourse**
- - **FrontendMastersLesson**
- - **FujiTVFODPlus7**
- - **Funimation**
- - **Funk**
- - **Fusion**
- - **Fux**
- - **Gaia**
- - **GameInformer**
- - **GameSpot**
- - **GameStar**
- - **Gaskrank**
- - **Gazeta**
- - **GDCVault**
- - **GediDigital**
- - **generic**: Generic downloader that works on some sites
- - **Gfycat**
- - **GiantBomb**
- - **Giga**
- - **GlattvisionTV**
- - **Glide**: Glide mobile video messages (glide.me)
- - **Globo**
- - **GloboArticle**
- - **Go**
- - **GodTube**
- - **Golem**
- - **google:podcasts**
- - **google:podcasts:feed**
- - **GoogleDrive**
- - **Goshgay**
- - **GPUTechConf**
- - **Groupon**
- - **hbo**
- - **HearThisAt**
- - **Heise**
- - **HellPorno**
- - **Helsinki**: helsinki.fi
- - **HentaiStigma**
- - **hetklokhuis**
- - **hgtv.com:show**
- - **HGTVDe**
- - **HiDive**
- - **HistoricFilms**
- - **history:player**
- - **history:topic**: History.com Topic
- - **hitbox**
- - **hitbox:live**
- - **HitRecord**
- - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
- - **HornBunny**
- - **HotNewHipHop**
- - **hotstar**
- - **hotstar:playlist**
- - **Howcast**
- - **HowStuffWorks**
- - **HRTi**
- - **HRTiPlaylist**
- - **Huajiao**: 花椒直播
- - **HuffPost**: Huffington Post
- - **Hungama**
- - **HungamaSong**
- - **Hypem**
- - **ign.com**
- - **IGNArticle**
- - **IGNVideo**
- - **IHeartRadio**
- - **iheartradio:podcast**
- - **imdb**: Internet Movie Database trailers
- - **imdb:list**: Internet Movie Database lists
- - **Imgur**
- - **imgur:album**
- - **imgur:gallery**
- - **Ina**
- - **Inc**
- - **IndavideoEmbed**
- - **InfoQ**
- - **Instagram**
- - **instagram:tag**: Instagram hashtag search
- - **instagram:user**: Instagram user profile
- - **Internazionale**
- - **InternetVideoArchive**
- - **IPrima**
- - **iqiyi**: 爱奇艺
- - **Ir90Tv**
- - **ITTF**
- - **ITV**
- - **ITVBTCC**
- - **ivi**: ivi.ru
- - **ivi:compilation**: ivi.ru compilations
- - **ivideon**: Ivideon TV
- - **Iwara**
- - **Izlesene**
- - **Jamendo**
- - **JamendoAlbum**
- - **JeuxVideo**
- - **Joj**
- - **Jove**
- - **JWPlatform**
- - **Kakao**
- - **Kaltura**
- - **Kankan**
- - **Karaoketv**
- - **KarriereVideos**
- - **Katsomo**
- - **KeezMovies**
- - **Ketnet**
- - **khanacademy**
- - **khanacademy:unit**
- - **KickStarter**
- - **KinjaEmbed**
- - **KinoPoisk**
- - **KonserthusetPlay**
- - **KrasView**: Красвью
- - **Ku6**
- - **KUSI**
- - **kuwo:album**: 酷我音乐 - 专辑
- - **kuwo:category**: 酷我音乐 - 分类
- - **kuwo:chart**: 酷我音乐 - 排行榜
- - **kuwo:mv**: 酷我音乐 - MV
- - **kuwo:singer**: 酷我音乐 - 歌手
- - **kuwo:song**: 酷我音乐
- - **la7.it**
- - **laola1tv**
- - **laola1tv:embed**
- - **lbry**
- - **lbry:channel**
- - **LCI**
- - **Lcp**
- - **LcpPlay**
- - **Le**: 乐视网
- - **Lecture2Go**
- - **Lecturio**
- - **LecturioCourse**
- - **LecturioDeCourse**
- - **LEGO**
- - **Lemonde**
- - **Lenta**
- - **LePlaylist**
- - **LetvCloud**: 乐视云
- - **Libsyn**
- - **life**: Life.ru
- - **life:embed**
- - **limelight**
- - **limelight:channel**
- - **limelight:channel_list**
- - **LineLive**
- - **LineLiveChannel**
- - **LineTV**
- - **linkedin:learning**
- - **linkedin:learning:course**
- - **LinuxAcademy**
- - **LiTV**
- - **LiveJournal**
- - **LiveLeak**
- - **LiveLeakEmbed**
- - **livestream**
- - **livestream:original**
- - **LnkGo**
- - **loc**: Library of Congress
- - **LocalNews8**
- - **LoveHomePorn**
- - **lrt.lt**
- - **lynda**: lynda.com videos
- - **lynda:course**: lynda.com online courses
- - **m6**
- - **mailru**: Видео@Mail.Ru
- - **mailru:music**: Музыка@Mail.Ru
- - **mailru:music:search**: Музыка@Mail.Ru
- - **MallTV**
- - **mangomolo:live**
- - **mangomolo:video**
- - **ManyVids**
- - **MaoriTV**
- - **Markiza**
- - **MarkizaPage**
- - **massengeschmack.tv**
- - **MatchTV**
- - **MDR**: MDR.DE and KiKA
- - **MedalTV**
- - **media.ccc.de**
- - **media.ccc.de:lists**
- - **Medialaan**
- - **Mediaset**
- - **Mediasite**
- - **MediasiteCatalog**
- - **MediasiteNamedCatalog**
- - **Medici**
- - **megaphone.fm**: megaphone.fm embedded players
- - **Meipai**: 美拍
- - **MelonVOD**
- - **META**
- - **metacafe**
- - **Metacritic**
- - **mewatch**
- - **Mgoon**
- - **MGTV**: 芒果TV
- - **MiaoPai**
- - **minds**
- - **minds:channel**
- - **minds:group**
- - **MinistryGrid**
- - **Minoto**
- - **miomio.tv**
- - **MiTele**: mitele.es
- - **mixcloud**
- - **mixcloud:playlist**
- - **mixcloud:user**
- - **MLB**
- - **MLBVideo**
- - **Mnet**
- - **MNetTV**
- - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- - **Mofosex**
- - **MofosexEmbed**
- - **Mojvideo**
- - **Morningstar**: morningstar.com
- - **Motherless**
- - **MotherlessGroup**
- - **Motorsport**: motorsport.com
- - **MovieClips**
- - **MovieFap**
- - **Moviezine**
- - **MovingImage**
- - **MSN**
- - **mtg**: MTG services
- - **mtv**
- - **mtv.de**
- - **mtv:video**
- - **mtvjapan**
- - **mtvservices:embedded**
- - **MTVUutisetArticle**
- - **MuenchenTV**: münchen.tv
- - **mva**: Microsoft Virtual Academy videos
- - **mva:course**: Microsoft Virtual Academy courses
- - **Mwave**
- - **MwaveMeetGreet**
- - **MyChannels**
- - **MySpace**
- - **MySpace:album**
- - **MySpass**
- - **Myvi**
- - **MyVidster**
- - **MyviEmbed**
- - **MyVisionTV**
- - **n-tv.de**
- - **natgeo:video**
- - **NationalGeographicTV**
- - **Naver**
- - **NBA**
- - **nba:watch**
- - **nba:watch:collection**
- - **NBAChannel**
- - **NBAEmbed**
- - **NBAWatchEmbed**
- - **NBC**
- - **NBCNews**
- - **nbcolympics**
- - **nbcolympics:stream**
- - **NBCSports**
- - **NBCSportsStream**
- - **NBCSportsVPlayer**
- - **ndr**: NDR.de - Norddeutscher Rundfunk
- - **ndr:embed**
- - **ndr:embed:base**
- - **NDTV**
- - **NerdCubedFeed**
- - **netease:album**: 网易云音乐 - 专辑
- - **netease:djradio**: 网易云音乐 - 电台
- - **netease:mv**: 网易云音乐 - MV
- - **netease:playlist**: 网易云音乐 - 歌单
- - **netease:program**: 网易云音乐 - 电台节目
- - **netease:singer**: 网易云音乐 - 歌手
- - **netease:song**: 网易云音乐
- - **NetPlus**
- - **Netzkino**
- - **Newgrounds**
- - **NewgroundsPlaylist**
- - **Newstube**
- - **NextMedia**: 蘋果日報
- - **NextMediaActionNews**: 蘋果日報 - 動新聞
- - **NextTV**: 壹電視
- - **Nexx**
- - **NexxEmbed**
- - **nfl.com** (Currently broken)
- - **nfl.com:article** (Currently broken)
- - **NhkVod**
- - **NhkVodProgram**
- - **nhl.com**
- - **nick.com**
- - **nick.de**
- - **nickelodeon:br**
- - **nickelodeonru**
- - **nicknight**
- - **niconico**: ニコニコ動画
- - **NiconicoPlaylist**
- - **Nintendo**
- - **njoy**: N-JOY
- - **njoy:embed**
- - **NJPWWorld**: 新日本プロレスワールド
- - **NobelPrize**
- - **NonkTube**
- - **Noovo**
- - **Normalboots**
- - **NosVideo**
- - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- - **NovaEmbed**
- - **nowness**
- - **nowness:playlist**
- - **nowness:series**
- - **Noz**
- - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- - **npo.nl:live**
- - **npo.nl:radio**
- - **npo.nl:radio:fragment**
- - **Npr**
- - **NRK**
- - **NRKPlaylist**
- - **NRKRadioPodkast**
- - **NRKSkole**: NRK Skole
- - **NRKTV**: NRK TV and NRK Radio
- - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
- - **NRKTVEpisode**
- - **NRKTVEpisodes**
- - **NRKTVSeason**
- - **NRKTVSeries**
- - **NRLTV**
- - **ntv.ru**
- - **Nuvid**
- - **NYTimes**
- - **NYTimesArticle**
- - **NYTimesCooking**
- - **NZZ**
- - **ocw.mit.edu**
- - **OdaTV**
- - **Odnoklassniki**
- - **OktoberfestTV**
- - **OnDemandKorea**
- - **onet.pl**
- - **onet.tv**
- - **onet.tv:channel**
- - **OnetMVP**
- - **OnionStudios**
- - **Ooyala**
- - **OoyalaExternal**
- - **OraTV**
- - **orf:burgenland**: Radio Burgenland
- - **orf:fm4**: radio FM4
- - **orf:fm4:story**: fm4.orf.at stories
- - **orf:iptv**: iptv.ORF.at
- - **orf:kaernten**: Radio Kärnten
- - **orf:noe**: Radio Niederösterreich
- - **orf:oberoesterreich**: Radio Oberösterreich
- - **orf:oe1**: Radio Österreich 1
- - **orf:oe3**: Radio Österreich 3
- - **orf:salzburg**: Radio Salzburg
- - **orf:steiermark**: Radio Steiermark
- - **orf:tirol**: Radio Tirol
- - **orf:tvthek**: ORF TVthek
- - **orf:vorarlberg**: Radio Vorarlberg
- - **orf:wien**: Radio Wien
- - **OsnatelTV**
- - **OutsideTV**
- - **PacktPub**
- - **PacktPubCourse**
- - **PalcoMP3:artist**
- - **PalcoMP3:song**
- - **PalcoMP3:video**
- - **pandora.tv**: 판도라TV
- - **ParamountNetwork**
- - **parliamentlive.tv**: UK parliament videos
- - **Patreon**
- - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- - **PearVideo**
- - **PeerTube**
- - **People**
- - **PerformGroup**
- - **periscope**: Periscope
- - **periscope:user**: Periscope user videos
- - **PhilharmonieDeParis**: Philharmonie de Paris
- - **phoenix.de**
- - **Photobucket**
- - **Picarto**
- - **PicartoVod**
- - **Piksel**
- - **Pinkbike**
- - **Pinterest**
- - **PinterestCollection**
- - **Pladform**
- - **Platzi**
- - **PlatziCourse**
- - **play.fm**
- - **player.sky.it**
- - **PlayPlusTV**
- - **PlayStuff**
- - **PlaysTV**
- - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- - **Playvid**
- - **Playwire**
- - **pluralsight**
- - **pluralsight:course**
- - **podomatic**
- - **Pokemon**
- - **PolskieRadio**
- - **PolskieRadioCategory**
- - **Popcorntimes**
- - **PopcornTV**
- - **PornCom**
- - **PornerBros**
- - **PornHd**
- - **PornHub**: PornHub and Thumbzilla
- - **PornHubPagedVideoList**
- - **PornHubUser**
- - **PornHubUserVideosUpload**
- - **Pornotube**
- - **PornoVoisines**
- - **PornoXO**
- - **PornTube**
- - **PressTV**
- - **prosiebensat1**: ProSiebenSat.1 Digital
- - **puhutv**
- - **puhutv:serie**
- - **Puls4**
- - **Pyvideo**
- - **qqmusic**: QQ音乐
- - **qqmusic:album**: QQ音乐 - 专辑
- - **qqmusic:playlist**: QQ音乐 - 歌单
- - **qqmusic:singer**: QQ音乐 - 歌手
- - **qqmusic:toplist**: QQ音乐 - 排行榜
- - **QuantumTV**
- - **Qub**
- - **Quickline**
- - **QuicklineLive**
- - **R7**
- - **R7Article**
- - **radio.de**
- - **radiobremen**
- - **radiocanada**
- - **radiocanada:audiovideo**
- - **radiofrance**
- - **RadioJavan**
- - **Rai**
- - **RaiPlay**
- - **RaiPlayLive**
- - **RaiPlayPlaylist**
- - **RayWenderlich**
- - **RayWenderlichCourse**
- - **RBMARadio**
- - **RDS**: RDS.ca
- - **RedBull**
- - **RedBullEmbed**
- - **RedBullTV**
- - **RedBullTVRrnContent**
- - **Reddit**
- - **RedditR**
- - **RedTube**
- - **RegioTV**
- - **RENTV**
- - **RENTVArticle**
- - **Restudy**
- - **Reuters**
- - **ReverbNation**
- - **RICE**
- - **RMCDecouverte**
- - **RockstarGames**
- - **RoosterTeeth**
- - **RottenTomatoes**
- - **Roxwel**
- - **Rozhlas**
- - **RTBF**
- - **rte**: Raidió Teilifís Éireann TV
- - **rte:radio**: Raidió Teilifís Éireann radio
- - **rtl.nl**: rtl.nl and rtlxl.nl
- - **rtl2**
- - **rtl2:you**
- - **rtl2:you:series**
- - **RTP**
- - **RTS**: RTS.ch
- - **rtve.es:alacarta**: RTVE a la carta
- - **rtve.es:infantil**: RTVE infantil
- - **rtve.es:live**: RTVE.es live streams
- - **rtve.es:television**
- - **RTVNH**
- - **RTVS**
- - **RUHD**
- - **RumbleEmbed**
- - **rutube**: Rutube videos
- - **rutube:channel**: Rutube channels
- - **rutube:embed**: Rutube embedded videos
- - **rutube:movie**: Rutube movies
- - **rutube:person**: Rutube person videos
- - **rutube:playlist**: Rutube playlists
- - **RUTV**: RUTV.RU
- - **Ruutu**
- - **Ruv**
- - **safari**: safaribooksonline.com online video
- - **safari:api**
- - **safari:course**: safaribooksonline.com online courses
- - **SAKTV**
- - **SaltTV**
- - **SampleFocus**
- - **Sapo**: SAPO Vídeos
- - **savefrom.net**
- - **SBS**: sbs.com.au
- - **schooltv**
- - **screen.yahoo:search**: Yahoo screen search
- - **Screencast**
- - **ScreencastOMatic**
- - **ScrippsNetworks**
- - **scrippsnetworks:watch**
- - **SCTE**
- - **SCTECourse**
- - **Seeker**
- - **SenateISVP**
- - **SendtoNews**
- - **Servus**
- - **Sexu**
- - **SeznamZpravy**
- - **SeznamZpravyArticle**
- - **Shahid**
- - **ShahidShow**
- - **Shared**: shared.sx
- - **ShowRoomLive**
- - **simplecast**
- - **simplecast:episode**
- - **simplecast:podcast**
- - **Sina**
- - **sky.it**
- - **sky:news**
- - **sky:sports**
- - **sky:sports:news**
- - **skyacademy.it**
- - **SkylineWebcams**
- - **skynewsarabia:article**
- - **skynewsarabia:video**
- - **Slideshare**
- - **SlidesLive**
- - **Slutload**
- - **Snotr**
- - **Sohu**
- - **SonyLIV**
- - **soundcloud**
- - **soundcloud:playlist**
- - **soundcloud:search**: Soundcloud search
- - **soundcloud:set**
- - **soundcloud:trackstation**
- - **soundcloud:user**
- - **SoundcloudEmbed**
- - **soundgasm**
- - **soundgasm:profile**
- - **southpark.cc.com**
- - **southpark.cc.com:español**
- - **southpark.de**
- - **southpark.nl**
- - **southparkstudios.dk**
- - **SpankBang**
- - **SpankBangPlaylist**
- - **Spankwire**
- - **Spiegel**
- - **sport.francetvinfo.fr**
- - **Sport5**
- - **SportBox**
- - **SportDeutschland**
- - **spotify**
- - **spotify:show**
- - **Spreaker**
- - **SpreakerPage**
- - **SpreakerShow**
- - **SpreakerShowPage**
- - **SpringboardPlatform**
- - **Sprout**
- - **sr:mediathek**: Saarländischer Rundfunk
- - **SRGSSR**
- - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- - **stanfordoc**: Stanford Open ClassRoom
- - **Steam**
- - **Stitcher**
- - **StitcherShow**
- - **StoryFire**
- - **StoryFireSeries**
- - **StoryFireUser**
- - **Streamable**
- - **streamcloud.eu**
- - **StreamCZ**
- - **StreetVoice**
- - **StretchInternet**
- - **stv:player**
- - **SunPorno**
- - **sverigesradio:episode**
- - **sverigesradio:publication**
- - **SVT**
- - **SVTPage**
- - **SVTPlay**: SVT Play and Öppet arkiv
- - **SVTSeries**
- - **SWRMediathek**
- - **Syfy**
- - **SztvHu**
- - **t-online.de**
- - **Tagesschau**
- - **tagesschau:player**
- - **Tass**
- - **TBS**
- - **TDSLifeway**
- - **Teachable**
- - **TeachableCourse**
- - **teachertube**: teachertube.com videos
- - **teachertube:user:collection**: teachertube.com user and collection videos
- - **TeachingChannel**
- - **Teamcoco**
- - **TeamTreeHouse**
- - **TechTalks**
- - **techtv.mit.edu**
- - **ted**
- - **Tele13**
- - **Tele5**
- - **TeleBruxelles**
- - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- - **Telegraaf**
- - **TeleMB**
- - **TeleQuebec**
- - **TeleQuebecEmission**
- - **TeleQuebecLive**
- - **TeleQuebecSquat**
- - **TeleQuebecVideo**
- - **TeleTask**
- - **Telewebion**
- - **TennisTV**
- - **TenPlay**
- - **TF1**
- - **TFO**
- - **TheIntercept**
- - **ThePlatform**
- - **ThePlatformFeed**
- - **TheScene**
- - **TheStar**
- - **TheSun**
- - **TheWeatherChannel**
- - **ThisAmericanLife**
- - **ThisAV**
- - **ThisOldHouse**
- - **TikTok**
- - **TikTokUser** (Currently broken)
- - **tinypic**: tinypic.com videos
- - **TMZ**
- - **TMZArticle**
- - **TNAFlix**
- - **TNAFlixNetworkEmbed**
- - **toggle**
- - **ToonGoggles**
- - **tou.tv**
- - **Toypics**: Toypics video
- - **ToypicsUser**: Toypics user profile
- - **TrailerAddict** (Currently broken)
- - **Trilulilu**
- - **Trovo**
- - **TrovoVod**
- - **TruNews**
- - **TruTV**
- - **Tube8**
- - **TubiTv**
- - **Tumblr**
- - **tunein:clip**
- - **tunein:program**
- - **tunein:station**
- - **tunein:topic**
- - **TunePk**
- - **Turbo**
- - **tv.dfb.de**
- - **TV2**
- - **tv2.hu**
- - **TV2Article**
- - **TV2DK**
- - **TV2DKBornholmPlay**
- - **TV4**: tv4.se and tv4play.se
- - **TV5MondePlus**: TV5MONDE+
- - **tv5unis**
- - **tv5unis:video**
- - **tv8.it**
- - **TVA**
- - **TVANouvelles**
- - **TVANouvellesArticle**
- - **TVC**
- - **TVCArticle**
- - **TVer**
- - **tvigle**: Интернет-телевидение Tvigle.ru
- - **tvland.com**
- - **TVN24**
- - **TVNet**
- - **TVNoe**
- - **TVNow**
- - **TVNowAnnual**
- - **TVNowNew**
- - **TVNowSeason**
- - **TVNowShow**
- - **tvp**: Telewizja Polska
- - **tvp:embed**: Telewizja Polska
- - **tvp:series**
- - **TVPlayer**
- - **TVPlayHome**
- - **Tweakers**
- - **TwitCasting**
- - **twitch:clips**
- - **twitch:stream**
- - **twitch:vod**
- - **TwitchCollection**
- - **TwitchVideos**
- - **TwitchVideosClips**
- - **TwitchVideosCollections**
- - **twitter**
- - **twitter:amplify**
- - **twitter:broadcast**
- - **twitter:card**
- - **udemy**
- - **udemy:course**
- - **UDNEmbed**: 聯合影音
- - **UFCArabia**
- - **UFCTV**
- - **UKTVPlay**
- - **umg:de**: Universal Music Deutschland
- - **Unistra**
- - **Unity**
- - **uol.com.br**
- - **uplynk**
- - **uplynk:preplay**
- - **Urort**: NRK P3 Urørt
- - **URPlay**
- - **USANetwork**
- - **USAToday**
- - **ustream**
- - **ustream:channel**
- - **ustudio**
- - **ustudio:embed**
- - **Varzesh3**
- - **Vbox7**
- - **VeeHD**
- - **Veoh**
- - **Vesti**: Вести.Ru
- - **Vevo**
- - **VevoPlaylist**
- - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- - **vh1.com**
- - **vhx:embed**
- - **Viafree**
- - **vice**
- - **vice:article**
- - **vice:show**
- - **Vidbit**
- - **Viddler**
- - **Videa**
- - **video.arnes.si**: Arnes Video
- - **video.google:search**: Google Video search
- - **video.sky.it**
- - **video.sky.it:live**
- - **VideoDetective**
- - **videofy.me**
- - **videomore**
- - **videomore:season**
- - **videomore:video**
- - **VideoPress**
- - **Vidio**
- - **VidLii**
- - **vidme**
- - **vidme:user**
- - **vidme:user:likes**
- - **vier**: vier.be and vijf.be
- - **vier:videos**
- - **viewlift**
- - **viewlift:embed**
- - **Viidea**
- - **viki**
- - **viki:channel**
- - **vimeo**
- - **vimeo:album**
- - **vimeo:channel**
- - **vimeo:group**
- - **vimeo:likes**: Vimeo user likes
- - **vimeo:ondemand**
- - **vimeo:review**: Review pages on vimeo
- - **vimeo:user**
- - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
- - **Vimple**: Vimple - one-click video hosting
- - **Vine**
- - **vine:user**
- - **Viqeo**
- - **Viu**
- - **viu:ott**
- - **viu:playlist**
- - **Vivo**: vivo.sx
- - **vk**: VK
- - **vk:uservideos**: VK - User's Videos
- - **vk:wallpost**
- - **vlive**
- - **vlive:channel**
- - **vlive:post**
- - **Vodlocker**
- - **VODPl**
- - **VODPlatform**
- - **VoiceRepublic**
- - **Voot**
- - **VoxMedia**
- - **VoxMediaVolume**
- - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- - **Vrak**
- - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
- - **VrtNU**: VrtNU.be
- - **vrv**
- - **vrv:series**
- - **VShare**
- - **VTM**
- - **VTXTV**
- - **vube**: Vube.com
- - **VuClip**
- - **VVVVID**
- - **VVVVIDShow**
- - **VyboryMos**
- - **Vzaar**
- - **Wakanim**
- - **Walla**
- - **WalyTV**
- - **washingtonpost**
- - **washingtonpost:article**
- - **wat.tv**
- - **WatchBox**
- - **WatchIndianPorn**: Watch Indian Porn
- - **WDR**
- - **wdr:mobile**
- - **WDRElefant**
- - **WDRPage**
- - **Webcaster**
- - **WebcasterFeed**
- - **WebOfStories**
- - **WebOfStoriesPlaylist**
- - **Weibo**
- - **WeiboMobile**
- - **WeiqiTV**: WQTV
- - **Wistia**
- - **WistiaPlaylist**
- - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- - **WorldStarHipHop**
- - **WSJ**: Wall Street Journal
- - **WSJArticle**
- - **WWE**
- - **XBef**
- - **XboxClips**
- - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
- - **XHamster**
- - **XHamsterEmbed**
- - **XHamsterUser**
- - **xiami:album**: 虾米音乐 - 专辑
- - **xiami:artist**: 虾米音乐 - 歌手
- - **xiami:collection**: 虾米音乐 - 精选集
- - **xiami:song**: 虾米音乐
- - **ximalaya**: 喜马拉雅FM
- - **ximalaya:album**: 喜马拉雅FM 专辑
- - **XMinus**
- - **XNXX**
- - **Xstream**
- - **XTube**
- - **XTubeUser**: XTube user profile
- - **Xuite**: 隨意窩Xuite影音
- - **XVideos**
- - **XXXYMovies**
- - **Yahoo**: Yahoo screen and movies
- - **yahoo:gyao**
- - **yahoo:gyao:player**
- - **yahoo:japannews**: Yahoo! Japan News
- - **YandexDisk**
- - **yandexmusic:album**: Яндекс.Музыка - Альбом
- - **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
- - **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
- - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- - **yandexmusic:track**: Яндекс.Музыка - Трек
- - **YandexVideo**
- - **YapFiles**
- - **YesJapan**
- - **yinyuetai:video**: 音悦Tai
- - **Ynet**
- - **YouJizz**
- - **youku**: 优酷
- - **youku:show**
- - **YouNowChannel**
- - **YouNowLive**
- - **YouNowMoment**
- - **YouPorn**
- - **YourPorn**
- - **YourUpload**
- - **youtube**: YouTube.com
- - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- - **youtube:playlist**: YouTube.com playlists
- - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- - **youtube:search**: YouTube.com searches
- - **youtube:search:date**: YouTube.com searches, newest videos first
- - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- - **youtube:tab**: YouTube.com tab
- - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- - **YoutubeYtBe**
- - **YoutubeYtUser**
- - **Zapiks**
- - **Zattoo**
- - **ZattooLive**
- - **ZDF**
- - **ZDFChannel**
- - **Zhihu**
- - **zingmp3**: mp3.zing.vn
- - **zingmp3:album**
- - **zoom**
- - **Zype**
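The list above enumerates the extractor names shipped with hypervideo. As a minimal sketch (not part of this commit), the same names can be listed programmatically through gen_extractor_classes, which this diff imports in YoutubeDL.py:

    from hypervideo_dl.extractor import gen_extractor_classes

    # Print every supported extractor name, mirroring the list above.
    for ie in gen_extractor_classes():
        print(ie.IE_NAME)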
diff --git a/hypervideo.plugin.zsh b/hypervideo.plugin.zsh
deleted file mode 100644
index a5ee9ec..0000000
--- a/hypervideo.plugin.zsh
+++ /dev/null
@@ -1,24 +0,0 @@
-# This allows the hypervideo command to be installed in ZSH using antigen.
-# Antigen is a bundle manager. It allows you to enhance the functionality of
-# your zsh session by installing bundles and themes easily.
-
-# Antigen documentation:
-# http://antigen.sharats.me/
-# https://github.com/zsh-users/antigen
-
-# Install hypervideo:
-# antigen bundle ytdl-org/hypervideo
-# Bundles installed by antigen are available for use immediately.
-
-# Update hypervideo (and all other antigen bundles):
-# antigen update
-
-# The antigen command will download the git repository to a folder and then
-# execute an enabling script (this file). The complete process for loading the
-# code is documented here:
-# https://github.com/zsh-users/antigen#notes-on-writing-plugins
-
-# This specific script just aliases hypervideo to the python script that this
-# library provides. This requires updating the PYTHONPATH to ensure that the
-# full set of code can be located.
-alias hypervideo="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/hypervideo"
diff --git a/hypervideo_dl/YoutubeDL.py b/hypervideo_dl/YoutubeDL.py
index 276f42d..012c3b8 100644
--- a/hypervideo_dl/YoutubeDL.py
+++ b/hypervideo_dl/YoutubeDL.py
@@ -1,8 +1,3 @@
-#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import absolute_import, unicode_literals
-
import collections
import contextlib
import datetime
@@ -15,7 +10,7 @@ import json
import locale
import operator
import os
-import platform
+import random
import re
import shutil
import subprocess
@@ -24,151 +19,141 @@ import tempfile
import time
import tokenize
import traceback
-import random
import unicodedata
-
-from enum import Enum
+import urllib.request
from string import ascii_letters
-from .compat import (
- compat_basestring,
- compat_brotli,
- compat_get_terminal_size,
- compat_kwargs,
- compat_numeric_types,
- compat_os_name,
- compat_pycrypto_AES,
- compat_shlex_quote,
- compat_str,
- compat_tokenize_tokenize,
- compat_urllib_error,
- compat_urllib_request,
- compat_urllib_request_DataHandler,
- windows_enable_vt_mode,
-)
+from .cache import Cache
+from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
+from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
+from .downloader.rtmp import rtmpdump_version
+from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.common import UnsupportedURLIE
+from .extractor.openload import PhantomJSwrapper
+from .minicurses import format_text
+from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
+from .postprocessor import (
+ EmbedThumbnailPP,
+ FFmpegFixupDuplicateMoovPP,
+ FFmpegFixupDurationPP,
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ FFmpegVideoConvertorPP,
+ MoveFilesAfterDownloadPP,
+ get_postprocessor,
+)
from .utils import (
+ DEFAULT_OUTTMPL,
+ IDENTITY,
+ LINK_TEMPLATES,
+ MEDIA_EXTENSIONS,
+ NO_DEFAULT,
+ NUMBER_RE,
+ OUTTMPL_TYPES,
+ POSTPROCESS_WHEN,
+ STR_FORMAT_RE_TMPL,
+ STR_FORMAT_TYPES,
+ ContentTooShortError,
+ DateRange,
+ DownloadCancelled,
+ DownloadError,
+ EntryNotInPlaylist,
+ ExistingVideoReached,
+ ExtractorError,
+ FormatSorter,
+ GeoRestrictedError,
+ HEADRequest,
+ ISO3166Utils,
+ LazyList,
+ MaxDownloadsReached,
+ Namespace,
+ PagedList,
+ PerRequestProxyHandler,
+ PlaylistEntries,
+ Popen,
+ PostProcessingError,
+ ReExtractInfo,
+ RejectedVideoReached,
+ SameFileError,
+ UnavailableVideoError,
+ UserNotLive,
+ YoutubeDLCookieProcessor,
+ YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
age_restricted,
args_to_str,
- ContentTooShortError,
+ bug_reports_message,
date_from_str,
- DateRange,
- DEFAULT_OUTTMPL,
+ deprecation_warning,
determine_ext,
determine_protocol,
- DownloadCancelled,
- DownloadError,
encode_compat_str,
encodeFilename,
- EntryNotInPlaylist,
error_to_compat_str,
- ExistingVideoReached,
+ escapeHTML,
expand_path,
- ExtractorError,
filter_dict,
float_or_none,
format_bytes,
- format_field,
format_decimal_suffix,
+ format_field,
formatSeconds,
- GeoRestrictedError,
+ get_compatible_ext,
get_domain,
- has_certifi,
- HEADRequest,
- InAdvancePagedList,
int_or_none,
iri_to_uri,
- ISO3166Utils,
+ is_path_like,
join_nonempty,
- LazyList,
- LINK_TEMPLATES,
locked_file,
+ make_archive_id,
make_dir,
make_HTTPS_handler,
- MaxDownloadsReached,
merge_headers,
network_exceptions,
- NO_DEFAULT,
number_of_digits,
orderedSet,
- OUTTMPL_TYPES,
- PagedList,
+ orderedSet_from_options,
parse_filesize,
- PerRequestProxyHandler,
- platform_name,
- Popen,
- POSTPROCESS_WHEN,
- PostProcessingError,
preferredencoding,
prepend_extension,
- ReExtractInfo,
register_socks_protocols,
- RejectedVideoReached,
remove_terminal_sequences,
render_table,
replace_extension,
- SameFileError,
sanitize_filename,
sanitize_path,
sanitize_url,
sanitized_Request,
std_headers,
- STR_FORMAT_RE_TMPL,
- STR_FORMAT_TYPES,
str_or_none,
strftime_or_none,
subtitles_filename,
supports_terminal_sequences,
+ system_identifier,
timetuple_from_msec,
to_high_limit_path,
traverse_obj,
+ try_call,
try_get,
- UnavailableVideoError,
url_basename,
variadic,
version_tuple,
+ windows_enable_vt_mode,
write_json_file,
write_string,
- YoutubeDLCookieProcessor,
- YoutubeDLHandler,
- YoutubeDLRedirectHandler,
-)
-from .cache import Cache
-from .minicurses import format_text
-from .extractor import (
- gen_extractor_classes,
- get_info_extractor,
- _LAZY_LOADER,
- _PLUGIN_CLASSES as plugin_extractors
-)
-from .extractor.openload import PhantomJSwrapper
-from .downloader import (
- FFmpegFD,
- get_suitable_downloader,
- shorten_protocol_name
-)
-from .downloader.rtmp import rtmpdump_version
-from .postprocessor import (
- get_postprocessor,
- EmbedThumbnailPP,
- FFmpegFixupDuplicateMoovPP,
- FFmpegFixupDurationPP,
- FFmpegFixupM3u8PP,
- FFmpegFixupM4aPP,
- FFmpegFixupStretchedPP,
- FFmpegFixupTimestampPP,
- FFmpegMergerPP,
- FFmpegPostProcessor,
- MoveFilesAfterDownloadPP,
- _PLUGIN_CLASSES as plugin_postprocessors
)
-from .version import __version__
+from .version import RELEASE_GIT_HEAD, VARIANT, __version__
if compat_os_name == 'nt':
import ctypes
-class YoutubeDL(object):
+class YoutubeDL:
"""YoutubeDL class.
YoutubeDL objects are the ones responsible of downloading the
@@ -211,13 +196,6 @@ class YoutubeDL(object):
For compatibility, a single list is also accepted
print_to_file: A dict with keys WHEN (same as forceprint) mapped to
a list of tuples with (template, filename)
- forceurl: Force printing final URL. (Deprecated)
- forcetitle: Force printing title. (Deprecated)
- forceid: Force printing ID. (Deprecated)
- forcethumbnail: Force printing thumbnail URL. (Deprecated)
- forcedescription: Force printing description. (Deprecated)
- forcefilename: Force printing final filename. (Deprecated)
- forceduration: Force printing duration. (Deprecated)
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
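A hedged usage sketch for forceprint/print_to_file (the URL is a placeholder; 'video' is assumed to be one of the WHEN values from utils.POSTPROCESS_WHEN):

    from hypervideo_dl import YoutubeDL

    ydl_opts = {
        # print each video's title to the screen ...
        'forceprint': {'video': ['%(title)s']},
        # ... and append its URL to urls.txt
        'print_to_file': {'video': [('%(webpage_url)s', 'urls.txt')]},
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://example.com/watch?v=placeholder'])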
@@ -261,22 +239,20 @@ class YoutubeDL(object):
Default is 'only_download' for CLI, but False for API
skip_playlist_after_errors: Number of allowed failures until the rest of
the playlist is skipped
- force_generic_extractor: Force downloader to use the generic extractor
+ allowed_extractors: List of regexes to match against extractor names that are allowed
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
and don't overwrite any file if False
For compatibility with youtube-dl,
"nooverwrites" may also be used instead
- playliststart: Playlist item to start at.
- playlistend: Playlist item to end at.
playlist_items: Specific indices of playlist to download.
- playlistreverse: Download playlist items in reverse order.
playlistrandom: Download playlist items in random order.
+ lazy_playlist: Process playlist entries as they are received.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
+ logtostderr: Print everything to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
clean_infojson: Remove private fields from the infojson
@@ -294,15 +270,12 @@ class YoutubeDL(object):
writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Deprecated - Use subtitleslangs = ['all']
- Downloads all the subtitles of the video
- (requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download (can be regex).
The list may contain "all" to refer to all the available
subtitles. The language can be prefixed with a "-" to
- exclude it from the requested languages. Eg: ['all', '-live_chat']
+ exclude it from the requested languages, e.g. ['all', '-live_chat']
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
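A minimal sketch of the subtitle options above, with the language list taken verbatim from the docstring (the URL is a placeholder):

    from hypervideo_dl import YoutubeDL

    ydl_opts = {
        'writesubtitles': True,
        # every available subtitle track except live chat
        'subtitleslangs': ['all', '-live_chat'],
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://example.com/watch?v=placeholder'])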
@@ -320,24 +293,28 @@ class YoutubeDL(object):
downloaded.
Videos without view count information are always
downloaded. None for no limit.
- download_archive: File name of a file where all downloads are recorded.
- Videos already present in the file are not downloaded
- again.
+ download_archive: A set, or the name of a file where all downloads are recorded.
+ Videos already present in the file are not downloaded again.
break_on_existing: Stop the download process after attempting to download a
file that is in the archive.
break_on_reject: Stop the download process when encountering a video that
has been filtered out.
break_per_url: Whether break_on_reject and break_on_existing
should act on each input URL as opposed to for the entire queue
- cookiefile: File name where cookies should be read from and dumped to
+ cookiefile: File name or text stream from where cookies should be read and dumped to
cookiesfrombrowser: A tuple containing the name of the browser, the profile
- name/pathfrom where cookies are loaded, and the name of the
- keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
+ name/path from where cookies are loaded, the name of the keyring,
+ and the container name, e.g. ('chrome', ) or
+ ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
support RFC 5746 secure renegotiation
nocheckcertificate: Do not verify SSL certificates
+ client_certificate: Path to client certificate file in PEM format. May include the private key
+ client_certificate_key: Path to private key file for client certificate
+ client_certificate_password: Password for client certificate private key, if encrypted.
+ If not provided and the key is encrypted, hypervideo will ask interactively
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
- At the moment, this is only supported by YouTube.
+ (Only supported by some extractors)
http_headers: A dictionary of custom headers to be used for all requests
proxy: URL of the proxy server to use
geo_verification_proxy: URL of the proxy to use for IP address verification
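For illustration, the tuple form of cookiesfrombrowser copied from the docstring above ('Meta' names a Firefox cookie container):

    ydl_opts = {
        # browser name, profile, keyring (None = default), container
        'cookiesfrombrowser': ('firefox', 'default', None, 'Meta'),
    }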
@@ -346,13 +323,17 @@ class YoutubeDL(object):
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
debug_printtraffic:Print out sent and received HTTP traffic
- include_ads: Download ads as well (deprecated)
default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
- extract_flat: Do not resolve URLs, return the immediate result.
- Pass in 'in_playlist' to only show this behavior for
- playlist items.
+ extract_flat: Whether to resolve and process url_results further
+ * False: Always process (default)
+ * True: Never process
+ * 'in_playlist': Do not process inside playlist/multi_video
+ * 'discard': Always process, but don't return the result
+ from inside playlist/multi_video
+ * 'discard_in_playlist': Same as "discard", but only for
+ playlists (not multi_video)
wait_for_video: If given, wait for scheduled streams to become available.
The value should be a tuple containing the range
(min_secs, max_secs) to wait between retries
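A hedged sketch of flat extraction as documented above (the playlist URL is a placeholder):

    from hypervideo_dl import YoutubeDL

    with YoutubeDL({'extract_flat': 'in_playlist'}) as ydl:
        info = ydl.extract_info('https://example.com/playlist/placeholder', download=False)
        # entries are unresolved stubs; only basic metadata is available
        for entry in info.get('entries') or []:
            print(entry.get('title'), entry.get('url'))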
@@ -362,10 +343,6 @@ class YoutubeDL(object):
* when: When to run the postprocessor. Allowed values are
the entries of utils.POSTPROCESS_WHEN
Assumed to be 'post_process' if not given
- post_hooks: Deprecated - Register a custom postprocessor instead
- A list of functions that get called as the final step
- for each video file, after all postprocessors have been
- called. The filename will be passed as the only argument.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished".
@@ -400,7 +377,7 @@ class YoutubeDL(object):
Progress hooks are guaranteed to be called at least twice
(with status "started" and "finished") if the processing is successful.
- merge_output_format: Extension to use when merging formats.
+ merge_output_format: "/" separated list of extensions to use when merging formats.
final_ext: Expected final extension; used to detect when the file was
already downloaded and converted
fixup: Automatically correct known faults of the file.
@@ -410,8 +387,6 @@ class YoutubeDL(object):
- "detect_or_warn": check whether we can do anything
about it, warn otherwise (default)
source_address: Client-side IP address to bind to.
- call_home: Boolean, true iff we are allowed to contact the
- hypervideo servers for debugging. (BROKEN)
sleep_interval_requests: Number of seconds to sleep between requests
during extraction
sleep_interval: Number of seconds to sleep before each download when
@@ -427,10 +402,14 @@ class YoutubeDL(object):
sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
- match_filter: A function that gets called with the info_dict of
- every video.
- If it returns a message, the video is ignored.
- If it returns None, the video is downloaded.
+ match_filter: A function that gets called for every video with the signature
+ (info_dict, *, incomplete: bool) -> Optional[str]
+ For backward compatibility with youtube-dl, the signature
+ (info_dict) -> Optional[str] is also allowed.
+ - If it returns a message, the video is ignored.
+ - If it returns None, the video is downloaded.
+ - If it returns utils.NO_DEFAULT, the user is interactively
+ asked whether to download the video.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
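A sketch of a match_filter callback using the keyword signature documented above; the 60-second cutoff is an arbitrary example:

    def skip_short_videos(info_dict, *, incomplete):
        duration = info_dict.get('duration')
        if duration and duration < 60:
            return 'shorter than 60 seconds'  # a message: the video is ignored
        return None  # None: the video is downloaded

    ydl_opts = {'match_filter': skip_short_videos}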
@@ -442,17 +421,10 @@ class YoutubeDL(object):
geo_bypass_ip_block:
IP range in CIDR notation that will be used similarly to
geo_bypass_country
-
- The following options determine which downloader is picked:
external_downloader: A dictionary of protocol keys and the executable of the
external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
Set the value to 'native' to use the native downloader
- hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
- or {'m3u8': 'ffmpeg'} instead.
- Use the native HLS downloader instead of ffmpeg/avconv
- if True, otherwise use ffmpeg/avconv if False, otherwise
- use downloader suggested by extractor if None.
compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort
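For illustration, the external_downloader mapping described above, using protocol keys from the documented list:

    ydl_opts = {
        # use ffmpeg for HLS, the native downloader for everything else
        'external_downloader': {'m3u8': 'ffmpeg', 'default': 'native'},
    }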
@@ -462,17 +434,29 @@ class YoutubeDL(object):
Allowed keys are 'download', 'postprocess',
'download-title' (console title) and 'postprocess-title'.
The template is mapped on a dictionary with keys 'progress' and 'info'
+ retry_sleep_functions: Dictionary of functions that take the number of attempts
+ as argument and return the time to sleep in seconds.
+ Allowed keys are 'http', 'fragment', 'file_access'
+ download_ranges: A callback function that gets called for every video with
+ the signature (info_dict, ydl) -> Iterable[Section].
+ Only the returned sections will be downloaded.
+ Each Section is a dict with the following keys:
+ * start_time: Start time of the section in seconds
+ * end_time: End time of the section in seconds
+ * title: Section title (Optional)
+ * index: Section number (Optional)
+ force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
+ noprogress: Do not print the progress bar
+ live_from_start: Whether to download livestream videos from the start
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see hypervideo_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
- continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+ continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
external_downloader_args, concurrent_fragment_downloads.
The following options are used by the post processors:
- prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
@@ -490,44 +474,89 @@ class YoutubeDL(object):
discontinuities such as ad breaks (default: False)
extractor_args: A dictionary of arguments to be passed to the extractors.
See "EXTRACTOR ARGUMENTS" for details.
- Eg: {'youtube': {'skip': ['dash', 'hls']}}
+ E.g. {'youtube': {'skip': ['dash', 'hls']}}
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
- youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+
+ The following options are deprecated and may be removed in the future:
+
+ force_generic_extractor: Force downloader to use the generic extractor
+ - Use allowed_extractors = ['generic', 'default']
+ playliststart: - Use playlist_items
+ Playlist item to start at.
+ playlistend: - Use playlist_items
+ Playlist item to end at.
+ playlistreverse: - Use playlist_items
+ Download playlist items in reverse order.
+ forceurl: - Use forceprint
+ Force printing final URL.
+ forcetitle: - Use forceprint
+ Force printing title.
+ forceid: - Use forceprint
+ Force printing ID.
+ forcethumbnail: - Use forceprint
+ Force printing thumbnail URL.
+ forcedescription: - Use forceprint
+ Force printing description.
+ forcefilename: - Use forceprint
+ Force printing final filename.
+ forceduration: - Use forceprint
+ Force printing duration.
+ allsubtitles: - Use subtitleslangs = ['all']
+ Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ include_ads: - Doesn't work
+ Download ads as well
+ call_home: - Not implemented
+ Boolean, true iff we are allowed to contact the
+ hypervideo servers for debugging.
+ post_hooks: - Register a custom postprocessor
+ A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
+ hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
+ Use the native HLS downloader instead of ffmpeg/avconv
+ if True, otherwise use ffmpeg/avconv if False, otherwise
+ use downloader suggested by extractor if None.
+ prefer_ffmpeg: - avconv support is deprecated
+ If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ youtube_include_dash_manifest: - Use extractor_args
If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH. (only for youtube)
- youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+ youtube_include_hls_manifest: - Use extractor_args
If True (default), HLS manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about HLS. (only for youtube)
"""
- _NUMERIC_FIELDS = set((
- 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ _NUMERIC_FIELDS = {
+ 'width', 'height', 'asr', 'audio_channels', 'fps',
+ 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
'timestamp', 'release_timestamp',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'average_rating', 'comment_count', 'age_limit',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
- ))
+ }
_format_fields = {
# NB: Keep in sync with the docstring of extractor/common.py
'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
- 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
- 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+ 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
+ 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
'preference', 'language', 'language_preference', 'quality', 'source_preference',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
}
_format_selection_exts = {
- 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
- 'video': {'mp4', 'flv', 'webm', '3gp'},
- 'storyboards': {'mhtml'},
+ 'audio': set(MEDIA_EXTENSIONS.common_audio),
+ 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
+ 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
}
def __init__(self, params=None, auto_init=True):
@@ -554,21 +583,30 @@ class YoutubeDL(object):
self.cache = Cache(self)
windows_enable_vt_mode()
- self._out_files = {
- 'error': sys.stderr,
- 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
- 'console': None if compat_os_name == 'nt' else next(
+ stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
+ self._out_files = Namespace(
+ out=stdout,
+ error=sys.stderr,
+ screen=sys.stderr if self.params.get('quiet') else stdout,
+ console=None if compat_os_name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
- }
- self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
- self._allow_colors = {
- type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
- for type_ in ('screen', 'error')
- }
-
- if sys.version_info < (3, 6):
- self.report_warning(
- 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
+ )
+ self._allow_colors = Namespace(**{
+ type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
+ for type_, stream in self._out_files.items_ if type_ != 'console'
+ })
+
+ # The code is left like this to be reused for future deprecations
+ MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
+ current_version = sys.version_info[:2]
+ if current_version < MIN_RECOMMENDED:
+ msg = ('Support for Python version %d.%d has been deprecated. '
+ 'See https://github.com/hypervideo/hypervideo/issues/3764 for more details.'
+ '\n You will no longer receive updates on this version')
+ if current_version < MIN_SUPPORTED:
+ msg = 'Python version %d.%d is no longer supported'
+ self.deprecation_warning(
+ f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
if self.params.get('allow_unplayable_formats'):
self.report_warning(
@@ -577,9 +615,33 @@ class YoutubeDL(object):
' If you experience any issues while using this option, '
f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
+ if self.params.get('bidi_workaround', False):
+ try:
+ import pty
+ master, slave = pty.openpty()
+ width = shutil.get_terminal_size().columns
+ width_args = [] if width is None else ['-w', str(width)]
+ sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
+ try:
+ self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
+ except OSError:
+ self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ self._output_channel = os.fdopen(master, 'rb')
+ except OSError as ose:
+ if ose.errno == errno.ENOENT:
+ self.report_warning(
+ 'Could not find fribidi executable, ignoring --bidi-workaround. '
+ 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
+ else:
+ raise
+
+ self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
+ if auto_init and auto_init != 'no_verbose_header':
+ self.print_debug_header()
+
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
- self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
+ self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
return True
return False
@@ -594,9 +656,9 @@ class YoutubeDL(object):
for msg in self.params.get('_warnings', []):
self.report_warning(msg)
for msg in self.params.get('_deprecation_warnings', []):
- self.deprecation_warning(msg)
+ self.deprecated_feature(msg)
- if 'list-formats' in self.params.get('compat_opts', []):
+ if 'list-formats' in self.params['compat_opts']:
self.params['listformats_table'] = False
if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
@@ -609,6 +671,13 @@ class YoutubeDL(object):
else:
self.params['nooverwrites'] = not self.params['overwrites']
+ if self.params.get('simulate') is None and any((
+ self.params.get('list_thumbnails'),
+ self.params.get('listformats'),
+ self.params.get('listsubtitles'),
+ )):
+ self.params['simulate'] = 'list_only'
+
self.params.setdefault('forceprint', {})
self.params.setdefault('print_to_file', {})
@@ -616,31 +685,8 @@ class YoutubeDL(object):
if not isinstance(params['forceprint'], dict):
self.params['forceprint'] = {'video': params['forceprint']}
- if self.params.get('bidi_workaround', False):
- try:
- import pty
- master, slave = pty.openpty()
- width = compat_get_terminal_size().columns
- if width is None:
- width_args = []
- else:
- width_args = ['-w', str(width)]
- sp_kwargs = dict(
- stdin=subprocess.PIPE,
- stdout=slave,
- stderr=self._out_files['error'])
- try:
- self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
- except OSError:
- self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
- self._output_channel = os.fdopen(master, 'rb')
- except OSError as ose:
- if ose.errno == errno.ENOENT:
- self.report_warning(
- 'Could not find fribidi executable, ignoring --bidi-workaround. '
- 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
- else:
- raise
+ if auto_init:
+ self.add_default_info_extractors()
if (sys.platform != 'win32'
and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -652,7 +698,7 @@ class YoutubeDL(object):
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- self.outtmpl_dict = self.parse_outtmpl()
+ self._parse_outtmpl()
# Creating format selector here allows us to catch syntax errors before the extraction
self.format_selector = (
@@ -663,13 +709,6 @@ class YoutubeDL(object):
# Set http_headers defaults according to std_headers
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
- self._setup_opener()
-
- if auto_init:
- if auto_init != 'no_verbose_header':
- self.print_debug_header()
- self.add_default_info_extractors()
-
hooks = {
'post_hooks': self.add_post_hook,
'progress_hooks': self.add_progress_hook,
@@ -683,28 +722,31 @@ class YoutubeDL(object):
pp_def = dict(pp_def_raw)
when = pp_def.pop('when', 'post_process')
self.add_post_processor(
- get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
+ get_postprocessor(pp_def.pop('key'))(self, **pp_def),
when=when)
+ self._setup_opener()
register_socks_protocols()
def preload_download_archive(fn):
"""Preload the archive, if any is specified"""
+ archive = set()
if fn is None:
- return False
+ return archive
+ elif not is_path_like(fn):
+ return fn
+
self.write_debug(f'Loading archive file {fn!r}')
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
- self.archive.add(line.strip())
- except IOError as ioe:
+ archive.add(line.strip())
+ except OSError as ioe:
if ioe.errno != errno.ENOENT:
raise
- return False
- return True
+ return archive
- self.archive = set()
- preload_download_archive(self.params.get('download_archive'))
+ self.archive = preload_download_archive(self.params.get('download_archive'))
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
@@ -730,13 +772,6 @@ class YoutubeDL(object):
self._ies_instances[ie_key] = ie
ie.set_downloader(self)
- def _get_info_extractor_class(self, ie_key):
- ie = self._ies.get(ie_key)
- if ie is None:
- ie = get_info_extractor(ie_key)
- self.add_info_extractor(ie)
- return ie
-
def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
@@ -753,11 +788,23 @@ class YoutubeDL(object):
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
"""
- for ie in gen_extractor_classes():
- self.add_info_extractor(ie)
+ all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
+ all_ies['end'] = UnsupportedURLIE()
+ try:
+ ie_names = orderedSet_from_options(
+ self.params.get('allowed_extractors', ['default']), {
+ 'all': list(all_ies),
+ 'default': [name for name, ie in all_ies.items() if ie._ENABLED],
+ }, use_regex=True)
+ except re.error as e:
+ raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
+ for name in ie_names:
+ self.add_info_extractor(all_ies[name])
+ self.write_debug(f'Loaded {len(ie_names)} extractors')
def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
+ assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
self._pps[when].append(pp)
pp.set_downloader(self)
@@ -781,11 +828,11 @@ class YoutubeDL(object):
return message
assert hasattr(self, '_output_process')
- assert isinstance(message, compat_str)
+ assert isinstance(message, str)
line_count = message.count('\n') + 1
- self._output_process.stdin.write((message + '\n').encode('utf-8'))
+ self._output_process.stdin.write((message + '\n').encode())
self._output_process.stdin.flush()
- res = ''.join(self._output_channel.readline().decode('utf-8')
+ res = ''.join(self._output_channel.readline().decode()
for _ in range(line_count))
return res[:-len('\n')]
@@ -799,12 +846,14 @@ class YoutubeDL(object):
def to_stdout(self, message, skip_eol=False, quiet=None):
"""Print message to stdout"""
if quiet is not None:
- self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
- self._write_string(
- '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
- self._out_files['print'])
-
- def to_screen(self, message, skip_eol=False, quiet=None):
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
+ 'Use "YoutubeDL.to_screen" instead')
+ if skip_eol is not False:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
+ 'Use "YoutubeDL.to_screen" instead')
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
+
+ def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
"""Print message to screen if not in quiet mode"""
if self.params.get('logger'):
self.params['logger'].debug(message)
@@ -813,20 +862,20 @@ class YoutubeDL(object):
return
self._write_string(
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
- self._out_files['screen'])
+ self._out_files.screen, only_once=only_once)
def to_stderr(self, message, only_once=False):
"""Print message to stderr"""
- assert isinstance(message, compat_str)
+ assert isinstance(message, str)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
- self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
def _send_console_code(self, code):
- if compat_os_name == 'nt' or not self._out_files['console']:
+ if compat_os_name == 'nt' or not self._out_files.console:
return
- self._write_string(code, self._out_files['console'])
+ self._write_string(code, self._out_files.console)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -894,16 +943,19 @@ class YoutubeDL(object):
raise DownloadError(message, exc_info)
self._download_retcode = 1
- class Styles(Enum):
- HEADERS = 'yellow'
- EMPHASIS = 'light blue'
- ID = 'green'
- DELIM = 'blue'
- ERROR = 'red'
- WARNING = 'yellow'
- SUPPRESS = 'light black'
+ Styles = Namespace(
+ HEADERS='yellow',
+ EMPHASIS='light blue',
+ FILENAME='green',
+ ID='green',
+ DELIM='blue',
+ ERROR='red',
+ WARNING='yellow',
+ SUPPRESS='light black',
+ )
def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
+ text = str(text)
if test_encoding:
original_text = text
# handle.encoding can be None. See https://github.com/hypervideo/hypervideo/issues/2711
@@ -911,17 +963,16 @@ class YoutubeDL(object):
text = text.encode(encoding, 'ignore').decode(encoding)
if fallback is not None and text != original_text:
text = fallback
- if isinstance(f, self.Styles):
- f = f.value
return format_text(text, f) if allow_colors else text if fallback is None else fallback
+ def _format_out(self, *args, **kwargs):
+ return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
+
def _format_screen(self, *args, **kwargs):
- return self._format_text(
- self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
+ return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
def _format_err(self, *args, **kwargs):
- return self._format_text(
- self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
+ return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
@@ -935,11 +986,14 @@ class YoutubeDL(object):
return
self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
- def deprecation_warning(self, message):
+ def deprecation_warning(self, message, *, stacklevel=0):
+ deprecation_warning(
+ message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
+
+ def deprecated_feature(self, message):
if self.params.get('logger') is not None:
- self.params['logger'].warning(f'DeprecationWarning: {message}')
- else:
- self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+ self.params['logger'].warning(f'Deprecated Feature: {message}')
+ self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
'''
@@ -952,7 +1006,7 @@ class YoutubeDL(object):
'''Log debug message or Print message to stderr'''
if not self.params.get('verbose', False):
return
- message = '[debug] %s' % message
+ message = f'[debug] {message}'
if self.params.get('logger'):
self.params['logger'].debug(message)
else:
@@ -973,7 +1027,7 @@ class YoutubeDL(object):
self.to_screen('Deleting existing file')
def raise_no_formats(self, info, forced=False, *, msg=None):
- has_drm = info.get('__has_drm')
+ has_drm = info.get('_has_drm')
ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
if forced or not ignored:
@@ -983,37 +1037,27 @@ class YoutubeDL(object):
self.report_warning(msg)
def parse_outtmpl(self):
- outtmpl_dict = self.params.get('outtmpl', {})
- if not isinstance(outtmpl_dict, dict):
- outtmpl_dict = {'default': outtmpl_dict}
- # Remove spaces in the default template
- if self.params.get('restrictfilenames'):
+ self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
+ self._parse_outtmpl()
+ return self.params['outtmpl']
+
+ def _parse_outtmpl(self):
+ sanitize = IDENTITY
+ if self.params.get('restrictfilenames'): # Remove spaces in the default template
sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
- else:
- sanitize = lambda x: x
- outtmpl_dict.update({
- k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
- if outtmpl_dict.get(k) is None})
- for key, val in outtmpl_dict.items():
- if isinstance(val, bytes):
- self.report_warning(
- 'Parameter outtmpl is bytes, but should be a unicode string. '
- 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
- return outtmpl_dict
+
+ outtmpl = self.params.setdefault('outtmpl', {})
+ if not isinstance(outtmpl, dict):
+ self.params['outtmpl'] = outtmpl = {'default': outtmpl}
+ outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
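# Sketch (assuming the stock DEFAULT_OUTTMPL): after _parse_outtmpl(),
# self.params['outtmpl'] is a dict such as
#     {'default': '%(title)s [%(id)s].%(ext)s', ...}
# and a bare string passed as 'outtmpl' is wrapped into {'default': <string>}.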
def get_output_path(self, dir_type='', filename=None):
paths = self.params.get('paths', {})
- assert isinstance(paths, dict)
+ assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
path = os.path.join(
expand_path(paths.get('home', '').strip()),
expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
filename or '')
-
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- path = encodeFilename(path, True).decode(preferredencoding())
return sanitize_path(path, force=self.params.get('windowsfilenames'))
@staticmethod
@@ -1023,11 +1067,11 @@ class YoutubeDL(object):
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
- outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+ outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
# outtmpl should be expand_path'ed before template dict substitution
# because meta fields may contain env variables we don't want to
- # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
# title "Hello $PATH", we don't want `$PATH` to be expanded.
return expand_path(outtmpl).replace(sep, '')
@@ -1043,7 +1087,7 @@ class YoutubeDL(object):
def validate_outtmpl(cls, outtmpl):
''' @return None or Exception object '''
outtmpl = re.sub(
- STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
+ STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
lambda mobj: f'{mobj.group(0)[:-1]}s',
cls._outtmpl_expandpath(outtmpl))
try:
@@ -1056,6 +1100,7 @@ class YoutubeDL(object):
def _copy_infodict(info_dict):
info_dict = dict(info_dict)
info_dict.pop('__postprocessors', None)
+ info_dict.pop('__pending_error', None)
return info_dict
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
@@ -1071,7 +1116,7 @@ class YoutubeDL(object):
formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None
else None)
- info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
info_dict['video_autonumber'] = self._num_videos
if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@@ -1079,38 +1124,51 @@ class YoutubeDL(object):
# For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = {
- 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
+ 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
'autonumber': self.params.get('autonumber_size') or 5,
}
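# E.g. (illustrative): in a playlist whose last requested index is 123,
# '%(playlist_index)s' behaves like '%(playlist_index)03d' ('001', '002', ...).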
TMPL_DICT = {}
- EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
MATH_FUNCTIONS = {
'+': float.__add__,
'-': float.__sub__,
}
# Field is of the form key1.key2...
- # where keys (except first) can be string, int or slice
- FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
- MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+ # where keys (except first) can be string, int, slice or "{field, ...}"
+ FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
+ FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
+ 'inner': FIELD_INNER_RE,
+ 'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
+ }
+ MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
- INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+ INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
(?P<negate>-)?
- (?P<fields>{field})
- (?P<maths>(?:{math_op}{math_field})*)
+ (?P<fields>{FIELD_RE})
+ (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
(?:>(?P<strf_format>.+?))?
(?P<remaining>
(?P<alternate>(?<!\\),[^|&)]+)?
(?:&(?P<replacement>.*?))?
(?:\|(?P<default>.*?))?
- )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+ )$''')
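# Examples of templates this grammar accepts (illustrative):
#     %(duration>%H-%M-%S)s       strftime-style formatting via '>'
#     %(title&exists|missing)s    '&' replacement and '|' default
#     %(playlist_index+100)d      arithmetic via MATH_OPERATORS_RE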
+
+ def _traverse_infodict(fields):
+ fields = [f for x in re.split(r'\.({.+?})\.?', fields)
+ for f in ([x] if x.startswith('{') else x.split('.'))]
+ for i in (0, -1):
+ if fields and not fields[i]:
+ fields.pop(i)
- def _traverse_infodict(k):
- k = k.split('.')
- if k[0] == '':
- k.pop(0)
- return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
+ for i, f in enumerate(fields):
+ if not f.startswith('{'):
+ continue
+ assert f.endswith('}'), f'No closing brace for {f} in {fields}'
+ fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
+
+ return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
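# E.g. (illustrative): 'tags.0' -> info_dict['tags'][0], 'id.3:7' -> a slice of
# the id (traverse_string allows slicing into strings), and the new brace form
# '{id,title}' -> {'id': ..., 'title': ...}.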
def get_value(mdict):
# Object traversal
@@ -1146,6 +1204,9 @@ class YoutubeDL(object):
if mdict['strf_format']:
value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
+ # XXX: Workaround for https://github.com/hypervideo/hypervideo/issues/4485
+ if sanitize and value == '':
+ value = None
return value
na = self.params.get('outtmpl_na_placeholder', 'NA')
@@ -1153,7 +1214,7 @@ class YoutubeDL(object):
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
return sanitize_filename(str(value), restricted=restricted, is_id=(
bool(re.search(r'(^|[_.])id(\.|$)', key))
- if 'filename-sanitization' in self.params.get('compat_opts', [])
+ if 'filename-sanitization' in self.params['compat_opts']
else NO_DEFAULT))
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
@@ -1183,7 +1244,7 @@ class YoutubeDL(object):
fmt = outer_mobj.group('format')
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
- fmt = '0{:d}d'.format(field_size_compat_map[key])
+ fmt = f'0{field_size_compat_map[key]:d}d'
value = default if value is None else value if replacement is None else replacement
@@ -1193,12 +1254,16 @@ class YoutubeDL(object):
delim = '\n' if '#' in flags else ', '
value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
elif fmt[-1] == 'j': # json
- value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
+ value, fmt = json.dumps(
+ value, default=_dumpjson_default,
+ indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
+ elif fmt[-1] == 'h': # html
+ value, fmt = escapeHTML(str(value)), str_fmt
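# E.g. (illustrative): with title 'AT&T <live>', '%(title)h' renders
# 'AT&amp;T &lt;live&gt;' for safe embedding in HTML output.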
elif fmt[-1] == 'q': # quoted
value = map(str, variadic(value) if '#' in flags else [value])
value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
elif fmt[-1] == 'B': # bytes
- value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+ value = f'%{str_fmt}'.encode() % str(value).encode()
value, fmt = value.decode('utf-8', 'ignore'), 's'
elif fmt[-1] == 'U': # unicode normalized
value, fmt = unicodedata.normalize(
@@ -1242,7 +1307,7 @@ class YoutubeDL(object):
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
if outtmpl is None:
- outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
+ outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
try:
outtmpl = self._outtmpl_expandpath(outtmpl)
filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
@@ -1291,11 +1356,19 @@ class YoutubeDL(object):
return self.get_output_path(dir_type, filename)
def _match_entry(self, info_dict, incomplete=False, silent=False):
- """ Returns None if the file should be downloaded """
+ """Returns None if the file should be downloaded"""
+ _type = info_dict.get('_type', 'video')
+ assert incomplete or _type == 'video', 'Only video result can be considered complete'
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ video_title = info_dict.get('title', info_dict.get('id', 'entry'))
def check_filter():
+ if _type in ('playlist', 'multi_video'):
+ return
+ elif _type in ('url', 'url_transparent') and not try_call(
+ lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
+ return
+
if 'title' in info_dict:
# This can happen when we're just evaluating the playlist
title = info_dict['title']
@@ -1307,11 +1380,12 @@ class YoutubeDL(object):
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+
date = info_dict.get('upload_date')
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
view_count = info_dict.get('view_count')
if view_count is not None:
min_views = self.params.get('min_views')
@@ -1330,7 +1404,16 @@ class YoutubeDL(object):
except TypeError:
# For backward compatibility
ret = None if incomplete else match_filter(info_dict)
- if ret is not None:
+ if ret is NO_DEFAULT:
+ while True:
+ filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
+ reply = input(self._format_screen(
+ f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
+ if reply in {'y', ''}:
+ return None
+ elif reply == 'n':
+ return f'Skipping {video_title}'
+ elif ret is not None:
return ret
return None
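# Sketch (hypothetical callback, names assumed): a match_filter may return
# NO_DEFAULT to trigger the interactive prompt above, e.g.
#     def confirm_live(info_dict, *, incomplete=False):
#         return NO_DEFAULT if info_dict.get('is_live') else None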
@@ -1356,18 +1439,19 @@ class YoutubeDL(object):
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
process=True, force_generic_extractor=False):
"""
- Return a list with a dictionary for each video extracted.
+ Extract and return the information dictionary of the URL
Arguments:
- url -- URL to extract
+ @param url URL to extract
Keyword arguments:
- download -- whether to download videos during extraction
- ie_key -- extractor key hint
- extra_info -- dictionary containing the extra values to add to each result
- process -- whether to resolve all unresolved references (URLs, playlist items),
- must be True for download to work.
- force_generic_extractor -- force using the generic extractor
+ @param download Whether to download videos
+ @param process Whether to resolve all unresolved references (URLs, playlist items).
+ Must be True for download to work
+ @param ie_key Use only the extractor with this key
+
+ @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
+ @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
"""
if extra_info is None:
@@ -1377,11 +1461,11 @@ class YoutubeDL(object):
ie_key = 'Generic'
if ie_key:
- ies = {ie_key: self._get_info_extractor_class(ie_key)}
+ ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
else:
ies = self._ies
- for ie_key, ie in ies.items():
+ for key, ie in ies.items():
if not ie.suitable(url):
continue
@@ -1390,16 +1474,18 @@ class YoutubeDL(object):
'and will probably not work.')
temp_id = ie.get_temp_id(url)
- if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
- self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+ if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
+ self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
if self.params.get('break_on_existing', False):
raise ExistingVideoReached()
break
- return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
+ return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
else:
- self.report_error('no suitable InfoExtractor for URL %s' % url)
+ extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
+ self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
+ tb=False if extractors_restricted else None)
- def __handle_extraction_exceptions(func):
+ def _handle_extraction_exceptions(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
while True:
@@ -1431,7 +1517,7 @@ class YoutubeDL(object):
break
return wrapper
- def _wait_for_video(self, ie_result):
+ def _wait_for_video(self, ie_result={}):
if (not self.params.get('wait_for_video')
or ie_result.get('_type', 'video') != 'video'
or ie_result.get('formats') or ie_result.get('url')):
@@ -1442,7 +1528,12 @@ class YoutubeDL(object):
def progress(msg):
nonlocal last_msg
- self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
+ full_msg = f'{msg}\n'
+ if not self.params.get('noprogress'):
+ full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
+ elif last_msg:
+ return
+ self.to_screen(full_msg, skip_eol=True)
last_msg = msg
min_wait, max_wait = self.params.get('wait_for_video')
@@ -1450,7 +1541,7 @@ class YoutubeDL(object):
if diff is None and ie_result.get('live_status') == 'is_upcoming':
diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
self.report_warning('Release time of video is not known')
- elif (diff or 0) <= 0:
+ elif ie_result and (diff or 0) <= 0:
self.report_warning('Video should already be available according to extracted info')
diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
@@ -1472,10 +1563,18 @@ class YoutubeDL(object):
self.to_screen('')
raise
- @__handle_extraction_exceptions
+ @_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
- ie_result = ie.extract(url)
+ try:
+ ie_result = ie.extract(url)
+ except UserNotLive as e:
+ if process:
+ if self.params.get('wait_for_video'):
+ self.report_warning(e)
+ self._wait_for_video()
+ raise
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
return
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
@@ -1523,7 +1622,8 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video')
if result_type in ('url', 'url_transparent'):
- ie_result['url'] = sanitize_url(ie_result['url'])
+ ie_result['url'] = sanitize_url(
+ ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
if ie_result.get('original_url'):
extra_info.setdefault('original_url', ie_result['original_url'])
@@ -1537,7 +1637,9 @@ class YoutubeDL(object):
self.add_default_extra_info(info_copy, ie, ie_result['url'])
self.add_extra_info(info_copy, extra_info)
info_copy, _ = self.pre_process(info_copy)
+ self._fill_common_fields(info_copy, False)
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+ self._raise_pending_errors(info_copy)
if self.params.get('force_write_download_archive', False):
self.record_download_archive(info_copy)
return ie_result
@@ -1545,10 +1647,11 @@ class YoutubeDL(object):
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
ie_result = self.process_video_result(ie_result, download=download)
+ self._raise_pending_errors(ie_result)
additional_urls = (ie_result or {}).get('additional_urls')
if additional_urls:
# TODO: Improve MetadataParserPP to allow setting a list
- if isinstance(additional_urls, compat_str):
+ if isinstance(additional_urls, str):
additional_urls = [additional_urls]
self.to_screen(
'[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
@@ -1579,9 +1682,13 @@ class YoutubeDL(object):
if not info:
return info
+ exempted_fields = {'_type', 'url', 'ie_key'}
+ if not ie_result.get('section_end') and ie_result.get('section_start') is None:
+ # For video clips, the id etc of the clip extractor should be used
+ exempted_fields |= {'id', 'extractor', 'extractor_key'}
+
new_result = info.copy()
- new_result.update(filter_dict(ie_result, lambda k, v: (
- v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
+ new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
# Extracted info may not be a video result (i.e.
# info.get('_type', 'video') != 'video') but rather a URL or
@@ -1597,8 +1704,8 @@ class YoutubeDL(object):
elif result_type in ('playlist', 'multi_video'):
# Protect from infinite recursion due to recursively nested playlists
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
- webpage_url = ie_result['webpage_url']
- if webpage_url in self._playlist_urls:
+ webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
+ if webpage_url and webpage_url in self._playlist_urls:
self.to_screen(
'[download] Skipping already downloaded playlist: %s'
% (ie_result.get('title') or ie_result.get('id')))
@@ -1640,124 +1747,65 @@ class YoutubeDL(object):
return make_dir(path, self.report_error)
@staticmethod
- def _playlist_infodict(ie_result, **kwargs):
- return {
- **ie_result,
+ def _playlist_infodict(ie_result, strict=False, **kwargs):
+ info = {
+ 'playlist_count': ie_result.get('playlist_count'),
'playlist': ie_result.get('title') or ie_result.get('id'),
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': 0,
**kwargs,
}
+ if strict:
+ return info
+ if ie_result.get('webpage_url'):
+ info.update({
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'webpage_url_domain': get_domain(ie_result['webpage_url']),
+ })
+ return {
+ **info,
+ 'playlist_index': 0,
+ '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
+ 'extractor': ie_result['extractor'],
+ 'extractor_key': ie_result['extractor_key'],
+ }
def __process_playlist(self, ie_result, download):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- if 'entries' not in ie_result:
- raise EntryNotInPlaylist('There are no entries')
-
- MissingEntry = object()
- incomplete_entries = bool(ie_result.get('requested_entries'))
- if incomplete_entries:
- def fill_missing_entries(entries, indices):
- ret = [MissingEntry] * max(indices)
- for i, entry in zip(indices, entries):
- ret[i - 1] = entry
- return ret
- ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1)
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+ """Process each entry in the playlist"""
+ assert ie_result['_type'] in ('playlist', 'multi_video')
- ie_entries = ie_result['entries']
- if isinstance(ie_entries, list):
- playlist_count = len(ie_entries)
- msg = f'Collected {playlist_count} videos; downloading %d of them'
- ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
-
- def get_entry(i):
- return ie_entries[i - 1]
- else:
- msg = 'Downloading %d videos'
- if not isinstance(ie_entries, (PagedList, LazyList)):
- ie_entries = LazyList(ie_entries)
- elif isinstance(ie_entries, InAdvancePagedList):
- if ie_entries._pagesize == 1:
- playlist_count = ie_entries._pagecount
-
- def get_entry(i):
- return YoutubeDL.__handle_extraction_exceptions(
- lambda self, i: ie_entries[i - 1]
- )(self, i)
-
- entries, broken = [], False
- items = playlistitems if playlistitems is not None else itertools.count(playliststart)
- for i in items:
- if i == 0:
- continue
- if playlistitems is None and playlistend is not None and playlistend < i:
- break
- entry = None
- try:
- entry = get_entry(i)
- if entry is MissingEntry:
- raise EntryNotInPlaylist()
- except (IndexError, EntryNotInPlaylist):
- if incomplete_entries:
- raise EntryNotInPlaylist(f'Entry {i} cannot be found')
- elif not playlistitems:
- break
- entries.append(entry)
- try:
- if entry is not None:
- self._match_entry(entry, incomplete=True, silent=True)
- except (ExistingVideoReached, RejectedVideoReached):
- broken = True
- break
- ie_result['entries'] = entries
+ common_info = self._playlist_infodict(ie_result, strict=True)
+ title = common_info.get('playlist') or '<Untitled>'
+ if self._match_entry(common_info, incomplete=True) is not None:
+ return
+ self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
- # Save playlist_index before re-ordering
- entries = [
- ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
- for i, entry in enumerate(entries, 1)
- if entry is not None]
- n_entries = len(entries)
+ all_entries = PlaylistEntries(self, ie_result)
+ entries = orderedSet(all_entries.get_requested_items(), lazy=True)
- if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
- ie_result['playlist_count'] = n_entries
+ lazy = self.params.get('lazy_playlist')
+ if lazy:
+ resolved_entries, n_entries = [], 'N/A'
+ ie_result['requested_entries'], ie_result['entries'] = None, None
+ else:
+ entries = resolved_entries = list(entries)
+ n_entries = len(resolved_entries)
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
+ if not ie_result.get('playlist_count'):
+ # Better to do this after potentially exhausting entries
+ ie_result['playlist_count'] = all_entries.get_full_count()
- if not playlistitems and (playliststart != 1 or playlistend):
- playlistitems = list(range(playliststart, playliststart + n_entries))
- ie_result['requested_entries'] = playlistitems
+ extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
+ ie_copy = collections.ChainMap(ie_result, extra)
_infojson_written = False
write_playlist_files = self.params.get('allow_playlist_files', True)
if write_playlist_files and self.params.get('list_thumbnails'):
self.list_thumbnails(ie_result)
if write_playlist_files and not self.params.get('simulate'):
- ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
_infojson_written = self._write_info_json(
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
if _infojson_written is None:
@@ -1766,57 +1814,72 @@ class YoutubeDL(object):
self.prepare_filename(ie_copy, 'pl_description')) is None:
return
# TODO: This should be passed to ThumbnailsConvertor if necessary
- self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
-
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
- if self.params.get('playlistrandom', False):
+ self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
+
+ if lazy:
+ if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
+ self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
+ elif self.params.get('playlistreverse'):
+ entries.reverse()
+ elif self.params.get('playlistrandom'):
random.shuffle(entries)
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+ self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
+ f'{format_field(ie_result, "playlist_count", " of %s")}')
+
+ keep_resolved_entries = self.params.get('extract_flat') != 'discard'
+ if self.params.get('extract_flat') == 'discard_in_playlist':
+ keep_resolved_entries = ie_result['_type'] != 'playlist'
+ if keep_resolved_entries:
+ self.write_debug('The information of all playlist entries will be held in memory')
- self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
failures = 0
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
- for i, entry_tuple in enumerate(entries, 1):
- playlist_index, entry = entry_tuple
- if 'playlist-index' in self.params.get('compat_opts', []):
- playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
- 'playlist_count': ie_result.get('playlist_count'),
+ for i, (playlist_index, entry) in enumerate(entries):
+ if lazy:
+ resolved_entries.append((playlist_index, entry))
+ if not entry:
+ continue
+
+ entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
+ if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
+ playlist_index = ie_result['requested_entries'][i]
+
+ entry_copy = collections.ChainMap(entry, {
+ **common_info,
+ 'n_entries': int_or_none(n_entries),
'playlist_index': playlist_index,
- 'playlist_autonumber': i,
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'webpage_url_domain': get_domain(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
+ 'playlist_autonumber': i + 1,
+ })
- if self._match_entry(entry, incomplete=True) is not None:
+ if self._match_entry(entry_copy, incomplete=True) is not None:
+ # For compatibility with youtube-dl. See https://github.com/hypervideo/hypervideo/issues/4369
+ resolved_entries[i] = (playlist_index, NO_DEFAULT)
continue
+ self.to_screen('[download] Downloading item %s of %s' % (
+ self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+ extra.update({
+ 'playlist_index': playlist_index,
+ 'playlist_autonumber': i + 1,
+ })
entry_result = self.__process_iterable_entry(entry, download, extra)
if not entry_result:
failures += 1
if failures >= max_failures:
self.report_error(
- 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+ f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
break
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
+ if keep_resolved_entries:
+ resolved_entries[i] = (playlist_index, entry_result)
+
+ # Update with processed data
+ ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
+ ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
+ if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
+ # Do not set for full playlist
+ ie_result.pop('requested_entries')
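# E.g. (illustrative): a fully downloaded 10-item playlist yields
# requested_entries == [1, ..., 10] == list(range(1, playlist_count + 1)), so the
# key is dropped; with --playlist-items 2,5 it is kept as [2, 5].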
# Write the updated info to json
if _infojson_written is True and self._write_info_json(
@@ -1825,10 +1888,10 @@ class YoutubeDL(object):
return
ie_result = self.run_all_pps('playlist', ie_result)
- self.to_screen(f'[download] Finished downloading playlist: {playlist}')
+ self.to_screen(f'[download] Finished downloading playlist: {title}')
return ie_result
- @__handle_extraction_exceptions
+ @_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)
@@ -1910,7 +1973,7 @@ class YoutubeDL(object):
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
- except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+ except (DownloadError, OSError, ValueError) + network_exceptions:
success = False
finally:
if os.path.exists(temp_file.name):
@@ -1934,12 +1997,12 @@ class YoutubeDL(object):
and download
and (
not can_merge()
- or info_dict.get('is_live', False)
- or self.outtmpl_dict['default'] == '-'))
+ or info_dict.get('is_live') and not self.params.get('live_from_start')
+ or self.params['outtmpl']['default'] == '-'))
compat = (
prefer_best
or self.params.get('allow_multiple_audio_streams', False)
- or 'format-spec' in self.params.get('compat_opts', []))
+ or 'format-spec' in self.params['compat_opts'])
return (
'best/bestvideo+bestaudio' if prefer_best
@@ -1950,7 +2013,7 @@ class YoutubeDL(object):
def syntax_error(note, start):
message = (
'Invalid format specification: '
- '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+ '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
return SyntaxError(message)
PICKFIRST = 'PICKFIRST'
@@ -1973,8 +2036,8 @@ class YoutubeDL(object):
filter_parts.append(string)
def _remove_unused_ops(tokens):
- # Remove operators that we don't use and join them with the surrounding strings
- # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+ # Remove operators that we don't use and join them with the surrounding strings.
+ # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
for type, string, start, end, line in tokens:
@@ -2054,7 +2117,7 @@ class YoutubeDL(object):
raise syntax_error('Expected a selector', start)
current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
else:
- raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+ raise syntax_error(f'Operator not recognized: "{string}"', start)
elif type == tokenize.ENDMARKER:
break
if current_selector:
@@ -2090,14 +2153,13 @@ class YoutubeDL(object):
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
- output_ext = self.params.get('merge_output_format')
- if not output_ext:
- if the_only_video:
- output_ext = the_only_video['ext']
- elif the_only_audio and not video_fmts:
- output_ext = the_only_audio['ext']
- else:
- output_ext = 'mkv'
+ output_ext = get_compatible_ext(
+ vcodecs=[f.get('vcodec') for f in video_fmts],
+ acodecs=[f.get('acodec') for f in audio_fmts],
+ vexts=[f['ext'] for f in video_fmts],
+ aexts=[f['ext'] for f in audio_fmts],
+ preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
+ or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
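# E.g. (a sketch of get_compatible_ext's role here): an h264/.mp4 video merged
# with aac/.m4a audio can keep 'mp4', while codec/container mismatches
# typically fall back to 'mkv'.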
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
@@ -2123,6 +2185,7 @@ class YoutubeDL(object):
'vcodec': the_only_video.get('vcodec'),
'vbr': the_only_video.get('vbr'),
'stretched_ratio': the_only_video.get('stretched_ratio'),
+ 'aspect_ratio': the_only_video.get('aspect_ratio'),
})
if the_only_audio:
@@ -2130,6 +2193,7 @@ class YoutubeDL(object):
'acodec': the_only_audio.get('acodec'),
'abr': the_only_audio.get('abr'),
'asr': the_only_audio.get('asr'),
+ 'audio_channels': the_only_audio.get('audio_channels')
})
return new_dict
@@ -2178,7 +2242,8 @@ class YoutubeDL(object):
yield from _check_formats(ctx['formats'][::-1])
elif format_spec == 'mergeall':
def selector_function(ctx):
- formats = list(_check_formats(ctx['formats']))
+ formats = list(_check_formats(
+ f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
if not formats:
return
merged_format = formats[-1]
@@ -2235,7 +2300,7 @@ class YoutubeDL(object):
matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
try:
yield matches[format_idx - 1]
- except IndexError:
+ except LazyList.IndexError:
return
filters = [self._build_format_filter(f) for f in selector.filters]
@@ -2247,13 +2312,13 @@ class YoutubeDL(object):
return selector_function(ctx_copy)
return final_selector
- stream = io.BytesIO(format_spec.encode('utf-8'))
+ stream = io.BytesIO(format_spec.encode())
try:
- tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+ tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
- class TokenIterator(object):
+ class TokenIterator:
def __init__(self, tokens):
self.tokens = tokens
self.counter = 0
@@ -2279,7 +2344,7 @@ class YoutubeDL(object):
def _calc_headers(self, info_dict):
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
- cookies = self._calc_cookies(info_dict)
+ cookies = self._calc_cookies(info_dict['url'])
if cookies:
res['Cookie'] = cookies
@@ -2290,8 +2355,8 @@ class YoutubeDL(object):
return res
- def _calc_cookies(self, info_dict):
- pr = sanitized_Request(info_dict['url'])
+ def _calc_cookies(self, url):
+ pr = sanitized_Request(url)
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
@@ -2335,17 +2400,20 @@ class YoutubeDL(object):
else:
info_dict['thumbnails'] = thumbnails
- def _fill_common_fields(self, info_dict, is_video=True):
+ def _fill_common_fields(self, info_dict, final=True):
# TODO: move sanitization here
- if is_video:
- # playlists are allowed to lack "title"
- info_dict['fulltitle'] = info_dict.get('title')
- if 'title' not in info_dict:
+ if final:
+ title = info_dict.get('title', NO_DEFAULT)
+ if title is NO_DEFAULT:
raise ExtractorError('Missing "title" field in extractor result',
video_id=info_dict['id'], ie=info_dict['extractor'])
- elif not info_dict.get('title'):
- self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
- info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
+ info_dict['fulltitle'] = title
+ if not title:
+ if title == '':
+ self.write_debug('Extractor gave empty title. Creating a generic title')
+ else:
+ self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
+ info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
if info_dict.get('duration') is not None:
info_dict['duration_string'] = formatSeconds(info_dict['duration'])
@@ -2358,11 +2426,9 @@ class YoutubeDL(object):
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
# see http://bugs.python.org/issue1646728)
- try:
+ with contextlib.suppress(ValueError, OverflowError, OSError):
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
info_dict[date_key] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
live_keys = ('is_live', 'was_live')
live_status = info_dict.get('live_status')
@@ -2380,13 +2446,32 @@ class YoutubeDL(object):
for key in live_keys:
if info_dict.get(key) is None:
info_dict[key] = (live_status == key)
+ if live_status == 'post_live':
+ info_dict['was_live'] = True
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
for field in ('chapter', 'season', 'episode'):
- if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ def _raise_pending_errors(self, info):
+ err = info.pop('__pending_error', None)
+ if err:
+ self.report_error(err, tb=False)
+
+ def sort_formats(self, info_dict):
+ formats = self._get_formats(info_dict)
+ if not formats:
+ return
+ # Backward compatibility with InfoExtractor._sort_formats
+ field_preference = formats[0].pop('__sort_fields', None)
+ if field_preference:
+ info_dict['_format_sort_fields'] = field_preference
+
+ formats.sort(key=FormatSorter(
+ self, info_dict.get('_format_sort_fields', [])).calculate_preference)
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
self._num_videos += 1
@@ -2403,24 +2488,40 @@ class YoutubeDL(object):
def sanitize_string_field(info, string_field):
field = info.get(string_field)
- if field is None or isinstance(field, compat_str):
+ if field is None or isinstance(field, str):
return
report_force_conversion(string_field, 'a string', 'string')
- info[string_field] = compat_str(field)
+ info[string_field] = str(field)
def sanitize_numeric_fields(info):
for numeric_field in self._NUMERIC_FIELDS:
field = info.get(numeric_field)
- if field is None or isinstance(field, compat_numeric_types):
+ if field is None or isinstance(field, (int, float)):
continue
report_force_conversion(numeric_field, 'numeric', 'int')
info[numeric_field] = int_or_none(field)
sanitize_string_field(info_dict, 'id')
sanitize_numeric_fields(info_dict)
+ if info_dict.get('section_end') and info_dict.get('section_start') is not None:
+ info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
self.report_warning('"duration" field is negative, there is an error in extractor')
+ chapters = info_dict.get('chapters') or []
+ if chapters and chapters[0].get('start_time'):
+ chapters.insert(0, {'start_time': 0})
+
+ dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
+ for idx, (prev, current, next_) in enumerate(zip(
+ (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
+ if current.get('start_time') is None:
+ current['start_time'] = prev.get('end_time')
+ if not current.get('end_time'):
+ current['end_time'] = next_.get('start_time')
+ if not current.get('title'):
+ current['title'] = f'<Untitled Chapter {idx}>'
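# E.g. (illustrative): chapters [{'start_time': 10, 'end_time': 20}] gain a
# leading gap-filler, becoming
#     [{'start_time': 0, 'end_time': 10, 'title': '<Untitled Chapter 1>'},
#      {'start_time': 10, 'end_time': 20, 'title': '<Untitled Chapter 2>'}]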
+
if 'playlist' not in info_dict:
# It isn't part of a playlist
info_dict['playlist'] = None
@@ -2456,20 +2557,18 @@ class YoutubeDL(object):
info_dict['requested_subtitles'] = self.process_subtitles(
info_dict['id'], subtitles, automatic_captions)
- if info_dict.get('formats') is None:
- # There's only one format available
- formats = [info_dict]
- else:
- formats = info_dict['formats']
+ self.sort_formats(info_dict)
+ formats = self._get_formats(info_dict)
- info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+ # or None ensures --clean-infojson removes it
+ info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')]
- if info_dict['__has_drm'] and all(
- f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
- self.report_warning(
- 'This video is DRM protected and only images are available for download. '
- 'Use --list-formats to see them')
+
+ if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+ self.report_warning(
+ f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
+ 'only images are available for download. Use --list-formats to see them'.capitalize())
get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
if not get_from_start:
@@ -2481,9 +2580,6 @@ class YoutubeDL(object):
'--live-from-start is passed, but there are no formats that can be downloaded from the start. '
'If you want to download from the current time, use --no-live-from-start'))
- if not formats:
- self.raise_no_formats(info_dict)
-
def is_wellformed(f):
url = f.get('url')
if not url:
@@ -2496,7 +2592,10 @@ class YoutubeDL(object):
return True
# Filter out malformed formats for better extraction robustness
- formats = list(filter(is_wellformed, formats))
+ formats = list(filter(is_wellformed, formats or []))
+
+ if not formats:
+ self.raise_no_formats(info_dict)
formats_dict = {}
@@ -2506,7 +2605,7 @@ class YoutubeDL(object):
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
if not format.get('format_id'):
- format['format_id'] = compat_str(i)
+ format['format_id'] = str(i)
else:
# Sanitize format_id from characters used in format selector expression
format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
@@ -2542,9 +2641,11 @@ class YoutubeDL(object):
format['resolution'] = self.format_resolution(format, default=None)
if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
format['dynamic_range'] = 'SDR'
+ if format.get('aspect_ratio') is None:
+ format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
if (info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
- format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
+ format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
# Add HTTP headers, so that external programs can use them from the
# json output
@@ -2574,10 +2675,9 @@ class YoutubeDL(object):
info_dict, _ = self.pre_process(info_dict, 'after_filter')
# The pre-processors may have modified the formats
- formats = info_dict.get('formats', [info_dict])
+ formats = self._get_formats(info_dict)
- list_only = self.params.get('simulate') is None and (
- self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
+ list_only = self.params.get('simulate') == 'list_only'
interactive_format_selection = not list_only and self.format_selector == '-'
if self.params.get('list_thumbnails'):
self.list_thumbnails(info_dict)
@@ -2591,7 +2691,7 @@ class YoutubeDL(object):
if list_only:
# Without this printing, -F --print-json will not work
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
- return
+ return info_dict
format_selector = self.format_selector
if format_selector is None:
@@ -2632,20 +2732,39 @@ class YoutubeDL(object):
# Process what we can, even without any available formats.
formats_to_download = [{}]
- best_format = formats_to_download[-1]
+ requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
+ best_format, downloaded_formats = formats_to_download[-1], []
if download:
- if best_format:
- self.to_screen(
- f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
- + ', '.join([f['format_id'] for f in formats_to_download]))
+ if best_format and requested_ranges:
+ def to_screen(*msg):
+ self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
+
+ to_screen(f'Downloading {len(formats_to_download)} format(s):',
+ (f['format_id'] for f in formats_to_download))
+ if requested_ranges != ({}, ):
+ to_screen(f'Downloading {len(requested_ranges)} time ranges:',
+ (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
max_downloads_reached = False
- for i, fmt in enumerate(formats_to_download):
- formats_to_download[i] = new_info = self._copy_infodict(info_dict)
+
+ for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
+ new_info = self._copy_infodict(info_dict)
new_info.update(fmt)
+ offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
+ end_time = offset + min(chapter.get('end_time', duration), duration)
+ if chapter or offset:
+ new_info.update({
+ 'section_start': offset + chapter.get('start_time', 0),
+ # duration may not be accurate, so allow deviations of <1 sec
+ 'section_end': end_time if end_time <= offset + duration + 1 else None,
+ 'section_title': chapter.get('title'),
+ 'section_number': chapter.get('index'),
+ })
+ downloaded_formats.append(new_info)
try:
self.process_info(new_info)
except MaxDownloadsReached:
max_downloads_reached = True
+ self._raise_pending_errors(new_info)
# Remove copied info
for key, val in tuple(new_info.items()):
if info_dict.get(key) == val:
@@ -2653,12 +2772,12 @@ class YoutubeDL(object):
if max_downloads_reached:
break
- write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
+ write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
assert write_archive.issubset({True, False, 'ignore'})
if True in write_archive and False not in write_archive:
self.record_download_archive(info_dict)
- info_dict['requested_downloads'] = formats_to_download
+ info_dict['requested_downloads'] = downloaded_formats
info_dict = self.run_all_pps('after_video', info_dict)
if max_downloads_reached:
raise MaxDownloadsReached()
@@ -2669,50 +2788,35 @@ class YoutubeDL(object):
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
"""Select the requested subtitles and their format"""
- available_subs = {}
+ available_subs, normal_sub_langs = {}, []
if normal_subtitles and self.params.get('writesubtitles'):
available_subs.update(normal_subtitles)
+ normal_sub_langs = tuple(normal_subtitles.keys())
if automatic_captions and self.params.get('writeautomaticsub'):
for lang, cap_info in automatic_captions.items():
if lang not in available_subs:
available_subs[lang] = cap_info
- if (not self.params.get('writesubtitles') and not
- self.params.get('writeautomaticsub') or not
- available_subs):
+ if not available_subs or (
+ not self.params.get('writesubtitles')
+ and not self.params.get('writeautomaticsub')):
return None
- all_sub_langs = available_subs.keys()
+ all_sub_langs = tuple(available_subs.keys())
if self.params.get('allsubtitles', False):
requested_langs = all_sub_langs
elif self.params.get('subtitleslangs', False):
- # A list is used so that the order of languages will be the same as
- # given in subtitleslangs. See https://github.com/hypervideo/hypervideo/issues/1041
- requested_langs = []
- for lang_re in self.params.get('subtitleslangs'):
- discard = lang_re[0] == '-'
- if discard:
- lang_re = lang_re[1:]
- if lang_re == 'all':
- if discard:
- requested_langs = []
- else:
- requested_langs.extend(all_sub_langs)
- continue
- current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
- if discard:
- for lang in current_langs:
- while lang in requested_langs:
- requested_langs.remove(lang)
- else:
- requested_langs.extend(current_langs)
- requested_langs = orderedSet(requested_langs)
- elif 'en' in available_subs:
- requested_langs = ['en']
+ try:
+ requested_langs = orderedSet_from_options(
+ self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
+ except re.error as e:
+ raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
+ elif normal_sub_langs:
+ requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
else:
- requested_langs = [list(all_sub_langs)[0]]
+ requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
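# E.g. (illustrative): with normal subtitles in {'ja', 'en'} and only automatic
# captions in 'de', the default pick is ['en']; if only captions are available,
# the first caption language is used instead.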
if requested_langs:
- self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
+ self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
formats_query = self.params.get('subtitlesformat', 'best')
formats_preference = formats_query.split('/') if formats_query else []
@@ -2720,7 +2824,7 @@ class YoutubeDL(object):
for lang in requested_langs:
formats = available_subs.get(lang)
if formats is None:
- self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ self.report_warning(f'{lang} subtitles not available for {video_id}')
continue
for ext in formats_preference:
if ext == 'best':
@@ -2748,12 +2852,16 @@ class YoutubeDL(object):
info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
def format_tmpl(tmpl):
- mobj = re.match(r'\w+(=?)$', tmpl)
- if mobj and mobj.group(1):
- return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
- elif mobj:
- return f'%({tmpl})s'
- return tmpl
+ mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
+ if not mobj:
+ return tmpl
+
+ fmt = '%({})s'
+ if tmpl.startswith('{'):
+ tmpl = f'.{tmpl}'
+ if tmpl.endswith('='):
+ tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
+ return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
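# E.g. (illustrative): format_tmpl('id,title') -> '%(id)s\n%(title)s', and
# format_tmpl('duration=') -> 'duration = %(duration)#j' (value dumped as JSON).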
for tmpl in self.params['forceprint'].get(key, []):
self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
@@ -2763,7 +2871,7 @@ class YoutubeDL(object):
tmpl = format_tmpl(tmpl)
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
if self._ensure_dir_exists(filename):
- with io.open(filename, 'a', encoding='utf-8') as f:
+ with open(filename, 'a', encoding='utf-8') as f:
f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
def __forced_printings(self, info_dict, filename, incomplete):
@@ -2833,7 +2941,7 @@ class YoutubeDL(object):
urls = '", "'.join(
(f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
for f in info.get('requested_formats', []) or [info])
- self.write_debug('Invoking downloader on "%s"' % urls)
+ self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
# Note: Ideally info should be a deep-copied so that hooks cannot modify it.
# But it may contain objects that are not deep-copyable
@@ -2861,8 +2969,6 @@ class YoutubeDL(object):
if 'format' not in info_dict and 'ext' in info_dict:
info_dict['format'] = info_dict['ext']
- # This is mostly just for backward compatibility of process_info
- # As a side-effect, this allows for format-specific filters
if self._match_entry(info_dict) is not None:
info_dict['__write_download_archive'] = 'ignore'
return
@@ -2879,8 +2985,13 @@ class YoutubeDL(object):
# Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
+ def check_max_downloads():
+ if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
+ raise MaxDownloadsReached()
+
if self.params.get('simulate'):
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
+ check_max_downloads()
return
if full_filename is None:
@@ -2928,11 +3039,11 @@ class YoutubeDL(object):
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)
- with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning('There are no annotations to write.')
- except (OSError, IOError):
+ except OSError:
self.report_error('Cannot write annotations file: ' + annofn)
return
@@ -2951,13 +3062,13 @@ class YoutubeDL(object):
return True
try:
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
- with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
- newline='\r\n' if link_type == 'url' else '\n') as linkfile:
+ with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
+ newline='\r\n' if link_type == 'url' else '\n') as linkfile:
template_vars = {'url': url}
if link_type == 'desktop':
template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write internet shortcut {linkfn}')
return False
return True
@@ -2984,12 +3095,8 @@ class YoutubeDL(object):
info_dict.clear()
info_dict.update(new_info)
- try:
- new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
- replace_info_dict(new_info)
- except PostProcessingError as err:
- self.report_error('Preprocessing: %s' % str(err))
- return
+ new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+ replace_info_dict(new_info)
if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename
@@ -3011,40 +3118,25 @@ class YoutubeDL(object):
info_dict['ext'] = os.path.splitext(file)[1][1:]
return file
- success = True
- if info_dict.get('requested_formats') is not None:
-
- def compatible_formats(formats):
- # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
- video_formats = [format for format in formats if format.get('vcodec') != 'none']
- audio_formats = [format for format in formats if format.get('acodec') != 'none']
- if len(video_formats) > 2 or len(audio_formats) > 2:
- return False
-
- # Check extension
- exts = set(format.get('ext') for format in formats)
- COMPATIBLE_EXTS = (
- set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
- set(('webm',)),
- )
- for ext_sets in COMPATIBLE_EXTS:
- if ext_sets.issuperset(exts):
- return True
- # TODO: Check acodec/vcodec
- return False
+ fd, success = None, True
+ if info_dict.get('protocol') or info_dict.get('url'):
+ fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+ if fd is not FFmpegFD and (
+ info_dict.get('section_start') or info_dict.get('section_end')):
+ msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
+ else 'You have requested downloading the video partially, but ffmpeg is not installed')
+ self.report_error(f'{msg}. Aborting')
+ return
+ if info_dict.get('requested_formats') is not None:
requested_formats = info_dict['requested_formats']
old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None:
- if not compatible_formats(requested_formats):
- info_dict['ext'] = 'mkv'
- self.report_warning(
- 'Requested formats are incompatible for merge and will be merged into mkv')
if (info_dict['ext'] == 'webm'
and info_dict.get('thumbnails')
# check with type instead of pp_key, __name__, or isinstance
# since we don't want any custom PPs to trigger this
- and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
+ and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
info_dict['ext'] = 'mkv'
self.report_warning(
'webm doesn\'t support embedding a thumbnail, mkv will be used')
@@ -3058,7 +3150,7 @@ class YoutubeDL(object):
os.path.splitext(filename)[0]
if filename_real_ext in (old_ext, new_ext)
else filename)
- return '%s.%s' % (filename_wo_ext, ext)
+ return f'{filename_wo_ext}.{ext}'
# Ensure filename always has a correct extension for successful merge
full_filename = correct_ext(full_filename)
@@ -3066,10 +3158,8 @@ class YoutubeDL(object):
dl_filename = existing_video_file(full_filename, temp_filename)
info_dict['__real_download'] = False
- downloaded = []
merger = FFmpegMergerPP(self)
-
- fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+ downloaded = []
if dl_filename is not None:
self.report_file_already_downloaded(dl_filename)
elif fd:
@@ -3143,12 +3233,13 @@ class YoutubeDL(object):
except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
- except (OSError, IOError) as err:
+ except OSError as err:
raise UnavailableVideoError(err)
except (ContentTooShortError, ) as err:
- self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+ self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
return
+ self._raise_pending_errors(info_dict)
if success and full_filename != '-':
def fixup():
@@ -3159,16 +3250,16 @@ class YoutubeDL(object):
if fixup_policy in ('ignore', 'never'):
return
elif fixup_policy == 'warn':
- do_fixup = False
+ do_fixup = 'warn'
elif fixup_policy != 'force':
assert fixup_policy in ('detect_or_warn', None)
if not info_dict.get('__real_download'):
do_fixup = False
def ffmpeg_fixup(cndn, msg, cls):
- if not cndn:
+ if not (do_fixup and cndn):
return
- if not do_fixup:
+ elif do_fixup == 'warn':
self.report_warning(f'{vid}: {msg}')
return
pp = cls(self)
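The fixup hunk above turns `do_fixup` into a tri-state value (falsy, `'warn'`, or truthy), so a single gate can skip, warn, or actually run the postprocessor. A standalone sketch of that dispatch, with illustrative names:

```python
def apply_fixup(condition, message, run_pp, do_fixup):
    if not (do_fixup and condition):
        return                        # fixups disabled, or nothing to fix
    elif do_fixup == 'warn':
        print(f'WARNING: {message}')  # report the problem, leave the file alone
        return
    run_pp()                          # truthy policy: actually run the fixup PP

apply_fixup(True, 'Malformed AAC timestamps', lambda: print('running ffmpeg...'), 'warn')
```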
@@ -3178,30 +3269,32 @@ class YoutubeDL(object):
self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
stretched_ratio = info_dict.get('stretched_ratio')
- ffmpeg_fixup(
- stretched_ratio not in (1, None),
- f'Non-uniform pixel ratio {stretched_ratio}',
- FFmpegFixupStretchedPP)
-
- ffmpeg_fixup(
- (info_dict.get('requested_formats') is None
- and info_dict.get('container') == 'm4a_dash'
- and info_dict.get('ext') == 'm4a'),
- 'writing DASH m4a. Only some players support this container',
- FFmpegFixupM4aPP)
+ ffmpeg_fixup(stretched_ratio not in (1, None),
+ f'Non-uniform pixel ratio {stretched_ratio}',
+ FFmpegFixupStretchedPP)
downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
- downloader = downloader.__name__ if downloader else None
+ downloader = downloader.FD_NAME if downloader else None
- if info_dict.get('requested_formats') is None: # Not necessary if doing merger
- ffmpeg_fixup(downloader == 'HlsFD',
+ ext = info_dict.get('ext')
+ postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
+ isinstance(pp, FFmpegVideoConvertorPP)
+ and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
+ ) for pp in self._pps['post_process'])
+
+ if not postprocessed_by_ffmpeg:
+ ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
+ 'writing DASH m4a. Only some players support this container',
+ FFmpegFixupM4aPP)
+ ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
+ or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
FFmpegFixupM3u8PP)
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
- ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+ ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fixup()
try:
@@ -3217,15 +3310,10 @@ class YoutubeDL(object):
return
info_dict['__write_download_archive'] = True
+ assert info_dict is original_infodict # Make sure the info_dict was modified in-place
if self.params.get('force_write_download_archive'):
info_dict['__write_download_archive'] = True
-
- # Make sure the info_dict was modified in-place
- assert info_dict is original_infodict
-
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None and self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ check_max_downloads()
def __download_wrapper(self, func):
@functools.wraps(func)
@@ -3234,13 +3322,11 @@ class YoutubeDL(object):
res = func(*args, **kwargs)
except UnavailableVideoError as e:
self.report_error(e)
- except MaxDownloadsReached as e:
- self.to_screen(f'[info] {e}')
- raise
except DownloadCancelled as e:
self.to_screen(f'[info] {e}')
if not self.params.get('break_per_url'):
raise
+ self._num_downloads = 0
else:
if self.params.get('dump_single_json', False):
self.post_extract(res)
@@ -3250,7 +3336,7 @@ class YoutubeDL(object):
def download(self, url_list):
"""Download a given list of URLs."""
url_list = variadic(url_list) # Passing a single URL is a common mistake
- outtmpl = self.outtmpl_dict['default']
+ outtmpl = self.params['outtmpl']['default']
if (len(url_list) > 1
and outtmpl != '-'
and '%' not in outtmpl
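The `variadic(url_list)` guard exists because passing a bare string to `download()` would otherwise iterate over it character by character. A simplified version of the helper (the real one in `hypervideo_dl.utils` is more general):

```python
def variadic(x):
    # wrap anything that is not already a list/tuple into a 1-tuple
    return x if isinstance(x, (list, tuple)) else (x,)

assert variadic('https://example.com') == ('https://example.com',)
assert variadic(['u1', 'u2']) == ['u1', 'u2']
```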
@@ -3289,11 +3375,17 @@ class YoutubeDL(object):
return info_dict
info_dict.setdefault('epoch', int(time.time()))
info_dict.setdefault('_type', 'video')
+ info_dict.setdefault('_version', {
+ 'version': __version__,
+ 'current_git_head': current_git_head(),
+ 'release_git_head': RELEASE_GIT_HEAD,
+ 'repository': REPOSITORY,
+ })
if remove_private_keys:
- reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
+ reject = lambda k, v: v is None or k.startswith('__') or k in {
'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
- 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
+ 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
}
else:
reject = lambda k, v: False
@@ -3315,6 +3407,17 @@ class YoutubeDL(object):
''' Alias of sanitize_info for backward compatibility '''
return YoutubeDL.sanitize_info(info_dict, actually_filter)
+ def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
+ for filename in set(filter(None, files_to_delete)):
+ if msg:
+ self.to_screen(msg % filename)
+ try:
+ os.remove(filename)
+ except OSError:
+ self.report_warning(f'Unable to delete file {filename}')
+ if filename in info.get('__files_to_move', []): # NB: Delete even if None
+ del info['__files_to_move'][filename]
+
@staticmethod
def post_extract(info_dict):
def actual_post_extract(info_dict):
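The new `_delete_downloaded_files()` helper centralizes the delete-warn-untrack loop that call sites previously inlined (see the post-processing hunk below). A hypothetical call, with example filenames and `ydl` being a `YoutubeDL` instance:

```python
ydl._delete_downloaded_files(
    'clip.f137.mp4', 'clip.f140.m4a', None,  # None entries are filtered out
    info=info_dict,
    msg='Deleting original file %s (pass -k to keep)')
```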
@@ -3347,14 +3450,8 @@ class YoutubeDL(object):
for f in files_to_delete:
infodict['__files_to_move'].setdefault(f, '')
else:
- for old_filename in set(files_to_delete):
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
- if old_filename in infodict['__files_to_move']:
- del infodict['__files_to_move'][old_filename]
+ self._delete_downloaded_files(
+ *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
return infodict
def run_all_pps(self, key, info, *, additional_pps=None):
@@ -3366,7 +3463,12 @@ class YoutubeDL(object):
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
info = dict(ie_info)
info['__files_to_move'] = files_to_move or {}
- info = self.run_all_pps(key, info)
+ try:
+ info = self.run_all_pps(key, info)
+ except PostProcessingError as err:
+ msg = f'Preprocessing: {err}'
+ info.setdefault('__pending_error', msg)
+ self.report_error(msg, is_error=False)
return info, info.pop('__files_to_move', None)
def post_process(self, filename, info, files_to_move=None):
@@ -3396,18 +3498,15 @@ class YoutubeDL(object):
break
else:
return
- return '%s %s' % (extractor.lower(), video_id)
+ return make_archive_id(extractor, video_id)
def in_download_archive(self, info_dict):
- fn = self.params.get('download_archive')
- if fn is None:
+ if not self.archive:
return False
- vid_id = self._make_archive_id(info_dict)
- if not vid_id:
- return False # Incomplete video information
-
- return vid_id in self.archive
+ vid_ids = [self._make_archive_id(info_dict)]
+ vid_ids.extend(info_dict.get('_old_archive_ids') or [])
+ return any(id_ in self.archive for id_ in vid_ids)
def record_download_archive(self, info_dict):
fn = self.params.get('download_archive')
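Download-archive entries are plain `'<extractor> <id>'` lines, and the new `_old_archive_ids` list lets entries recorded under a renamed extractor still match. A sketch of the lookup, assuming `make_archive_id()` keeps that lowercased-extractor format:

```python
def make_archive_id(extractor, video_id):
    return f'{extractor.lower()} {video_id}'

archive = {'youtube dQw4w9WgXcQ'}
info = {'extractor': 'Youtube', 'id': 'dQw4w9WgXcQ',
        '_old_archive_ids': ['youtubetab dQw4w9WgXcQ']}
vid_ids = [make_archive_id(info['extractor'], info['id'])]
vid_ids += info.get('_old_archive_ids') or []
print(any(id_ in archive for id_ in vid_ids))  # True
```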
@@ -3415,9 +3514,11 @@ class YoutubeDL(object):
return
vid_id = self._make_archive_id(info_dict)
assert vid_id
+
self.write_debug(f'Adding to archive: {vid_id}')
- with locked_file(fn, 'a', encoding='utf-8') as archive_file:
- archive_file.write(vid_id + '\n')
+ if is_path_like(fn):
+ with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+ archive_file.write(vid_id + '\n')
self.archive.add(vid_id)
@staticmethod
@@ -3436,7 +3537,7 @@ class YoutubeDL(object):
def _list_format_headers(self, *headers):
if self.params.get('listformats_table', True) is not False:
- return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
+ return [self._format_out(header, self.Styles.HEADERS) for header in headers]
return headers
def _format_note(self, fdict):
@@ -3499,11 +3600,17 @@ class YoutubeDL(object):
res += '~' + format_bytes(fdict['filesize_approx'])
return res
- def render_formats_table(self, info_dict):
- if not info_dict.get('formats') and not info_dict.get('url'):
- return None
+ def _get_formats(self, info_dict):
+ if info_dict.get('formats') is None:
+ if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
+ return [info_dict]
+ return []
+ return info_dict['formats']
- formats = info_dict.get('formats', [info_dict])
+ def render_formats_table(self, info_dict):
+ formats = self._get_formats(info_dict)
+ if not formats:
+ return
if not self.params.get('listformats_table', True) is not False:
table = [
[
@@ -3511,33 +3618,45 @@ class YoutubeDL(object):
format_field(f, 'ext'),
self.format_resolution(f),
self._format_note(f)
- ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ ] for f in formats if (f.get('preference') or 0) >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
- delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
+ def simplified_codec(f, field):
+ assert field in ('acodec', 'vcodec')
+ codec = f.get(field, 'unknown')
+ if not codec:
+ return 'unknown'
+ elif codec != 'none':
+ return '.'.join(codec.split('.')[:4])
+
+ if field == 'vcodec' and f.get('acodec') == 'none':
+ return 'images'
+ elif field == 'acodec' and f.get('vcodec') == 'none':
+ return ''
+ return self._format_out('audio only' if field == 'vcodec' else 'video only',
+ self.Styles.SUPPRESS)
+
+ delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [
[
- self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
+ self._format_out(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'),
format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
- format_field(f, 'fps', '\t%d'),
+ format_field(f, 'fps', '\t%d', func=round),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
+ format_field(f, 'audio_channels', '\t%s'),
delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
- format_field(f, 'tbr', '\t%dk'),
+ format_field(f, 'tbr', '\t%dk', func=round),
shorten_protocol_name(f.get('protocol', '')),
delim,
- format_field(f, 'vcodec', default='unknown').replace(
- 'none', 'images' if f.get('acodec') == 'none'
- else self._format_screen('audio only', self.Styles.SUPPRESS)),
- format_field(f, 'vbr', '\t%dk'),
- format_field(f, 'acodec', default='unknown').replace(
- 'none', '' if f.get('vcodec') == 'none'
- else self._format_screen('video only', self.Styles.SUPPRESS)),
- format_field(f, 'abr', '\t%dk'),
- format_field(f, 'asr', '\t%dHz'),
+ simplified_codec(f, 'vcodec'),
+ format_field(f, 'vbr', '\t%dk', func=round),
+ simplified_codec(f, 'acodec'),
+ format_field(f, 'abr', '\t%dk', func=round),
+ format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
join_nonempty(
- self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
+ self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
format_field(f, 'language', '[%s]'),
join_nonempty(format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))),
@@ -3545,12 +3664,12 @@ class YoutubeDL(object):
delim=' '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers(
- 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
+ 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
return render_table(
header_line, table, hide_empty=True,
- delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
+ delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
thumbnails = list(info_dict.get('thumbnails') or [])
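`simplified_codec()` keeps only the first four dot-separated fields of a codec string, which tames the very long AV1/VP9 profile strings without losing the identifying part:

```python
for codec in ('avc1.640028', 'av01.0.12M.10.0.110.09.16.09.0'):
    print('.'.join(codec.split('.')[:4]))
# avc1.640028
# av01.0.12M.10
```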
@@ -3558,7 +3677,7 @@ class YoutubeDL(object):
return None
return render_table(
self._list_format_headers('ID', 'Width', 'Height', 'URL'),
- [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
+ [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
def render_subtitles_table(self, video_id, subtitles):
def _row(lang, formats):
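The switch from a `dict.get()` default to `or 'unknown'` in the thumbnails table matters because extractors often set `width`/`height` to an explicit `None`, which a default does not cover:

```python
t = {'width': None}
print(t.get('width', 'unknown'))    # None - the key exists, so the default is unused
print(t.get('width') or 'unknown')  # unknown
```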
@@ -3593,7 +3712,7 @@ class YoutubeDL(object):
def urlopen(self, req):
""" Start an HTTP download """
- if isinstance(req, compat_basestring):
+ if isinstance(req, str):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout)
@@ -3601,18 +3720,27 @@ class YoutubeDL(object):
if not self.params.get('verbose'):
return
+ from . import _IN_CLI # Must be delayed import
+
+ # These imports can be slow. So import them only as needed
+ from .extractor.extractors import _LAZY_LOADER
+ from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
if not supports_terminal_sequences(stream):
- from .compat import WINDOWS_VT_MODE
+ from .utils import WINDOWS_VT_MODE # Must be imported locally
ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
return ret
- encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
+ encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
- get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
- self.get_encoding())
+ self.get_encoding(),
+ ', '.join(
+ f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
+ if stream is not None and key != 'console')
+ )
logger = self.params.get('logger')
if logger:
@@ -3623,11 +3751,19 @@ class YoutubeDL(object):
write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
source = detect_variant()
+ if VARIANT not in (None, 'pip'):
+ source += '*'
write_debug(join_nonempty(
- 'hypervideo version', __version__,
+ f'{"hypervideo" if REPOSITORY == "hypervideo/hypervideo" else REPOSITORY} version',
+ __version__,
f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
'' if source == 'unknown' else f'({source})',
+ '' if _IN_CLI else 'API',
delim=' '))
+
+ if not _IN_CLI:
+ write_debug(f'params: {self.params}')
+
if not _LAZY_LOADER:
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
write_debug('Lazy loading extractors is forcibly disabled')
@@ -3637,41 +3773,17 @@ class YoutubeDL(object):
write_debug('Plugins: %s' % [
'%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
- if self.params.get('compat_opts'):
- write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
+ if self.params['compat_opts']:
+ write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
- if source == 'source':
- try:
- sp = Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate_or_kill()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_debug('Git HEAD: %s' % out)
- except Exception:
- try:
- sys.exc_clear()
- except Exception:
- pass
-
- def python_implementation():
- impl_name = platform.python_implementation()
- if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
- return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
- return impl_name
-
- write_debug('Python version %s (%s %s) - %s' % (
- platform.python_version(),
- python_implementation(),
- platform.architecture()[0],
- platform_name()))
+ if current_git_head():
+ write_debug(f'Git HEAD: {current_git_head()}')
+ write_debug(system_identifier())
exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
if ffmpeg_features:
- exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
+ exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
@@ -3680,21 +3792,14 @@ class YoutubeDL(object):
) or 'none'
write_debug('exe versions: %s' % exe_str)
- from .downloader.websocket import has_websockets
- from .postprocessor.embedthumbnail import has_mutagen
- from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
-
- lib_str = join_nonempty(
- compat_brotli and compat_brotli.__name__,
- has_certifi and 'certifi',
- compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
- SECRETSTORAGE_AVAILABLE and 'secretstorage',
- has_mutagen and 'mutagen',
- SQLITE_AVAILABLE and 'sqlite',
- has_websockets and 'websockets',
- delim=', ') or 'none'
- write_debug('Optional libraries: %s' % lib_str)
+ from .compat.compat_utils import get_package_info
+ from .dependencies import available_dependencies
+
+ write_debug('Optional libraries: %s' % (', '.join(sorted({
+ join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
+ })) or 'none'))
+ self._setup_opener()
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
@@ -3703,10 +3808,10 @@ class YoutubeDL(object):
# Not implemented
if False and self.params.get('call_home'):
- ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+ ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
write_debug('Public IP address: %s' % ipaddr)
latest_version = self.urlopen(
- 'https://yt-dl.org/latest/version').read().decode('utf-8')
+ 'https://yt-dl.org/latest/version').read().decode()
if version_tuple(latest_version) > version_tuple(__version__):
self.report_warning(
'You are using an outdated version (newest version: %s)! '
@@ -3714,6 +3819,8 @@ class YoutubeDL(object):
latest_version)
def _setup_opener(self):
+ if hasattr(self, '_opener'):
+ return
timeout_val = self.params.get('socket_timeout')
self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
@@ -3730,7 +3837,7 @@ class YoutubeDL(object):
else:
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
- proxies = compat_urllib_request.getproxies()
+ proxies = urllib.request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
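The environment-proxy fallback in isolation (same logic as the hunk above, runnable standalone):

```python
import urllib.request

proxies = urllib.request.getproxies()  # e.g. {'http': 'http://127.0.0.1:3128'}
# Reuse the HTTP proxy for HTTPS when only the former is configured
# (see https://github.com/ytdl-org/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
    proxies['https'] = proxies['http']
```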
@@ -3740,19 +3847,19 @@ class YoutubeDL(object):
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
redirect_handler = YoutubeDLRedirectHandler()
- data_handler = compat_urllib_request_DataHandler()
+ data_handler = urllib.request.DataHandler()
# When passing our own FileHandler instance, build_opener won't add the
# default FileHandler and allows us to disable the file protocol, which
# can be used for malicious purposes (see
# https://github.com/ytdl-org/youtube-dl/issues/8227)
- file_handler = compat_urllib_request.FileHandler()
+ file_handler = urllib.request.FileHandler()
def file_open(*args, **kwargs):
- raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in hypervideo for security reasons')
+ raise urllib.error.URLError('file:// scheme is explicitly disabled in hypervideo for security reasons')
file_handler.file_open = file_open
- opener = compat_urllib_request.build_opener(
+ opener = urllib.request.build_opener(
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
# Delete the default user-agent header, which would otherwise apply in
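A minimal sketch of the `file://` lockout used above: handing `build_opener()` a `FileHandler` whose `file_open` always raises both suppresses the default handler and rejects the scheme:

```python
import urllib.error
import urllib.request

def _refuse_file_open(*args, **kwargs):
    raise urllib.error.URLError('file:// scheme is disabled')

file_handler = urllib.request.FileHandler()
file_handler.file_open = _refuse_file_open
opener = urllib.request.build_opener(file_handler)
# opener.open('file:///etc/hostname') now raises URLError
```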
@@ -3796,7 +3903,7 @@ class YoutubeDL(object):
try:
write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
return True
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
return None
@@ -3817,9 +3924,9 @@ class YoutubeDL(object):
else:
try:
self.to_screen(f'[info] Writing {label} description to: {descfn}')
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description'])
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write {label} description file {descfn}')
return None
return True
@@ -3853,12 +3960,12 @@ class YoutubeDL(object):
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/ytdl-org/youtube-dl/issues/10268
- with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
+ with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
continue
- except (OSError, IOError):
+ except OSError:
self.report_error(f'Cannot write video subtitles file {sub_filename}')
return None
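The `newline=''` in the subtitle writer is load-bearing: in the default universal-newline mode, every `'\n'` written in text mode becomes `os.linesep`, so subtitle payloads that already contain `'\r\n'` would be corrupted on Windows:

```python
data = 'WEBVTT\r\n\r\n00:00.000 --> 00:01.000\r\nHello\r\n'
# newline='' disables translation, so the payload is written byte-for-byte;
# without it, Windows would turn each '\r\n' into '\r\r\n'
with open('sample.en.vtt', 'w', encoding='utf-8', newline='') as subfile:
    subfile.write(data)
```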
diff --git a/hypervideo_dl/__init__.py b/hypervideo_dl/__init__.py
index dc53a9e..8ac1c0c 100644
--- a/hypervideo_dl/__init__.py
+++ b/hypervideo_dl/__init__.py
@@ -1,81 +1,80 @@
#!/usr/bin/python
-# coding: utf-8
+f'You are using an unsupported version of Python. Only Python versions 3.6 and above are supported by hypervideo' # noqa: F541
__license__ = 'CC0-1.0'
-import codecs
-import io
+import getpass
import itertools
+import optparse
import os
-import random
import re
import sys
+from .compat import compat_shlex_quote, workaround_optparse_bug9161
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
+from .downloader import FileDownloader
+from .downloader.external import get_external_downloader
+from .extractor import list_extractor_classes
+from .extractor.adobepass import MSO_INFO
+from .extractor.common import InfoExtractor
from .options import parseOpts
-from .compat import (
- compat_getpass,
- compat_os_name,
- compat_shlex_quote,
- workaround_optparse_bug9161,
+from .postprocessor import (
+ FFmpegExtractAudioPP,
+ FFmpegSubtitlesConvertorPP,
+ FFmpegThumbnailsConvertorPP,
+ FFmpegVideoConvertorPP,
+ FFmpegVideoRemuxerPP,
+ MetadataFromFieldPP,
+ MetadataParserPP,
)
-from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .utils import (
+ NO_DEFAULT,
+ POSTPROCESS_WHEN,
DateRange,
- decodeOption,
DownloadCancelled,
DownloadError,
+ GeoUtils,
+ PlaylistEntries,
+ SameFileError,
+ decodeOption,
+ download_range_func,
expand_path,
float_or_none,
- GeoUtils,
+ format_field,
int_or_none,
match_filter_func,
- NO_DEFAULT,
parse_duration,
preferredencoding,
read_batch_urls,
+ read_stdin,
render_table,
- SameFileError,
setproctitle,
std_headers,
traverse_obj,
+ variadic,
write_string,
)
-from .downloader import (
- FileDownloader,
-)
-from .extractor import gen_extractors, list_extractors
-from .extractor.common import InfoExtractor
-from .extractor.adobepass import MSO_INFO
-from .postprocessor import (
- FFmpegExtractAudioPP,
- FFmpegSubtitlesConvertorPP,
- FFmpegThumbnailsConvertorPP,
- FFmpegVideoConvertorPP,
- FFmpegVideoRemuxerPP,
- MetadataFromFieldPP,
- MetadataParserPP,
-)
from .YoutubeDL import YoutubeDL
+def _exit(status=0, *args):
+ for msg in args:
+ sys.stderr.write(msg)
+ raise SystemExit(status)
+
+
def get_urls(urls, batchfile, verbose):
# Batch file verification
batch_urls = []
if batchfile is not None:
try:
- if batchfile == '-':
- write_string('Reading URLs from stdin - EOF (%s) to end:\n' % (
- 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'))
- batchfd = sys.stdin
- else:
- batchfd = io.open(
- expand_path(batchfile),
- 'r', encoding='utf-8', errors='ignore')
- batch_urls = read_batch_urls(batchfd)
+ batch_urls = read_batch_urls(
+ read_stdin('URLs') if batchfile == '-'
+ else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
if verbose:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
- except IOError:
- sys.exit('ERROR: batch file %s could not be read' % batchfile)
+ except OSError:
+ _exit(f'ERROR: batch file {batchfile} could not be read')
_enc = preferredencoding()
return [
url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip()
@@ -83,6 +82,11 @@ def get_urls(urls, batchfile, verbose):
def print_extractor_information(opts, urls):
+ # Importing GenericIE is currently slow since it imports other extractors
+ # TODO: Move this back to module level after generalization of embed detection
+ from .extractor.generic import GenericIE
+
+ out = ''
if opts.list_extractors:
for ie in list_extractors(opts.age_limit):
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout)
@@ -218,15 +222,11 @@ def validate_options(opts):
validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
# Postprocessor formats
- validate_in('audio format', opts.audioformat, ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS))
+ validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
- validate_in('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)
- if opts.recodevideo is not None:
- opts.recodevideo = opts.recodevideo.replace(' ', '')
- validate_regex('video recode format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
- if opts.remuxvideo is not None:
- opts.remuxvideo = opts.remuxvideo.replace(' ', '')
- validate_regex('video remux format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
+ validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
+ validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
+ validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
if opts.audioquality:
opts.audioquality = opts.audioquality.strip('k').strip('K')
# int_or_none prevents inf, nan
@@ -248,6 +248,28 @@ def validate_options(opts):
opts.extractor_retries = parse_retries('extractor', opts.extractor_retries)
opts.file_access_retries = parse_retries('file access', opts.file_access_retries)
+ # Retry sleep function
+ def parse_sleep_func(expr):
+ NUMBER_RE = r'\d+(?:\.\d+)?'
+ op, start, limit, step, *_ = tuple(re.fullmatch(
+ rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
+ expr.strip()).groups()) + (None, None)
+
+ if op == 'exp':
+ return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
+ else:
+ default_step = start if op or limit else 0
+ return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf'))
+
+ for key, expr in opts.retry_sleep.items():
+ if not expr:
+ del opts.retry_sleep[key]
+ continue
+ try:
+ opts.retry_sleep[key] = parse_sleep_func(expr)
+ except AttributeError:
+ raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
+
# Bytes
def parse_bytes(name, value):
if value is None:
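The retry-sleep expressions follow a `[linear|exp=]start[:limit][:step]` grammar. Treating `parse_sleep_func` from the hunk above as a standalone function, the parsed callables behave like this:

```python
linear = parse_sleep_func('linear=1::2')  # 1 + 2*n, unbounded
expo = parse_sleep_func('exp=1:20')       # 1 * 2**n, capped at 20
const = parse_sleep_func('3')             # bare number: constant sleep

print([linear(n) for n in range(4)])  # [1.0, 3.0, 5.0, 7.0]
print([expo(n) for n in range(6)])    # [1.0, 2.0, 4.0, 8.0, 16.0, 20.0]
print([const(n) for n in range(3)])   # [3.0, 3.0, 3.0]
```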
@@ -292,20 +314,25 @@ def validate_options(opts):
'Cannot download a video and extract audio into the same file! '
f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')
- # Remove chapters
- remove_chapters_patterns, opts.remove_ranges = [], []
- for regex in opts.remove_chapters or []:
- if regex.startswith('*'):
- dur = list(map(parse_duration, regex[1:].split('-')))
- if len(dur) == 2 and all(t is not None for t in dur):
- opts.remove_ranges.append(tuple(dur))
+ def parse_chapters(name, value):
+ chapters, ranges = [], []
+ for regex in value or []:
+ if regex.startswith('*'):
+ for range in regex[1:].split(','):
+ dur = tuple(map(parse_duration, range.strip().split('-')))
+ if len(dur) == 2 and all(t is not None for t in dur):
+ ranges.append(dur)
+ else:
+ raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end')
continue
- raise ValueError(f'invalid --remove-chapters time range "{regex}". Must be of the form *start-end')
- try:
- remove_chapters_patterns.append(re.compile(regex))
- except re.error as err:
- raise ValueError(f'invalid --remove-chapters regex "{regex}" - {err}')
- opts.remove_chapters = remove_chapters_patterns
+ try:
+ chapters.append(re.compile(regex))
+ except re.error as err:
+ raise ValueError(f'invalid {name} regex "{regex}" - {err}')
+ return chapters, ranges
+
+ opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
+ opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
# Cookies from browser
if opts.cookiesfrombrowser:
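With this refactor, `*`-prefixed entries are parsed as time ranges (now comma-separable) and everything else is compiled as a chapter-title regex. Treating `parse_chapters` from the hunk above as standalone:

```python
chapters, ranges = parse_chapters(
    '--remove-chapters', [r'(?i)sponsor', '*0:00-1:30, 2:00-3:00'])
print([pattern.pattern for pattern in chapters])  # ['(?i)sponsor']
print(ranges)                                     # [(0.0, 90.0), (120.0, 180.0)]
```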
@@ -349,6 +376,12 @@ def validate_options(opts):
opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata)))
# Other options
+ if opts.playlist_items is not None:
+ try:
+ tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
+ except Exception as err:
+ raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
+
geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
if geo_bypass_code is not None:
try:
@@ -369,6 +402,17 @@ def validate_options(opts):
if opts.no_sponsorblock:
opts.sponsorblock_mark = opts.sponsorblock_remove = set()
+ default_downloader = None
+ for proto, path in opts.external_downloader.items():
+ if path == 'native':
+ continue
+ ed = get_external_downloader(path)
+ if ed is None:
+ raise ValueError(
+ f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"')
+ elif ed and proto == 'default':
+ default_downloader = ed.get_basename()
+
warnings, deprecation_warnings = [], []
# Common mistake: -f best
@@ -379,13 +423,18 @@ def validate_options(opts):
'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
# --(postprocessor/downloader)-args without name
- def report_args_compat(name, value, key1, key2=None):
+ def report_args_compat(name, value, key1, key2=None, where=None):
if key1 in value and key2 not in value:
- warnings.append(f'{name} arguments given without specifying name. The arguments will be given to all {name}s')
+ warnings.append(f'{name.title()} arguments given without specifying name. '
+ f'The arguments will be given to {where or f"all {name}s"}')
return True
return False
- report_args_compat('external downloader', opts.external_downloader_args, 'default')
+ if report_args_compat('external downloader', opts.external_downloader_args,
+ 'default', where=default_downloader) and default_downloader:
+ # Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1
+ opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default'))
+
if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'):
opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat')
opts.postprocessor_args.setdefault('sponskrub', [])
@@ -404,6 +453,9 @@ def validate_options(opts):
setattr(opts, opt1, default)
# Conflicting options
+ report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random')
+ report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist')
+ report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist')
report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl'))
@@ -478,9 +530,9 @@ def validate_options(opts):
# Ask for passwords
if opts.username is not None and opts.password is None:
- opts.password = compat_getpass('Type account password and press [Return]: ')
+ opts.password = getpass.getpass('Type account password and press [Return]: ')
if opts.ap_username is not None and opts.ap_password is None:
- opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
+ opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
return warnings, deprecation_warnings
@@ -634,7 +686,7 @@ def parse_options(argv=None):
final_ext = (
opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS
- else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best')
+ else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
else None)
return parser, opts, urls, {
@@ -690,6 +742,7 @@ def parse_options(argv=None):
'file_access_retries': opts.file_access_retries,
'fragment_retries': opts.fragment_retries,
'extractor_retries': opts.extractor_retries,
+ 'retry_sleep_functions': opts.retry_sleep,
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
'keep_fragments': opts.keep_fragments,
'concurrent_fragment_downloads': opts.concurrent_fragment_downloads,
@@ -704,6 +757,7 @@ def parse_options(argv=None):
'playlistend': opts.playlistend,
'playlistreverse': opts.playlist_reverse,
'playlistrandom': opts.playlist_random,
+ 'lazy_playlist': opts.lazy_playlist,
'noplaylist': opts.noplaylist,
'logtostderr': opts.outtmpl.get('default') == '-',
'consoletitle': opts.consoletitle,
@@ -735,6 +789,7 @@ def parse_options(argv=None):
'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages,
'write_pages': opts.write_pages,
+ 'load_pages': opts.load_pages,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
@@ -783,6 +838,8 @@ def parse_options(argv=None):
'max_sleep_interval': opts.max_sleep_interval,
'sleep_interval_subtitles': opts.sleep_interval_subtitles,
'external_downloader': opts.external_downloader,
+ 'download_ranges': opts.download_ranges,
+ 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize,
@@ -821,52 +878,66 @@ def _real_main(argv=None):
if opts.dump_user_agent:
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
write_string(f'{ua}\n', out=sys.stdout)
- sys.exit(0)
+ return
if print_extractor_information(opts, all_urls):
- sys.exit(0)
+ return
with YoutubeDL(ydl_opts) as ydl:
+ pre_process = opts.update_self or opts.rm_cachedir
actual_use = all_urls or opts.load_info_filename
- # Remove cache dir
if opts.rm_cachedir:
ydl.cache.remove()
- # Maybe do nothing
+ updater = Updater(ydl)
+ if opts.update_self and updater.update() and actual_use:
+ if updater.cmd:
+ return updater.restart()
+ # This code is reachable only for zip variant in py < 3.10
+ # It makes sense to exit here, but the old behavior is to continue
+ ydl.report_warning('Restart hypervideo to use the updated version')
+ # return 100, 'ERROR: The program must exit for the update to complete'
+
if not actual_use:
+ if pre_process:
+ return ydl._download_retcode
+
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
parser.error(
'You must provide at least one URL.\n'
'Type hypervideo --help to see a list of all options.')
+ parser.destroy()
try:
if opts.load_info_filename is not None:
- retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
+ return ydl.download_with_info_file(expand_path(opts.load_info_filename))
else:
- retcode = ydl.download(all_urls)
+ return ydl.download(all_urls)
except DownloadCancelled:
ydl.to_screen('Aborting remaining downloads')
- retcode = 101
-
- sys.exit(retcode)
+ return 101
def main(argv=None):
try:
- _real_main(argv)
+ _exit(*variadic(_real_main(argv)))
except DownloadError:
- sys.exit(1)
+ _exit(1)
except SameFileError as e:
- sys.exit(f'ERROR: {e}')
+ _exit(f'ERROR: {e}')
except KeyboardInterrupt:
- sys.exit('\nERROR: Interrupted by user')
+ _exit('\nERROR: Interrupted by user')
except BrokenPipeError as e:
# https://docs.python.org/3/library/signal.html#note-on-sigpipe
devnull = os.open(os.devnull, os.O_WRONLY)
os.dup2(devnull, sys.stdout.fileno())
- sys.exit(f'\nERROR: {e}')
+ _exit(f'\nERROR: {e}')
+ except optparse.OptParseError as e:
+ _exit(2, f'\n{e}')
+
+from .extractor import gen_extractors, list_extractors
__all__ = [
'main',
diff --git a/hypervideo_dl/__main__.py b/hypervideo_dl/__main__.py
index 49765e4..c45082e 100644
--- a/hypervideo_dl/__main__.py
+++ b/hypervideo_dl/__main__.py
@@ -1,13 +1,11 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
# Execute with
-# $ python hypervideo_dl/__main__.py (2.6+)
-# $ python -m hypervideo_dl (2.7+)
+# $ python -m hypervideo_dl
import sys
-if __package__ is None and not hasattr(sys, 'frozen'):
+if __package__ is None and not getattr(sys, 'frozen', False):
# direct call of __main__.py
import os.path
path = os.path.realpath(os.path.abspath(__file__))
diff --git a/hypervideo_dl/aes.py b/hypervideo_dl/aes.py
index b37f0dd..60ce99c 100644
--- a/hypervideo_dl/aes.py
+++ b/hypervideo_dl/aes.py
@@ -1,26 +1,18 @@
-from __future__ import unicode_literals
-
+import base64
from math import ceil
-from .compat import (
- compat_b64decode,
- compat_ord,
- compat_pycrypto_AES,
-)
-from .utils import (
- bytes_to_intlist,
- intlist_to_bytes,
-)
-
+from .compat import compat_ord
+from .dependencies import Cryptodome_AES
+from .utils import bytes_to_intlist, intlist_to_bytes
-if compat_pycrypto_AES:
+if Cryptodome_AES:
def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using pycryptodome """
- return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data)
+ return Cryptodome_AES.new(key, Cryptodome_AES.MODE_CBC, iv).decrypt(data)
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using pycryptodome """
- return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
+ return Cryptodome_AES.new(key, Cryptodome_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
else:
def aes_cbc_decrypt_bytes(data, key, iv):
@@ -32,16 +24,59 @@ else:
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
+def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
+ return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
+
+
+BLOCK_SIZE_BYTES = 16
+
+
def unpad_pkcs7(data):
return data[:-compat_ord(data[-1])]
-BLOCK_SIZE_BYTES = 16
+def pkcs7_padding(data):
+ """
+ PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @returns {int[]} padded data
+ """
+
+ remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
+ return data + [remaining_length] * remaining_length
+
+
+def pad_block(block, padding_mode):
+ """
+ Pad a block with the given padding mode
+ @param {int[]} block block to pad
+ @param padding_mode padding mode ('pkcs7', 'iso7816', 'whitespace' or 'zero')
+ @returns {int[]} padded block
+ """
+ padding_size = BLOCK_SIZE_BYTES - len(block)
+
+ PADDING_BYTE = {
+ 'pkcs7': padding_size,
+ 'iso7816': 0x0,
+ 'whitespace': 0x20,
+ 'zero': 0x0,
+ }
+
+ if padding_size < 0:
+ raise ValueError('Block size exceeded')
+ elif padding_mode not in PADDING_BYTE:
+ raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
+
+ if padding_mode == 'iso7816' and padding_size:
+ block = block + [0x80] # NB: += mutates list
+ padding_size -= 1
+
+ return block + [PADDING_BYTE[padding_mode]] * padding_size
def aes_ecb_encrypt(data, key, iv=None):
"""
- Encrypt with aes in ECB mode
+ Encrypt with aes in ECB mode. Using PKCS#7 padding
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
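After this patch the padding helpers are importable directly. Note that PKCS#7 always pads (a block-aligned input gains a whole extra block), while ISO 7816-4 writes a 0x80 marker and zero-fills:

```python
from hypervideo_dl.aes import pad_block, pkcs7_padding

print(pkcs7_padding(list(b'abc'))[-3:])      # [13, 13, 13] - thirteen 0x0d bytes in total
print(len(pkcs7_padding([0] * 16)))          # 32 - aligned input gains a full block
print(pad_block(list(b'a'), 'iso7816')[:3])  # [97, 128, 0] - 0x80 marker, then zeros
```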
@@ -54,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None):
encrypted_data = []
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- encrypted_data += aes_encrypt(block, expanded_key)
- encrypted_data = encrypted_data[:len(data)]
+ encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)
return encrypted_data
@@ -145,13 +179,14 @@ def aes_cbc_decrypt(data, key, iv):
return decrypted_data
-def aes_cbc_encrypt(data, key, iv):
+def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
"""
- Encrypt with aes in CBC mode. Using PKCS#7 padding
+ Encrypt with aes in CBC mode
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
+ @param padding_mode Padding mode to use
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
@@ -161,8 +196,8 @@ def aes_cbc_encrypt(data, key, iv):
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- remaining_length = BLOCK_SIZE_BYTES - len(block)
- block += [remaining_length] * remaining_length
+ block = pad_block(block, padding_mode)
+
mixed_block = xor(block, previous_cipher_block)
encrypted_block = aes_encrypt(mixed_block, expanded_key)
@@ -273,8 +308,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
"""
NONCE_LENGTH_BYTES = 8
- data = bytes_to_intlist(compat_b64decode(data))
- password = bytes_to_intlist(password.encode('utf-8'))
+ data = bytes_to_intlist(base64.b64decode(data))
+ password = bytes_to_intlist(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
@@ -503,20 +538,30 @@ def ghash(subkey, data):
last_y = [0] * BLOCK_SIZE_BYTES
for i in range(0, len(data), BLOCK_SIZE_BYTES):
- block = data[i : i + BLOCK_SIZE_BYTES] # noqa: E203
+ block = data[i: i + BLOCK_SIZE_BYTES]
last_y = block_product(xor(last_y, block), subkey)
return last_y
__all__ = [
- 'aes_ctr_decrypt',
'aes_cbc_decrypt',
'aes_cbc_decrypt_bytes',
+ 'aes_ctr_decrypt',
'aes_decrypt_text',
- 'aes_encrypt',
+ 'aes_decrypt',
+ 'aes_ecb_decrypt',
'aes_gcm_decrypt_and_verify',
'aes_gcm_decrypt_and_verify_bytes',
+
+ 'aes_cbc_encrypt',
+ 'aes_cbc_encrypt_bytes',
+ 'aes_ctr_encrypt',
+ 'aes_ecb_encrypt',
+ 'aes_encrypt',
+
'key_expansion',
+ 'pad_block',
+ 'pkcs7_padding',
'unpad_pkcs7',
]
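A round-trip through the newly exported CBC byte helpers. This exercises the pure-Python fallback; with pycryptodome installed the decrypt path is delegated to it, with the same result:

```python
from hypervideo_dl.aes import (
    aes_cbc_decrypt_bytes, aes_cbc_encrypt_bytes, unpad_pkcs7)

key, iv = b'0123456789abcdef', b'\x00' * 16
ciphertext = aes_cbc_encrypt_bytes(b'secret message', key, iv)  # PKCS#7 padded by default
assert unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, iv)) == b'secret message'
```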
diff --git a/hypervideo_dl/cache.py b/hypervideo_dl/cache.py
index 24acb1b..2e9c1ef 100644
--- a/hypervideo_dl/cache.py
+++ b/hypervideo_dl/cache.py
@@ -1,28 +1,23 @@
-from __future__ import unicode_literals
-
+import contextlib
import errno
-import io
import json
import os
import re
import shutil
import traceback
-from .compat import compat_getenv
-from .utils import (
- expand_path,
- write_json_file,
-)
+from .utils import expand_path, traverse_obj, version_tuple, write_json_file
+from .version import __version__
-class Cache(object):
+class Cache:
def __init__(self, ydl):
self._ydl = ydl
def _get_root_dir(self):
res = self._ydl.params.get('cachedir')
if res is None:
- cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
+ cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'hypervideo')
return expand_path(res)
@@ -31,7 +26,7 @@ class Cache(object):
'invalid section %r' % section
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
return os.path.join(
- self._get_root_dir(), section, '%s.%s' % (key, dtype))
+ self._get_root_dir(), section, f'{key}.{dtype}')
@property
def enabled(self):
@@ -51,33 +46,37 @@ class Cache(object):
if ose.errno != errno.EEXIST:
raise
self._ydl.write_debug(f'Saving {section}.{key} to cache')
- write_json_file(data, fn)
+ write_json_file({'hypervideo_version': __version__, 'data': data}, fn)
except Exception:
tb = traceback.format_exc()
- self._ydl.report_warning(
- 'Writing cache to %r failed: %s' % (fn, tb))
+ self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')
+
+ def _validate(self, data, min_ver):
+ version = traverse_obj(data, 'hypervideo_version')
+ if not version: # Backward compatibility
+ data, version = {'data': data}, '2022.08.19'
+ if not min_ver or version_tuple(version) >= version_tuple(min_ver):
+ return data['data']
+ self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')
- def load(self, section, key, dtype='json', default=None):
+ def load(self, section, key, dtype='json', default=None, *, min_ver=None):
assert dtype in ('json',)
if not self.enabled:
return default
cache_fn = self._get_cache_fn(section, key, dtype)
- try:
+ with contextlib.suppress(OSError):
try:
- with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+ with open(cache_fn, encoding='utf-8') as cachef:
self._ydl.write_debug(f'Loading {section}.{key} from cache')
- return json.load(cachef)
- except ValueError:
+ return self._validate(json.load(cachef), min_ver)
+ except (ValueError, KeyError):
try:
file_size = os.path.getsize(cache_fn)
- except (OSError, IOError) as oe:
+ except OSError as oe:
file_size = str(oe)
- self._ydl.report_warning(
- 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
- except IOError:
- pass # No cache available
+ self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})')
return default
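Cache entries are now wrapped as `{'hypervideo_version': ..., 'data': ...}` on disk, and callers may pass `min_ver` to discard entries written by older releases. A usage sketch with example section/key names, `cache` being `ydl.cache`:

```python
cache.store('youtube-sigfuncs', 'js_abc123', {'n': 42})
data = cache.load('youtube-sigfuncs', 'js_abc123', min_ver='2022.08.19')
# -> {'n': 42}; an entry written by a version older than min_ver is
#    discarded (pre-wrapping entries are treated as version 2022.08.19)
```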
diff --git a/hypervideo_dl/compat.py b/hypervideo_dl/compat.py
deleted file mode 100644
index bdea14c..0000000
--- a/hypervideo_dl/compat.py
+++ /dev/null
@@ -1,330 +0,0 @@
-# coding: utf-8
-
-import asyncio
-import base64
-import collections
-import ctypes
-import getpass
-import html
-import html.parser
-import http
-import http.client
-import http.cookiejar
-import http.cookies
-import http.server
-import itertools
-import optparse
-import os
-import re
-import shlex
-import shutil
-import socket
-import struct
-import subprocess
-import sys
-import tokenize
-import urllib
-import xml.etree.ElementTree as etree
-from subprocess import DEVNULL
-
-
-# HTMLParseError has been deprecated in Python 3.3 and removed in
-# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
-# and uniform cross-version exception handling
-class compat_HTMLParseError(Exception):
- pass
-
-
-# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
-# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines
-def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
- return ctypes.WINFUNCTYPE(*args, **kwargs)
-
-
-class _TreeBuilder(etree.TreeBuilder):
- def doctype(self, name, pubid, system):
- pass
-
-
-def compat_etree_fromstring(text):
- return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
-
-
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
-if compat_os_name == 'nt':
- def compat_shlex_quote(s):
- return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
-else:
- from shlex import quote as compat_shlex_quote
-
-
-def compat_ord(c):
- if type(c) is int:
- return c
- else:
- return ord(c)
-
-
-def compat_setenv(key, value, env=os.environ):
- env[key] = value
-
-
-if compat_os_name == 'nt' and sys.version_info < (3, 8):
- # os.path.realpath on Windows does not follow symbolic links
- # prior to Python 3.8 (see https://bugs.python.org/issue9949)
- def compat_realpath(path):
- while os.path.islink(path):
- path = os.path.abspath(os.readlink(path))
- return path
-else:
- compat_realpath = os.path.realpath
-
-
-def compat_print(s):
- assert isinstance(s, compat_str)
- print(s)
-
-
-# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
-# See http://bugs.python.org/issue9161 for what is broken
-def workaround_optparse_bug9161():
- op = optparse.OptionParser()
- og = optparse.OptionGroup(op, 'foo')
- try:
- og.add_option('-t')
- except TypeError:
- real_add_option = optparse.OptionGroup.add_option
-
- def _compat_add_option(self, *args, **kwargs):
- enc = lambda v: (
- v.encode('ascii', 'replace') if isinstance(v, compat_str)
- else v)
- bargs = [enc(a) for a in args]
- bkwargs = dict(
- (k, enc(v)) for k, v in kwargs.items())
- return real_add_option(self, *bargs, **bkwargs)
- optparse.OptionGroup.add_option = _compat_add_option
-
-
-try:
- compat_Pattern = re.Pattern
-except AttributeError:
- compat_Pattern = type(re.compile(''))
-
-
-try:
- compat_Match = re.Match
-except AttributeError:
- compat_Match = type(re.compile('').match(''))
-
-
-try:
- compat_asyncio_run = asyncio.run # >= 3.7
-except AttributeError:
- def compat_asyncio_run(coro):
- try:
- loop = asyncio.get_event_loop()
- except RuntimeError:
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- loop.run_until_complete(coro)
-
- asyncio.run = compat_asyncio_run
-
-
-try: # >= 3.7
- asyncio.tasks.all_tasks
-except AttributeError:
- asyncio.tasks.all_tasks = asyncio.tasks.Task.all_tasks
-
-try:
- import websockets as compat_websockets
-except ImportError:
- compat_websockets = None
-
-# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
-# See https://github.com/hypervideo/hypervideo/issues/792
-# https://docs.python.org/3/library/os.path.html#os.path.expanduser
-if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ:
- _userhome = os.environ['HOME']
-
- def compat_expanduser(path):
- if not path.startswith('~'):
- return path
- i = path.replace('\\', '/', 1).find('/') # ~user
- if i < 0:
- i = len(path)
- userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome
- return userhome + path[i:]
-else:
- compat_expanduser = os.path.expanduser
-
-
-try:
- from Cryptodome.Cipher import AES as compat_pycrypto_AES
-except ImportError:
- try:
- from Crypto.Cipher import AES as compat_pycrypto_AES
- except ImportError:
- compat_pycrypto_AES = None
-
-try:
- import brotlicffi as compat_brotli
-except ImportError:
- try:
- import brotli as compat_brotli
- except ImportError:
- compat_brotli = None
-
-WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
-
-
-def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
- if compat_os_name != 'nt':
- return
- global WINDOWS_VT_MODE
- startupinfo = subprocess.STARTUPINFO()
- startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
- try:
- subprocess.Popen('', shell=True, startupinfo=startupinfo)
- WINDOWS_VT_MODE = True
- except Exception:
- pass
-
-
-# Deprecated
-
-compat_basestring = str
-compat_chr = chr
-compat_filter = filter
-compat_input = input
-compat_integer_types = (int, )
-compat_kwargs = lambda kwargs: kwargs
-compat_map = map
-compat_numeric_types = (int, float, complex)
-compat_str = str
-compat_xpath = lambda xpath: xpath
-compat_zip = zip
-
-compat_collections_abc = collections.abc
-compat_HTMLParser = html.parser.HTMLParser
-compat_HTTPError = urllib.error.HTTPError
-compat_Struct = struct.Struct
-compat_b64decode = base64.b64decode
-compat_cookiejar = http.cookiejar
-compat_cookiejar_Cookie = compat_cookiejar.Cookie
-compat_cookies = http.cookies
-compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
-compat_etree_Element = etree.Element
-compat_etree_register_namespace = etree.register_namespace
-compat_get_terminal_size = shutil.get_terminal_size
-compat_getenv = os.getenv
-compat_getpass = getpass.getpass
-compat_html_entities = html.entities
-compat_html_entities_html5 = compat_html_entities.html5
-compat_http_client = http.client
-compat_http_server = http.server
-compat_itertools_count = itertools.count
-compat_parse_qs = urllib.parse.parse_qs
-compat_shlex_split = shlex.split
-compat_socket_create_connection = socket.create_connection
-compat_struct_pack = struct.pack
-compat_struct_unpack = struct.unpack
-compat_subprocess_get_DEVNULL = lambda: DEVNULL
-compat_tokenize_tokenize = tokenize.tokenize
-compat_urllib_error = urllib.error
-compat_urllib_parse = urllib.parse
-compat_urllib_parse_quote = urllib.parse.quote
-compat_urllib_parse_quote_plus = urllib.parse.quote_plus
-compat_urllib_parse_unquote = urllib.parse.unquote
-compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
-compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
-compat_urllib_parse_urlencode = urllib.parse.urlencode
-compat_urllib_parse_urlparse = urllib.parse.urlparse
-compat_urllib_parse_urlunparse = urllib.parse.urlunparse
-compat_urllib_request = urllib.request
-compat_urllib_request_DataHandler = urllib.request.DataHandler
-compat_urllib_response = urllib.response
-compat_urlparse = urllib.parse
-compat_urlretrieve = urllib.request.urlretrieve
-compat_xml_parse_error = etree.ParseError
-
-
-# Set public objects
-
-__all__ = [
- 'WINDOWS_VT_MODE',
- 'compat_HTMLParseError',
- 'compat_HTMLParser',
- 'compat_HTTPError',
- 'compat_Match',
- 'compat_Pattern',
- 'compat_Struct',
- 'compat_asyncio_run',
- 'compat_b64decode',
- 'compat_basestring',
- 'compat_brotli',
- 'compat_chr',
- 'compat_collections_abc',
- 'compat_cookiejar',
- 'compat_cookiejar_Cookie',
- 'compat_cookies',
- 'compat_cookies_SimpleCookie',
- 'compat_ctypes_WINFUNCTYPE',
- 'compat_etree_Element',
- 'compat_etree_fromstring',
- 'compat_etree_register_namespace',
- 'compat_expanduser',
- 'compat_filter',
- 'compat_get_terminal_size',
- 'compat_getenv',
- 'compat_getpass',
- 'compat_html_entities',
- 'compat_html_entities_html5',
- 'compat_http_client',
- 'compat_http_server',
- 'compat_input',
- 'compat_integer_types',
- 'compat_itertools_count',
- 'compat_kwargs',
- 'compat_map',
- 'compat_numeric_types',
- 'compat_ord',
- 'compat_os_name',
- 'compat_parse_qs',
- 'compat_print',
- 'compat_pycrypto_AES',
- 'compat_realpath',
- 'compat_setenv',
- 'compat_shlex_quote',
- 'compat_shlex_split',
- 'compat_socket_create_connection',
- 'compat_str',
- 'compat_struct_pack',
- 'compat_struct_unpack',
- 'compat_subprocess_get_DEVNULL',
- 'compat_tokenize_tokenize',
- 'compat_urllib_error',
- 'compat_urllib_parse',
- 'compat_urllib_parse_quote',
- 'compat_urllib_parse_quote_plus',
- 'compat_urllib_parse_unquote',
- 'compat_urllib_parse_unquote_plus',
- 'compat_urllib_parse_unquote_to_bytes',
- 'compat_urllib_parse_urlencode',
- 'compat_urllib_parse_urlparse',
- 'compat_urllib_parse_urlunparse',
- 'compat_urllib_request',
- 'compat_urllib_request_DataHandler',
- 'compat_urllib_response',
- 'compat_urlparse',
- 'compat_urlretrieve',
- 'compat_websockets',
- 'compat_xml_parse_error',
- 'compat_xpath',
- 'compat_zip',
- 'windows_enable_vt_mode',
- 'workaround_optparse_bug9161',
-]
diff --git a/hypervideo_dl/compat/__init__.py b/hypervideo_dl/compat/__init__.py
new file mode 100644
index 0000000..2f2621b
--- /dev/null
+++ b/hypervideo_dl/compat/__init__.py
@@ -0,0 +1,78 @@
+import os
+import sys
+import warnings
+import xml.etree.ElementTree as etree
+
+from ._deprecated import * # noqa: F401, F403
+from .compat_utils import passthrough_module
+
+# XXX: Implement this the same way as other DeprecationWarnings without circular import
+passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
+ DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=3))
+
+
+# HTMLParseError was deprecated in Python 3.3 and removed in Python 3.5.
+# Introducing a dummy exception for Python >3.5 allows compatible and
+# uniform cross-version exception handling
+class compat_HTMLParseError(ValueError):
+ pass
+
+
+class _TreeBuilder(etree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass
+
+
+def compat_etree_fromstring(text):
+ return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
+
+
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+if compat_os_name == 'nt':
+ def compat_shlex_quote(s):
+ import re
+ return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+ from shlex import quote as compat_shlex_quote # noqa: F401
+
+
+def compat_ord(c):
+ return c if isinstance(c, int) else ord(c)
+
+
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+ # os.path.realpath on Windows does not follow symbolic links
+ # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+ def compat_realpath(path):
+ while os.path.islink(path):
+ path = os.path.abspath(os.readlink(path))
+ return os.path.realpath(path)
+else:
+ compat_realpath = os.path.realpath
+
+
+# Python 3.8+ does not honor %HOME% on Windows, but this breaks compatibility with youtube-dl
+# See https://github.com/hypervideo/hypervideo/issues/792
+# https://docs.python.org/3/library/os.path.html#os.path.expanduser
+if compat_os_name in ('nt', 'ce'):
+ def compat_expanduser(path):
+ HOME = os.environ.get('HOME')
+ if not HOME:
+ return os.path.expanduser(path)
+ elif not path.startswith('~'):
+ return path
+ i = path.replace('\\', '/', 1).find('/') # ~user
+ if i < 0:
+ i = len(path)
+ userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME
+ return userhome + path[i:]
+else:
+ compat_expanduser = os.path.expanduser
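+
+# For example, on Windows with HOME set to C:\Users\alice (illustrative value):
+#   compat_expanduser('~/x.txt') -> 'C:\\Users\\alice/x.txt'
+#   compat_expanduser('~bob/x.txt') -> 'C:\\Users\\bob/x.txt'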
+
+
+# NB: Add modules that are imported dynamically here so that PyInstaller can find them
+# See https://github.com/pyinstaller/pyinstaller-hooks-contrib/issues/438
+if False:
+ from . import _legacy # noqa: F401
diff --git a/hypervideo_dl/compat/_deprecated.py b/hypervideo_dl/compat/_deprecated.py
new file mode 100644
index 0000000..342f1f8
--- /dev/null
+++ b/hypervideo_dl/compat/_deprecated.py
@@ -0,0 +1,16 @@
+"""Deprecated - New code should avoid these"""
+
+import base64
+import urllib.error
+import urllib.parse
+
+compat_str = str
+
+compat_b64decode = base64.b64decode
+
+compat_HTTPError = urllib.error.HTTPError
+compat_urlparse = urllib.parse
+compat_parse_qs = urllib.parse.parse_qs
+compat_urllib_parse_unquote = urllib.parse.unquote
+compat_urllib_parse_urlencode = urllib.parse.urlencode
+compat_urllib_parse_urlparse = urllib.parse.urlparse
diff --git a/hypervideo_dl/compat/_legacy.py b/hypervideo_dl/compat/_legacy.py
new file mode 100644
index 0000000..d19333d
--- /dev/null
+++ b/hypervideo_dl/compat/_legacy.py
@@ -0,0 +1,97 @@
+""" Do not use! """
+
+import collections
+import ctypes
+import getpass
+import html.entities
+import html.parser
+import http.client
+import http.cookiejar
+import http.cookies
+import http.server
+import itertools
+import os
+import shlex
+import shutil
+import socket
+import struct
+import tokenize
+import urllib.error
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree as etree
+from subprocess import DEVNULL
+
+# isort: split
+import asyncio # noqa: F401
+import re # noqa: F401
+from asyncio import run as compat_asyncio_run # noqa: F401
+from re import Pattern as compat_Pattern # noqa: F401
+from re import match as compat_Match # noqa: F401
+
+from .compat_utils import passthrough_module
+from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401
+from ..dependencies import brotli as compat_brotli # noqa: F401
+from ..dependencies import websockets as compat_websockets # noqa: F401
+
+passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
+
+
+# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
+# will not work since ctypes.WINFUNCTYPE does not exist on UNIX machines
+def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
+def compat_setenv(key, value, env=os.environ):
+ env[key] = value
+
+
+compat_basestring = str
+compat_casefold = str.casefold
+compat_chr = chr
+compat_collections_abc = collections.abc
+compat_cookiejar = http.cookiejar
+compat_cookiejar_Cookie = http.cookiejar.Cookie
+compat_cookies = http.cookies
+compat_cookies_SimpleCookie = http.cookies.SimpleCookie
+compat_etree_Element = etree.Element
+compat_etree_register_namespace = etree.register_namespace
+compat_filter = filter
+compat_get_terminal_size = shutil.get_terminal_size
+compat_getenv = os.getenv
+compat_getpass = getpass.getpass
+compat_html_entities = html.entities
+compat_html_entities_html5 = html.entities.html5
+compat_HTMLParser = html.parser.HTMLParser
+compat_http_client = http.client
+compat_http_server = http.server
+compat_input = input
+compat_integer_types = (int, )
+compat_itertools_count = itertools.count
+compat_kwargs = lambda kwargs: kwargs
+compat_map = map
+compat_numeric_types = (int, float, complex)
+compat_print = print
+compat_shlex_split = shlex.split
+compat_socket_create_connection = socket.create_connection
+compat_Struct = struct.Struct
+compat_struct_pack = struct.pack
+compat_struct_unpack = struct.unpack
+compat_subprocess_get_DEVNULL = lambda: DEVNULL
+compat_tokenize_tokenize = tokenize.tokenize
+compat_urllib_error = urllib.error
+compat_urllib_parse = urllib.parse
+compat_urllib_parse_quote = urllib.parse.quote
+compat_urllib_parse_quote_plus = urllib.parse.quote_plus
+compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
+compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
+compat_urllib_parse_urlunparse = urllib.parse.urlunparse
+compat_urllib_request = urllib.request
+compat_urllib_request_DataHandler = urllib.request.DataHandler
+compat_urllib_response = urllib.response
+compat_urlretrieve = urllib.request.urlretrieve
+compat_xml_parse_error = etree.ParseError
+compat_xpath = lambda xpath: xpath
+compat_zip = zip
+workaround_optparse_bug9161 = lambda: None
diff --git a/hypervideo_dl/compat/compat_utils.py b/hypervideo_dl/compat/compat_utils.py
new file mode 100644
index 0000000..1bf6566
--- /dev/null
+++ b/hypervideo_dl/compat/compat_utils.py
@@ -0,0 +1,70 @@
+import collections
+import contextlib
+import importlib
+import sys
+import types
+
+_NO_ATTRIBUTE = object()
+
+_Package = collections.namedtuple('Package', ('name', 'version'))
+
+
+def get_package_info(module):
+ parent = module.__name__.split('.')[0]
+ parent_module = None
+ with contextlib.suppress(ImportError):
+ parent_module = importlib.import_module(parent)
+
+ for attr in ('__version__', 'version_string', 'version'):
+ version = getattr(parent_module, attr, None)
+ if version is not None:
+ break
+ return _Package(getattr(module, '_hypervideo_dl__identifier', parent), str(version))
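+
+# For example, when pyxattr provides the `xattr` module, ..dependencies tags it
+# with _hypervideo_dl__identifier = 'pyxattr', so this returns something like
+# Package(name='pyxattr', version='0.7.2') (the version shown is illustrative)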
+
+
+def _is_package(module):
+ try:
+ module.__getattribute__('__path__')
+ except AttributeError:
+ return False
+ return True
+
+
+def passthrough_module(parent, child, allowed_attributes=None, *, callback=lambda _: None):
+ parent_module = importlib.import_module(parent)
+ child_module = None # Import child module only as needed
+
+ class PassthroughModule(types.ModuleType):
+ def __getattr__(self, attr):
+ if _is_package(parent_module):
+ with contextlib.suppress(ImportError):
+ return importlib.import_module(f'.{attr}', parent)
+
+ ret = self.__from_child(attr)
+ if ret is _NO_ATTRIBUTE:
+ raise AttributeError(f'module {parent} has no attribute {attr}')
+ callback(attr)
+ return ret
+
+ def __from_child(self, attr):
+ if allowed_attributes is None:
+ if attr.startswith('__') and attr.endswith('__'):
+ return _NO_ATTRIBUTE
+ elif attr not in allowed_attributes:
+ return _NO_ATTRIBUTE
+
+ nonlocal child_module
+ child_module = child_module or importlib.import_module(child, parent)
+
+ with contextlib.suppress(AttributeError):
+ return getattr(child_module, attr)
+
+ if _is_package(child_module):
+ with contextlib.suppress(ImportError):
+ return importlib.import_module(f'.{attr}', child)
+
+ return _NO_ATTRIBUTE
+
+ # Python 3.6 does not have module level __getattr__
+ # https://peps.python.org/pep-0562/
+ sys.modules[parent].__class__ = PassthroughModule
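+
+
+# Usage sketch (as in ./functools.py and ./shutil.py below): replace the calling
+# module's class so that attributes it does not define fall through to the
+# named module, e.g.:
+#
+#   passthrough_module(__name__, 'functools')
+#   del passthrough_module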
diff --git a/hypervideo_dl/compat/functools.py b/hypervideo_dl/compat/functools.py
new file mode 100644
index 0000000..ec003ea
--- /dev/null
+++ b/hypervideo_dl/compat/functools.py
@@ -0,0 +1,26 @@
+# flake8: noqa: F405
+from functools import * # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'functools')
+del passthrough_module
+
+try:
+ cache # >= 3.9
+except NameError:
+ cache = lru_cache(maxsize=None)
+
+try:
+ cached_property # >= 3.8
+except NameError:
+ class cached_property:
+ def __init__(self, func):
+ update_wrapper(self, func)
+ self.func = func
+
+ def __get__(self, instance, _):
+ if instance is None:
+ return self
+ setattr(instance, self.func.__name__, self.func(instance))
+ return getattr(instance, self.func.__name__)
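+
+
+# Minimal usage sketch (`Example` and `compute` are hypothetical):
+#
+#   class Example:
+#       @cached_property
+#       def value(self):
+#           return compute()  # runs once; the result then shadows the descriptor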
diff --git a/hypervideo_dl/compat/imghdr.py b/hypervideo_dl/compat/imghdr.py
new file mode 100644
index 0000000..5d64ab0
--- /dev/null
+++ b/hypervideo_dl/compat/imghdr.py
@@ -0,0 +1,16 @@
+tests = {
+ 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
+ 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
+ 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
+ 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
+}
+
+
+def what(file=None, h=None):
+ """Detect format of image (Currently supports jpeg, png, webp, gif only)
+ Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
+ """
+ if h is None:
+ with open(file, 'rb') as f:
+ h = f.read(12)
+ return next((type_ for type_, test in tests.items() if test(h)), None)
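+
+
+# For example, a PNG file begins with b'\x89PNG\r\n\x1a\n', so
+# what(h=b'\x89PNG\r\n\x1a\n\x00\x00\x00\x00') == 'png', while an
+# unrecognized signature yields None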
diff --git a/hypervideo_dl/compat/shutil.py b/hypervideo_dl/compat/shutil.py
new file mode 100644
index 0000000..23239d5
--- /dev/null
+++ b/hypervideo_dl/compat/shutil.py
@@ -0,0 +1,30 @@
+# flake8: noqa: F405
+from shutil import * # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'shutil')
+del passthrough_module
+
+
+import sys
+
+if sys.platform.startswith('freebsd'):
+ import errno
+ import os
+ import shutil
+
+ # Workaround for PermissionError when using restricted ACL mode on FreeBSD
+ def copy2(src, dst, *args, **kwargs):
+ if os.path.isdir(dst):
+ dst = os.path.join(dst, os.path.basename(src))
+ shutil.copyfile(src, dst, *args, **kwargs)
+ try:
+ shutil.copystat(src, dst, *args, **kwargs)
+ except PermissionError as e:
+ if e.errno != getattr(errno, 'EPERM', None):
+ raise
+ return dst
+
+ def move(*args, copy_function=copy2, **kwargs):
+ return shutil.move(*args, copy_function=copy_function, **kwargs)
diff --git a/hypervideo_dl/cookies.py b/hypervideo_dl/cookies.py
index f963729..97457a1 100644
--- a/hypervideo_dl/cookies.py
+++ b/hypervideo_dl/cookies.py
@@ -1,12 +1,16 @@
+import base64
import contextlib
-import ctypes
+import http.cookiejar
+import http.cookies
import json
import os
+import re
import shutil
import struct
import subprocess
import sys
import tempfile
+import time
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac
@@ -16,39 +20,21 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7,
)
-from .compat import (
- compat_b64decode,
- compat_cookiejar_Cookie,
+from .dependencies import (
+ _SECRETSTORAGE_UNAVAILABLE_REASON,
+ secretstorage,
+ sqlite3,
)
+from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
- error_to_str,
- expand_path,
Popen,
YoutubeDLCookieJar,
+ error_to_str,
+ expand_path,
+ is_path_like,
+ try_call,
)
-try:
- import sqlite3
- SQLITE_AVAILABLE = True
-except ImportError:
- # although sqlite3 is part of the standard library, it is possible to compile python without
- # sqlite support. See: https://github.com/hypervideo/hypervideo/issues/544
- SQLITE_AVAILABLE = False
-
-
-try:
- import secretstorage
- SECRETSTORAGE_AVAILABLE = True
-except ImportError:
- SECRETSTORAGE_AVAILABLE = False
- SECRETSTORAGE_UNAVAILABLE_REASON = (
- 'as the `secretstorage` module is not installed. '
- 'Please install by running `python3 -m pip install secretstorage`.')
-except Exception as _err:
- SECRETSTORAGE_AVAILABLE = False
- SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
-
-
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@@ -73,37 +59,72 @@ class YDLLogger:
if self._ydl:
self._ydl.report_error(message)
+ class ProgressBar(MultilinePrinter):
+ _DELAY, _timer = 0.1, 0
+
+ def print(self, message):
+ if time.time() - self._timer > self._DELAY:
+ self.print_at_line(f'[Cookies] {message}', 0)
+ self._timer = time.time()
+
+ def progress_bar(self):
+ """Return a context manager with a print method. (Optional)"""
+ # Do not print to files/pipes, loggers, or when --no-progress is used
+ if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
+ return
+ file = self._ydl._out_files.error
+ try:
+ if not file.isatty():
+ return
+ except BaseException:
+ return
+ return self.ProgressBar(file, preserve_output=False)
+
+
+def _create_progress_bar(logger):
+ if hasattr(logger, 'progress_bar'):
+ printer = logger.progress_bar()
+ if printer:
+ return printer
+ printer = QuietMultilinePrinter()
+ printer.print = lambda _: None
+ return printer
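+
+
+# NB: the result can be used unconditionally; when progress display is not
+# possible, a QuietMultilinePrinter with a no-op print() is returned, so
+# `with _create_progress_bar(logger) as progress_bar: ...` is always safe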
+
def load_cookies(cookie_file, browser_specification, ydl):
cookie_jars = []
if browser_specification is not None:
- browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
- cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring))
+ browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
+ cookie_jars.append(
+ extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
if cookie_file is not None:
- cookie_file = expand_path(cookie_file)
+ is_filename = is_path_like(cookie_file)
+ if is_filename:
+ cookie_file = expand_path(cookie_file)
+
jar = YoutubeDLCookieJar(cookie_file)
- if os.access(cookie_file, os.R_OK):
+ if not is_filename or os.access(cookie_file, os.R_OK):
jar.load(ignore_discard=True, ignore_expires=True)
cookie_jars.append(jar)
return _merge_cookie_jars(cookie_jars)
-def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
+def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
if browser_name == 'firefox':
- return _extract_firefox_cookies(profile, logger)
+ return _extract_firefox_cookies(profile, container, logger)
elif browser_name == 'safari':
return _extract_safari_cookies(profile, logger)
elif browser_name in CHROMIUM_BASED_BROWSERS:
return _extract_chrome_cookies(browser_name, profile, keyring, logger)
else:
- raise ValueError('unknown browser: {}'.format(browser_name))
+ raise ValueError(f'unknown browser: {browser_name}')
-def _extract_firefox_cookies(profile, logger):
+def _extract_firefox_cookies(profile, container, logger):
logger.info('Extracting cookies from firefox')
- if not SQLITE_AVAILABLE:
+ if not sqlite3:
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
'Please use a python interpreter compiled with sqlite3 support')
return YoutubeDLCookieJar()
@@ -115,25 +136,54 @@ def _extract_firefox_cookies(profile, logger):
else:
search_root = os.path.join(_firefox_browser_dir(), profile)
- cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
+ cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
if cookie_database_path is None:
- raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
- logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
+ raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
+ logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
+
+ container_id = None
+ if container not in (None, 'none'):
+ containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
+ if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
+ raise FileNotFoundError(f'could not read containers.json in {search_root}')
+ with open(containers_path) as containers:
+ identities = json.load(containers).get('identities', [])
+ container_id = next((context.get('userContextId') for context in identities if container in (
+ context.get('name'),
+ try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
+ )), None)
+ if not isinstance(container_id, int):
+ raise ValueError(f'could not find firefox container "{container}" in containers.json')
with tempfile.TemporaryDirectory(prefix='hypervideo_dl') as tmpdir:
cursor = None
try:
cursor = _open_database_copy(cookie_database_path, tmpdir)
- cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
+ if isinstance(container_id, int):
+ logger.debug(
+ f'Only loading cookies from firefox container "{container}", ID {container_id}')
+ cursor.execute(
+ 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
+ (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
+ elif container == 'none':
+ logger.debug('Only loading cookies not belonging to any container')
+ cursor.execute(
+ 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
+ else:
+ cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
jar = YoutubeDLCookieJar()
- for host, name, value, path, expiry, is_secure in cursor.fetchall():
- cookie = compat_cookiejar_Cookie(
- version=0, name=name, value=value, port=None, port_specified=False,
- domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
- path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
- comment=None, comment_url=None, rest={})
- jar.set_cookie(cookie)
- logger.info('Extracted {} cookies from firefox'.format(len(jar)))
+ with _create_progress_bar(logger) as progress_bar:
+ table = cursor.fetchall()
+ total_cookie_count = len(table)
+ for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
+ progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
+ cookie = http.cookiejar.Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
+ comment=None, comment_url=None, rest={})
+ jar.set_cookie(cookie)
+ logger.info(f'Extracted {len(jar)} cookies from firefox')
return jar
finally:
if cursor is not None:
@@ -141,39 +191,25 @@ def _extract_firefox_cookies(profile, logger):
def _firefox_browser_dir():
- if sys.platform in ('linux', 'linux2'):
- return os.path.expanduser('~/.mozilla/firefox')
- elif sys.platform == 'win32':
- return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
+ if sys.platform in ('cygwin', 'win32'):
+ return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
elif sys.platform == 'darwin':
return os.path.expanduser('~/Library/Application Support/Firefox')
- else:
- raise ValueError('unsupported platform: {}'.format(sys.platform))
+ return os.path.expanduser('~/.mozilla/firefox')
def _get_chromium_based_browser_settings(browser_name):
# https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
- if sys.platform in ('linux', 'linux2'):
- config = _config_home()
- browser_dir = {
- 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
- 'chrome': os.path.join(config, 'google-chrome'),
- 'chromium': os.path.join(config, 'chromium'),
- 'edge': os.path.join(config, 'microsoft-edge'),
- 'opera': os.path.join(config, 'opera'),
- 'vivaldi': os.path.join(config, 'vivaldi'),
- }[browser_name]
-
- elif sys.platform == 'win32':
+ if sys.platform in ('cygwin', 'win32'):
appdata_local = os.path.expandvars('%LOCALAPPDATA%')
appdata_roaming = os.path.expandvars('%APPDATA%')
browser_dir = {
- 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'),
- 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'),
- 'chromium': os.path.join(appdata_local, r'Chromium\User Data'),
- 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'),
- 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'),
- 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'),
+ 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
+ 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
+ 'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
+ 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
+ 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
+ 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
}[browser_name]
elif sys.platform == 'darwin':
@@ -188,7 +224,15 @@ def _get_chromium_based_browser_settings(browser_name):
}[browser_name]
else:
- raise ValueError('unsupported platform: {}'.format(sys.platform))
+ config = _config_home()
+ browser_dir = {
+ 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
+ 'chrome': os.path.join(config, 'google-chrome'),
+ 'chromium': os.path.join(config, 'chromium'),
+ 'edge': os.path.join(config, 'microsoft-edge'),
+ 'opera': os.path.join(config, 'opera'),
+ 'vivaldi': os.path.join(config, 'vivaldi'),
+ }[browser_name]
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
# dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
@@ -211,11 +255,11 @@ def _get_chromium_based_browser_settings(browser_name):
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
- logger.info('Extracting cookies from {}'.format(browser_name))
+ logger.info(f'Extracting cookies from {browser_name}')
- if not SQLITE_AVAILABLE:
- logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
- 'Please use a python interpreter compiled with sqlite3 support').format(browser_name))
+ if not sqlite3:
+ logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
+ 'Please use a python interpreter compiled with sqlite3 support')
return YoutubeDLCookieJar()
config = _get_chromium_based_browser_settings(browser_name)
@@ -229,13 +273,13 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
if config['supports_profiles']:
search_root = os.path.join(config['browser_dir'], profile)
else:
- logger.error('{} does not support profiles'.format(browser_name))
+ logger.error(f'{browser_name} does not support profiles')
search_root = config['browser_dir']
- cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies')
+ cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
if cookie_database_path is None:
- raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
- logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
+ raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
+ logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
@@ -246,45 +290,55 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
cursor.connection.text_factory = bytes
column_names = _get_column_names(cursor, 'cookies')
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
- cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
- 'expires_utc, {} FROM cookies'.format(secure_column))
+ cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
jar = YoutubeDLCookieJar()
failed_cookies = 0
unencrypted_cookies = 0
- for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
- host_key = host_key.decode('utf-8')
- name = name.decode('utf-8')
- value = value.decode('utf-8')
- path = path.decode('utf-8')
-
- if not value and encrypted_value:
- value = decryptor.decrypt(encrypted_value)
- if value is None:
+ with _create_progress_bar(logger) as progress_bar:
+ table = cursor.fetchall()
+ total_cookie_count = len(table)
+ for i, line in enumerate(table):
+ progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
+ is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
+ if not cookie:
failed_cookies += 1
continue
- else:
- unencrypted_cookies += 1
-
- cookie = compat_cookiejar_Cookie(
- version=0, name=name, value=value, port=None, port_specified=False,
- domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
- path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
- comment=None, comment_url=None, rest={})
- jar.set_cookie(cookie)
+ elif not is_encrypted:
+ unencrypted_cookies += 1
+ jar.set_cookie(cookie)
if failed_cookies > 0:
- failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
+ failed_message = f' ({failed_cookies} could not be decrypted)'
else:
failed_message = ''
- logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
- counts = decryptor.cookie_counts.copy()
+ logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
+ counts = decryptor._cookie_counts.copy()
counts['unencrypted'] = unencrypted_cookies
- logger.debug('cookie version breakdown: {}'.format(counts))
+ logger.debug(f'cookie version breakdown: {counts}')
return jar
finally:
if cursor is not None:
cursor.connection.close()
+def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
+ host_key = host_key.decode()
+ name = name.decode()
+ value = value.decode()
+ path = path.decode()
+ is_encrypted = not value and encrypted_value
+
+ if is_encrypted:
+ value = decryptor.decrypt(encrypted_value)
+ if value is None:
+ return is_encrypted, None
+
+ return is_encrypted, http.cookiejar.Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
+ comment=None, comment_url=None, rest={})
+
+
class ChromeCookieDecryptor:
"""
Overview:
@@ -311,24 +365,18 @@ class ChromeCookieDecryptor:
- KeyStorageLinux::CreateService
"""
- def decrypt(self, encrypted_value):
- raise NotImplementedError
+ _cookie_counts = {}
- @property
- def cookie_counts(self):
- raise NotImplementedError
+ def decrypt(self, encrypted_value):
+ raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
- if sys.platform in ('linux', 'linux2'):
- return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
- elif sys.platform == 'darwin':
+ if sys.platform == 'darwin':
return MacChromeCookieDecryptor(browser_keyring_name, logger)
- elif sys.platform == 'win32':
+ elif sys.platform in ('win32', 'cygwin'):
return WindowsChromeCookieDecryptor(browser_root, logger)
- else:
- raise NotImplementedError('Chrome cookie decryption is not supported '
- 'on this platform: {}'.format(sys.platform))
+ return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
@@ -345,10 +393,6 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
- @property
- def cookie_counts(self):
- return self._cookie_counts
-
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
@@ -382,10 +426,6 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
- @property
- def cookie_counts(self):
- return self._cookie_counts
-
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
@@ -411,10 +451,6 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
self._v10_key = _get_windows_v10_key(browser_root, logger)
self._cookie_counts = {'v10': 0, 'other': 0}
- @property
- def cookie_counts(self):
- return self._cookie_counts
-
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
@@ -443,14 +479,14 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
self._cookie_counts['other'] += 1
# any other prefix means the data is DPAPI encrypted
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
- return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')
+ return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
def _extract_safari_cookies(profile, logger):
if profile is not None:
logger.error('safari does not support profiles')
if sys.platform != 'darwin':
- raise ValueError('unsupported platform: {}'.format(sys.platform))
+ raise ValueError(f'unsupported platform: {sys.platform}')
cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
@@ -464,7 +500,7 @@ def _extract_safari_cookies(profile, logger):
cookies_data = f.read()
jar = parse_safari_cookies(cookies_data, logger=logger)
- logger.info('Extracted {} cookies from safari'.format(len(jar)))
+ logger.info(f'Extracted {len(jar)} cookies from safari')
return jar
@@ -480,7 +516,7 @@ class DataParser:
def read_bytes(self, num_bytes):
if num_bytes < 0:
- raise ParserError('invalid read of {} bytes'.format(num_bytes))
+ raise ParserError(f'invalid read of {num_bytes} bytes')
end = self.cursor + num_bytes
if end > len(self._data):
raise ParserError('reached end of input')
@@ -491,7 +527,7 @@ class DataParser:
def expect_bytes(self, expected_value, message):
value = self.read_bytes(len(expected_value))
if value != expected_value:
- raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))
+ raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')
def read_uint(self, big_endian=False):
data_format = '>I' if big_endian else '<I'
@@ -506,16 +542,15 @@ class DataParser:
while True:
c = self.read_bytes(1)
if c == b'\x00':
- return b''.join(buffer).decode('utf-8')
+ return b''.join(buffer).decode()
else:
buffer.append(c)
def skip(self, num_bytes, description='unknown'):
if num_bytes > 0:
- self._logger.debug('skipping {} bytes ({}): {}'.format(
- num_bytes, description, self.read_bytes(num_bytes)))
+ self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
elif num_bytes < 0:
- raise ParserError('invalid skip of {} bytes'.format(num_bytes))
+ raise ParserError(f'invalid skip of {num_bytes} bytes')
def skip_to(self, offset, description='unknown'):
self.skip(offset - self.cursor, description)
@@ -542,15 +577,17 @@ def _parse_safari_cookies_page(data, jar, logger):
number_of_cookies = p.read_uint()
record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
if number_of_cookies == 0:
- logger.debug('a cookies page of size {} has no cookies'.format(len(data)))
+ logger.debug(f'a cookies page of size {len(data)} has no cookies')
return
p.skip_to(record_offsets[0], 'unknown page header field')
- for record_offset in record_offsets:
- p.skip_to(record_offset, 'space between records')
- record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
- p.read_bytes(record_length)
+ with _create_progress_bar(logger) as progress_bar:
+ for i, record_offset in enumerate(record_offsets):
+ progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
+ p.skip_to(record_offset, 'space between records')
+ record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
+ p.read_bytes(record_length)
p.skip_to_end('space in between pages')
@@ -587,7 +624,7 @@ def _parse_safari_cookies_record(data, jar, logger):
p.skip_to(record_size, 'space at the end of the record')
- cookie = compat_cookiejar_Cookie(
+ cookie = http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
@@ -686,7 +723,7 @@ def _choose_linux_keyring(logger):
SelectBackend
"""
desktop_environment = _get_linux_desktop_environment(os.environ)
- logger.debug('detected desktop environment: {}'.format(desktop_environment.name))
+ logger.debug(f'detected desktop environment: {desktop_environment.name}')
if desktop_environment == _LinuxDesktopEnvironment.KDE:
linux_keyring = _LinuxKeyring.KWALLET
elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
@@ -707,23 +744,21 @@ def _get_kwallet_network_wallet(logger):
"""
default_wallet = 'kdewallet'
try:
- proc = Popen([
+ stdout, _, returncode = Popen.run([
'dbus-send', '--session', '--print-reply=literal',
'--dest=org.kde.kwalletd5',
'/modules/kwalletd5',
'org.kde.KWallet.networkWallet'
- ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+ ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
- stdout, stderr = proc.communicate_or_kill()
- if proc.returncode != 0:
+ if returncode:
logger.warning('failed to read NetworkWallet')
return default_wallet
else:
- network_wallet = stdout.decode('utf-8').strip()
- logger.debug('NetworkWallet = "{}"'.format(network_wallet))
- return network_wallet
+ logger.debug(f'NetworkWallet = "{stdout.strip()}"')
+ return stdout.strip()
except Exception as e:
- logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
+ logger.warning(f'exception while obtaining NetworkWallet: {e}')
return default_wallet
@@ -739,17 +774,16 @@ def _get_kwallet_password(browser_keyring_name, logger):
network_wallet = _get_kwallet_network_wallet(logger)
try:
- proc = Popen([
+ stdout, _, returncode = Popen.run([
'kwallet-query',
- '--read-password', '{} Safe Storage'.format(browser_keyring_name),
- '--folder', '{} Keys'.format(browser_keyring_name),
+ '--read-password', f'{browser_keyring_name} Safe Storage',
+ '--folder', f'{browser_keyring_name} Keys',
network_wallet
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
- stdout, stderr = proc.communicate_or_kill()
- if proc.returncode != 0:
- logger.error('kwallet-query failed with return code {}. Please consult '
- 'the kwallet-query man page for details'.format(proc.returncode))
+ if returncode:
+ logger.error(f'kwallet-query failed with return code {returncode}. '
+ 'Please consult the kwallet-query man page for details')
return b''
else:
if stdout.lower().startswith(b'failed to read'):
@@ -764,17 +798,15 @@ def _get_kwallet_password(browser_keyring_name, logger):
return b''
else:
logger.debug('password found')
- if stdout[-1:] == b'\n':
- stdout = stdout[:-1]
- return stdout
+ return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
return b''
def _get_gnome_keyring_password(browser_keyring_name, logger):
- if not SECRETSTORAGE_AVAILABLE:
- logger.error('secretstorage not available {}'.format(SECRETSTORAGE_UNAVAILABLE_REASON))
+ if not secretstorage:
+ logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
return b''
# the Gnome keyring does not seem to organise keys in the same way as KWallet,
# using `dbus-monitor` during startup, it can be observed that chromium lists all keys
@@ -783,7 +815,7 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
with contextlib.closing(secretstorage.dbus_init()) as con:
col = secretstorage.get_default_collection(con)
for item in col.get_all_items():
- if item.get_label() == '{} Safe Storage'.format(browser_keyring_name):
+ if item.get_label() == f'{browser_keyring_name} Safe Storage':
return item.get_secret()
else:
logger.error('failed to read from keyring')
@@ -813,35 +845,35 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
def _get_mac_keyring_password(browser_keyring_name, logger):
logger.debug('using find-generic-password to obtain password from OSX keychain')
try:
- proc = Popen(
+ stdout, _, returncode = Popen.run(
['security', 'find-generic-password',
'-w', # write password to stdout
'-a', browser_keyring_name, # match 'account'
- '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service'
+ '-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
-
- stdout, stderr = proc.communicate_or_kill()
- if stdout[-1:] == b'\n':
- stdout = stdout[:-1]
- return stdout
+ if returncode:
+ logger.warning('find-generic-password failed')
+ return None
+ return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
return None
def _get_windows_v10_key(browser_root, logger):
- path = _find_most_recently_used_file(browser_root, 'Local State')
+ path = _find_most_recently_used_file(browser_root, 'Local State', logger)
if path is None:
logger.error('could not find local state file')
return None
- with open(path, 'r', encoding='utf8') as f:
+ logger.debug(f'Found local state file at "{path}"')
+ with open(path, encoding='utf8') as f:
data = json.load(f)
try:
base64_key = data['os_crypt']['encrypted_key']
except KeyError:
logger.error('no encrypted key in Local State')
return None
- encrypted_key = compat_b64decode(base64_key)
+ encrypted_key = base64.b64decode(base64_key)
prefix = b'DPAPI'
if not encrypted_key.startswith(prefix):
logger.error('invalid key')
@@ -856,7 +888,7 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
try:
- return plaintext.decode('utf-8')
+ return plaintext.decode()
except UnicodeDecodeError:
logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
return None
@@ -870,7 +902,7 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
return None
try:
- return plaintext.decode('utf-8')
+ return plaintext.decode()
except UnicodeDecodeError:
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
return None
@@ -881,10 +913,12 @@ def _decrypt_windows_dpapi(ciphertext, logger):
References:
- https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
"""
- from ctypes.wintypes import DWORD
+
+ import ctypes
+ import ctypes.wintypes
class DATA_BLOB(ctypes.Structure):
- _fields_ = [('cbData', DWORD),
+ _fields_ = [('cbData', ctypes.wintypes.DWORD),
('pbData', ctypes.POINTER(ctypes.c_char))]
buffer = ctypes.create_string_buffer(ciphertext)
@@ -921,17 +955,20 @@ def _open_database_copy(database_path, tmpdir):
def _get_column_names(cursor, table_name):
- table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
- return [row[1].decode('utf-8') for row in table_info]
+ table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
+ return [row[1].decode() for row in table_info]
-def _find_most_recently_used_file(root, filename):
+def _find_most_recently_used_file(root, filename, logger):
# if there are multiple browser profiles, take the most recently used one
- paths = []
- for root, dirs, files in os.walk(root):
- for file in files:
- if file == filename:
- paths.append(os.path.join(root, file))
+ i, paths = 0, []
+ with _create_progress_bar(logger) as progress_bar:
+ for curr_root, dirs, files in os.walk(root):
+ for file in files:
+ i += 1
+ progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
+ if file == filename:
+ paths.append(os.path.join(curr_root, file))
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
@@ -949,11 +986,102 @@ def _is_path(value):
return os.path.sep in value
-def _parse_browser_specification(browser_name, profile=None, keyring=None):
+def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
if browser_name not in SUPPORTED_BROWSERS:
raise ValueError(f'unsupported browser: "{browser_name}"')
if keyring not in (None, *SUPPORTED_KEYRINGS):
raise ValueError(f'unsupported keyring: "{keyring}"')
- if profile is not None and _is_path(profile):
- profile = os.path.expanduser(profile)
- return browser_name, profile, keyring
+ if profile is not None and _is_path(expand_path(profile)):
+ profile = expand_path(profile)
+ return browser_name, profile, keyring, container
+
+
+class LenientSimpleCookie(http.cookies.SimpleCookie):
+ """More lenient version of http.cookies.SimpleCookie"""
+ # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
+ # We use Morsel's legal key chars to avoid errors on setting values
+ _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
+ _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
+
+ _RESERVED = {
+ "expires",
+ "path",
+ "comment",
+ "domain",
+ "max-age",
+ "secure",
+ "httponly",
+ "version",
+ "samesite",
+ }
+
+ _FLAGS = {"secure", "httponly"}
+
+ # Added 'bad' group to catch the remaining value
+ _COOKIE_PATTERN = re.compile(r"""
+ \s* # Optional whitespace at start of cookie
+ (?P<key> # Start of group 'key'
+ [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
+ ) # End of group 'key'
+ ( # Optional group: there may not be a value.
+ \s*=\s* # Equal Sign
+ ( # Start of potential value
+ (?P<val> # Start of group 'val'
+ "(?:[^\\"]|\\.)*" # Any doublequoted string
+ | # or
+ \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
+ | # or
+ [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
+ ) # End of group 'val'
+ | # or
+ (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
+ ) # End of potential value
+ )? # End of optional value group
+ \s* # Any number of spaces.
+ (\s+|;|$) # Ending either at space, semicolon, or EOS.
+ """, re.ASCII | re.VERBOSE)
+
+ def load(self, data):
+ # Workaround for https://github.com/hypervideo/hypervideo/issues/4776
+ if not isinstance(data, str):
+ return super().load(data)
+
+ morsel = None
+ for match in self._COOKIE_PATTERN.finditer(data):
+ if match.group('bad'):
+ morsel = None
+ continue
+
+ key, value = match.group('key', 'val')
+
+ is_attribute = False
+ if key.startswith('$'):
+ key = key[1:]
+ is_attribute = True
+
+ lower_key = key.lower()
+ if lower_key in self._RESERVED:
+ if morsel is None:
+ continue
+
+ if value is None:
+ if lower_key not in self._FLAGS:
+ morsel = None
+ continue
+ value = True
+ else:
+ value, _ = self.value_decode(value)
+
+ morsel[key] = value
+
+ elif is_attribute:
+ morsel = None
+
+ elif value is not None:
+ morsel = self.get(key, http.cookies.Morsel())
+ real_value, coded_value = self.value_decode(value)
+ morsel.set(key, real_value, coded_value)
+ self[key] = morsel
+
+ else:
+ morsel = None
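+
+
+# Behaviour sketch: the stdlib parser discards the whole header on the first
+# malformed pair (the jar stays empty), whereas the 'bad' group above lets
+# this parser drop only the offending morsel and keep the valid ones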
diff --git a/hypervideo_dl/dependencies.py b/hypervideo_dl/dependencies.py
new file mode 100644
index 0000000..a913169
--- /dev/null
+++ b/hypervideo_dl/dependencies.py
@@ -0,0 +1,97 @@
+# flake8: noqa: F401
+"""Imports all optional dependencies for the project.
+An attribute "_hypervideo_dl__identifier" may be inserted into the module if it uses an ambiguous namespace"""
+
+try:
+ import brotlicffi as brotli
+except ImportError:
+ try:
+ import brotli
+ except ImportError:
+ brotli = None
+
+
+try:
+ import certifi
+except ImportError:
+ certifi = None
+else:
+ from os.path import exists as _path_exists
+
+ # The certificate may not be bundled in the executable
+ if not _path_exists(certifi.where()):
+ certifi = None
+
+
+try:
+ from Cryptodome.Cipher import AES as Cryptodome_AES
+except ImportError:
+ try:
+ from Crypto.Cipher import AES as Cryptodome_AES
+ except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
+ Cryptodome_AES = None
+ else:
+ try:
+ # In pycrypto, mode defaults to ECB. See:
+ # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
+ Cryptodome_AES.new(b'abcdefghijklmnop')
+ except TypeError:
+ pass
+ else:
+ Cryptodome_AES._hypervideo_dl__identifier = 'pycrypto'
+
+
+try:
+ import mutagen
+except ImportError:
+ mutagen = None
+
+
+secretstorage = None
+try:
+ import secretstorage
+ _SECRETSTORAGE_UNAVAILABLE_REASON = None
+except ImportError:
+ _SECRETSTORAGE_UNAVAILABLE_REASON = (
+ 'as the `secretstorage` module is not installed. '
+ 'Please install by running `python3 -m pip install secretstorage`')
+except Exception as _err:
+ _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
+
+
+try:
+ import sqlite3
+except ImportError:
+ # Although sqlite3 is part of the standard library, it is possible to compile Python without
+ # sqlite support. See: https://github.com/hypervideo/hypervideo/issues/544
+ sqlite3 = None
+
+
+try:
+ import websockets
+except (ImportError, SyntaxError):
+ # websockets 3.10 on Python 3.6 causes SyntaxError
+ # See https://github.com/hypervideo/hypervideo/issues/2633
+ websockets = None
+
+
+try:
+ import xattr # xattr or pyxattr
+except ImportError:
+ xattr = None
+else:
+ if hasattr(xattr, 'set'): # pyxattr
+ xattr._hypervideo_dl__identifier = 'pyxattr'
+
+
+all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')}
+
+
+available_dependencies = {k: v for k, v in all_dependencies.items() if v}
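+# e.g. if only brotli and sqlite3 imported successfully above, this is
+# {'brotli': <module 'brotli'>, 'sqlite3': <module 'sqlite3'>}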
+
+
+__all__ = [
+ 'all_dependencies',
+ 'available_dependencies',
+ *all_dependencies.keys(),
+]
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py
index 96d484d..c34dbce 100644
--- a/hypervideo_dl/downloader/__init__.py
+++ b/hypervideo_dl/downloader/__init__.py
@@ -1,10 +1,4 @@
-from __future__ import unicode_literals
-
-from ..compat import compat_str
-from ..utils import (
- determine_protocol,
- NO_DEFAULT
-)
+from ..utils import NO_DEFAULT, determine_protocol
def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
@@ -29,21 +23,18 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
# Some of these require get_suitable_downloader
from .common import FileDownloader
from .dash import DashSegmentsFD
+from .external import FFmpegFD, get_external_downloader
from .f4m import F4mFD
from .fc2 import FC2LiveFD
from .hls import HlsFD
from .http import HttpFD
-from .rtmp import RtmpFD
-from .rtsp import RtspFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD
+from .rtmp import RtmpFD
+from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
from .youtube_live_chat import YoutubeLiveChatFD
-from .external import (
- get_external_downloader,
- FFmpegFD,
-)
PROTOCOL_MAP = {
'rtmp': RtmpFD,
@@ -68,10 +59,11 @@ PROTOCOL_MAP = {
def shorten_protocol_name(proto, simplify=False):
short_protocol_names = {
- 'm3u8_native': 'm3u8_n',
- 'rtmp_ffmpeg': 'rtmp_f',
+ 'm3u8_native': 'm3u8',
+ 'm3u8': 'm3u8F',
+ 'rtmp_ffmpeg': 'rtmpF',
'http_dash_segments': 'dash',
- 'http_dash_segments_generator': 'dash_g',
+ 'http_dash_segments_generator': 'dashG',
'niconico_dmc': 'dmc',
'websocket_frag': 'WSfrag',
}
@@ -79,6 +71,7 @@ def shorten_protocol_name(proto, simplify=False):
short_protocol_names.update({
'https': 'http',
'ftps': 'ftp',
+ 'm3u8': 'm3u8', # Reverse above m3u8 mapping
'm3u8_native': 'm3u8',
'http_dash_segments_generator': 'dash',
'rtmp_ffmpeg': 'rtmp',
@@ -93,13 +86,13 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
if default is NO_DEFAULT:
default = HttpFD
- # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
- # return FFmpegFD
+ if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict):
+ return FFmpegFD
info_dict['protocol'] = protocol
downloaders = params.get('external_downloader')
external_downloader = (
- downloaders if isinstance(downloaders, compat_str) or downloaders is None
+ downloaders if isinstance(downloaders, str) or downloaders is None
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
if external_downloader is None:
diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py
index 7cef3e8..72d4822 100644
--- a/hypervideo_dl/downloader/common.py
+++ b/hypervideo_dl/downloader/common.py
@@ -1,30 +1,39 @@
-from __future__ import division, unicode_literals
-
+import contextlib
+import errno
+import functools
import os
+import random
import re
import time
-import random
-import errno
+from ..minicurses import (
+ BreaklineStatusPrinter,
+ MultilineLogger,
+ MultilinePrinter,
+ QuietMultilinePrinter,
+)
from ..utils import (
+ IDENTITY,
+ NO_DEFAULT,
+ LockingUnsupportedError,
+ Namespace,
+ RetryManager,
+ classproperty,
decodeArgument,
encodeFilename,
- error_to_compat_str,
format_bytes,
+ join_nonempty,
+ parse_bytes,
+ remove_start,
sanitize_open,
shell_quote,
timeconvert,
timetuple_from_msec,
-)
-from ..minicurses import (
- MultilineLogger,
- MultilinePrinter,
- QuietMultilinePrinter,
- BreaklineStatusPrinter
+ try_call,
)
-class FileDownloader(object):
+class FileDownloader:
"""File Downloader class.
File downloader objects are the ones responsible for downloading the
@@ -39,6 +48,7 @@ class FileDownloader(object):
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
+ continuedl: Attempt to continue downloads if possible
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
file_access_retries: Number of times to retry on file access error
@@ -62,6 +72,7 @@ class FileDownloader(object):
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
progress_template: See YoutubeDL.py
+ retry_sleep_functions: See YoutubeDL.py
Subclasses of this one must re-define the real_download method.
"""
@@ -71,21 +82,51 @@ class FileDownloader(object):
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
- self.ydl = ydl
+ self._set_ydl(ydl)
self._progress_hooks = []
self.params = params
self._prepare_multiline_status()
self.add_progress_hook(self.report_progress)
+ def _set_ydl(self, ydl):
+ self.ydl = ydl
+
+ for func in (
+ 'deprecation_warning',
+ 'deprecated_feature',
+ 'report_error',
+ 'report_file_already_downloaded',
+ 'report_warning',
+ 'to_console_title',
+ 'to_stderr',
+ 'trouble',
+ 'write_debug',
+ ):
+ if not hasattr(self, func):
+ setattr(self, func, getattr(ydl, func))
+
+ def to_screen(self, *args, **kargs):
+ self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
+
+ __to_screen = to_screen
+
+ @classproperty
+ def FD_NAME(cls):
+ return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
+
@staticmethod
def format_seconds(seconds):
+ if seconds is None:
+ return ' Unknown'
time = timetuple_from_msec(seconds * 1000)
if time.hours > 99:
return '--:--:--'
- if not time.hours:
- return '%02d:%02d' % time[1:-1]
return '%02d:%02d:%02d' % time[:-1]
+ @classmethod
+ def format_eta(cls, seconds):
+ return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'
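+ # For example: format_seconds(75) == '00:01:15', so format_eta(75) is
+ # '   01:15' (the leading "00:" is stripped and the result right-aligned
+ # to 8 columns), while format_eta(None) gives ' Unknown'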
+
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
@@ -94,11 +135,7 @@ class FileDownloader(object):
@staticmethod
def format_percent(percent):
- if percent is None:
- return '---.-%'
- elif percent == 100:
- return '100%'
- return '%6s' % ('%3.1f%%' % percent)
+ return ' N/A%' if percent is None else f'{percent:>5.1f}%'
@staticmethod
def calc_eta(start, now, total, current):
@@ -113,12 +150,6 @@ class FileDownloader(object):
return int((float(total) - float(current)) / rate)
@staticmethod
- def format_eta(eta):
- if eta is None:
- return '--:--'
- return FileDownloader.format_seconds(eta)
-
- @staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
@@ -127,13 +158,11 @@ class FileDownloader(object):
@staticmethod
def format_speed(speed):
- if speed is None:
- return '%10s' % '---b/s'
- return '%10s' % ('%s/s' % format_bytes(speed))
+ return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
@staticmethod
def format_retries(retries):
- return 'inf' if retries == float('inf') else '%.0f' % retries
+ return 'inf' if retries == float('inf') else int(retries)
@staticmethod
def best_block_size(elapsed_time, bytes):
@@ -151,33 +180,7 @@ class FileDownloader(object):
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
- matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
- if matchobj is None:
- return None
- number = float(matchobj.group(1))
- multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
- return int(round(number * multiplier))
-
- def to_screen(self, *args, **kargs):
- self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
-
- def to_stderr(self, message):
- self.ydl.to_stderr(message)
-
- def to_console_title(self, message):
- self.ydl.to_console_title(message)
-
- def trouble(self, *args, **kargs):
- self.ydl.trouble(*args, **kargs)
-
- def report_warning(self, *args, **kargs):
- self.ydl.report_warning(*args, **kargs)
-
- def report_error(self, *args, **kargs):
- self.ydl.report_error(*args, **kargs)
-
- def write_debug(self, *args, **kargs):
- self.ydl.write_debug(*args, **kargs)
+ return parse_bytes(bytestr)
def slow_down(self, start_time, now, byte_counter):
"""Sleep if the download speed is over the rate limit."""
@@ -211,30 +214,31 @@ class FileDownloader(object):
return filename + '.ytdl'
def wrap_file_access(action, *, fatal=False):
- def outer(func):
- def inner(self, *args, **kwargs):
- file_access_retries = self.params.get('file_access_retries', 0)
- retry = 0
- while True:
- try:
- return func(self, *args, **kwargs)
- except (IOError, OSError) as err:
- retry = retry + 1
- if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
- if not fatal:
- self.report_error(f'unable to {action} file: {err}')
- return
- raise
- self.to_screen(
- f'[download] Unable to {action} file due to file access error. '
- f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
- time.sleep(0.01)
- return inner
- return outer
+ def error_callback(err, count, retries, *, fd):
+ return RetryManager.report_retry(
+ err, count, retries, info=fd.__to_screen,
+ warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
+ error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
+ sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
+
+ def wrapper(self, func, *args, **kwargs):
+ for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
+ try:
+ return func(self, *args, **kwargs)
+ except OSError as err:
+ if err.errno in (errno.EACCES, errno.EINVAL):
+ retry.error = err
+ continue
+ retry.error_callback(err, 1, 0)
+
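+ # NB: functools.partial(functools.partialmethod, wrapper) makes this a
+ # decorator: @wrap_file_access(...) binds the decorated function as `func`,
+ # and ordinary attribute access then supplies `self`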
+ return functools.partial(functools.partialmethod, wrapper)
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
- return sanitize_open(filename, open_mode)
+ f, filename = sanitize_open(filename, open_mode)
+ if not getattr(f, 'locked', None):
+ self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
+ return f, filename
@wrap_file_access('remove')
def try_remove(self, filename):
@@ -261,10 +265,8 @@ class FileDownloader(object):
# Ignore obviously invalid dates
if filetime == 0:
return
- try:
+ with contextlib.suppress(Exception):
os.utime(filename, (time.time(), filetime))
- except Exception:
- pass
return filetime
def report_destination(self, filename):
@@ -277,26 +279,26 @@ class FileDownloader(object):
elif self.ydl.params.get('logger'):
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
elif self.params.get('progress_with_newline'):
- self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
+ self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
else:
- self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
+ self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
def _finish_multiline_status(self):
self._multiline.end()
- _progress_styles = {
- 'downloaded_bytes': 'light blue',
- 'percent': 'light blue',
- 'eta': 'yellow',
- 'speed': 'green',
- 'elapsed': 'bold white',
- 'total_bytes': '',
- 'total_bytes_estimate': '',
- }
+ ProgressStyles = Namespace(
+ downloaded_bytes='light blue',
+ percent='light blue',
+ eta='yellow',
+ speed='green',
+ elapsed='bold white',
+ total_bytes='',
+ total_bytes_estimate='',
+ )
def _report_progress_status(self, s, default_template):
- for name, style in self._progress_styles.items():
+ for name, style in self.ProgressStyles.items_:
name = f'_{name}_str'
if name not in s:
continue
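
`ProgressStyles` is now a `Namespace` iterated via `items_`. The real helper lives in `hypervideo_dl.utils`; the following is only a rough stand-in that mimics the one behavior relied on above:

```python
import types


class Namespace(types.SimpleNamespace):
    # minimal stand-in: only the items_ view used by _report_progress_status
    @property
    def items_(self):
        return self.__dict__.items()


ProgressStyles = Namespace(percent='light blue', eta='yellow', speed='green')

for name, style in ProgressStyles.items_:
    print(f'_{name}_str', '->', style)
```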
@@ -320,78 +322,73 @@ class FileDownloader(object):
self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
def report_progress(self, s):
+ def with_fields(*tups, default=''):
+ for *fields, tmpl in tups:
+ if all(s.get(f) is not None for f in fields):
+ return tmpl
+ return default
+
+ _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
+
if s['status'] == 'finished':
if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
- msg_template = '100%%'
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template += ' of %(_total_bytes_str)s'
- if s.get('elapsed') is not None:
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template += ' in %(_elapsed_str)s'
- s['_percent_str'] = self.format_percent(100)
- self._report_progress_status(s, msg_template)
- return
+ speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
+ s.update({
+ 'speed': speed,
+ '_speed_str': self.format_speed(speed).strip(),
+ '_total_bytes_str': _format_bytes('total_bytes'),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ '_percent_str': self.format_percent(100),
+ })
+ self._report_progress_status(s, join_nonempty(
+ '100%%',
+ with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
+ with_fields(('elapsed', 'in %(_elapsed_str)s')),
+ with_fields(('speed', 'at %(_speed_str)s')),
+ delim=' '))
if s['status'] != 'downloading':
return
- if s.get('eta') is not None:
- s['_eta_str'] = self.format_eta(s['eta'])
- else:
- s['_eta_str'] = 'Unknown'
-
- if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
- elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
- else:
- if s.get('downloaded_bytes') == 0:
- s['_percent_str'] = self.format_percent(0)
- else:
- s['_percent_str'] = 'Unknown %'
-
- if s.get('speed') is not None:
- s['_speed_str'] = self.format_speed(s['speed'])
- else:
- s['_speed_str'] = 'Unknown speed'
-
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
- elif s.get('total_bytes_estimate') is not None:
- s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
- msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
- else:
- if s.get('downloaded_bytes') is not None:
- s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
- if s.get('elapsed'):
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
- else:
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
- else:
- msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
- if s.get('fragment_index') and s.get('fragment_count'):
- msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
- elif s.get('fragment_index'):
- msg_template += ' (frag %(fragment_index)s)'
+ s.update({
+ '_eta_str': self.format_eta(s.get('eta')).strip(),
+ '_speed_str': self.format_speed(s.get('speed')),
+ '_percent_str': self.format_percent(try_call(
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
+ lambda: s['downloaded_bytes'] == 0 and 0)),
+ '_total_bytes_str': _format_bytes('total_bytes'),
+ '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
+ '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ })
+
+ msg_template = with_fields(
+ ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
+ ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
+ default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
+
+ msg_template += with_fields(
+ ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
+ ('fragment_index', ' (frag %(fragment_index)s)'))
self._report_progress_status(s, msg_template)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
- def report_retry(self, err, count, retries):
- """Report retry in case of HTTP error 5xx"""
- self.to_screen(
- '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
- % (error_to_compat_str(err), count, self.format_retries(retries)))
-
- def report_file_already_downloaded(self, *args, **kwargs):
- """Report file has already been fully downloaded."""
- return self.ydl.report_file_already_downloaded(*args, **kwargs)
+ def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
+ """Report retry"""
+ is_frag = False if frag_index is NO_DEFAULT else 'fragment'
+ RetryManager.report_retry(
+ err, count, retries, info=self.__to_screen,
+ warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
+ error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
+ sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
+ suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
@@ -431,25 +428,16 @@ class FileDownloader(object):
self._finish_multiline_status()
return True, False
- if subtitle is False:
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
- self.to_screen(
- '[download] Sleeping %s seconds ...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
+ if subtitle:
+ sleep_interval = self.params.get('sleep_interval_subtitles') or 0
else:
- sleep_interval_sub = 0
- if type(self.params.get('sleep_interval_subtitles')) is int:
- sleep_interval_sub = self.params.get('sleep_interval_subtitles')
- if sleep_interval_sub > 0:
- self.to_screen(
- '[download] Sleeping %s seconds ...' % (
- sleep_interval_sub))
- time.sleep(sleep_interval_sub)
+ min_sleep_interval = self.params.get('sleep_interval') or 0
+ sleep_interval = random.uniform(
+ min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
+ if sleep_interval > 0:
+ self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
+ time.sleep(sleep_interval)
+
ret = self.real_download(filename, info_dict)
self._finish_multiline_status()
return ret, True
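
The sleep logic now collapses to one code path per case: a fixed interval for subtitles, and a random point between `sleep_interval` and `max_sleep_interval` otherwise. A standalone version of the same computation:

```python
import random
import time


def sleep_before_download(params, subtitle=False):
    if subtitle:
        interval = params.get('sleep_interval_subtitles') or 0
    else:
        minimum = params.get('sleep_interval') or 0
        # max_sleep_interval falls back to the minimum, giving a fixed sleep
        interval = random.uniform(minimum, params.get('max_sleep_interval') or minimum)
    if interval > 0:
        print(f'[download] Sleeping {interval:.2f} seconds ...')
        time.sleep(interval)


sleep_before_download({'sleep_interval': 0.1, 'max_sleep_interval': 0.3})
```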
@@ -459,8 +447,7 @@ class FileDownloader(object):
raise NotImplementedError('This method must be implemented by subclasses')
def _hook_progress(self, status, info_dict):
- if not self._progress_hooks:
- return
+ # Ideally we want to make a copy of the dict, but that is too slow
status['info_dict'] = info_dict
# youtube-dl passes the same status object to all the hooks.
# Some third party scripts seem to rely on this.
@@ -482,4 +469,4 @@ class FileDownloader(object):
if exe is None:
exe = os.path.basename(str_args[0])
- self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))
+ self.write_debug(f'{exe} command line: {shell_quote(str_args)}')
diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py
index a845ee7..4328d73 100644
--- a/hypervideo_dl/downloader/dash.py
+++ b/hypervideo_dl/downloader/dash.py
@@ -1,10 +1,9 @@
-from __future__ import unicode_literals
import time
+import urllib.parse
-from ..downloader import get_suitable_downloader
+from . import get_suitable_downloader
from .fragment import FragmentFD
-
-from ..utils import urljoin
+from ..utils import update_url_query, urljoin
class DashSegmentsFD(FragmentFD):
@@ -42,24 +41,29 @@ class DashSegmentsFD(FragmentFD):
self._prepare_and_start_frag_download(ctx, fmt)
ctx['start'] = real_start
- fragments_to_download = self._get_fragments(fmt, ctx)
+ extra_query = None
+ extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+ if extra_param_to_segment_url:
+ extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
+
+ fragments_to_download = self._get_fragments(fmt, ctx, extra_query)
if real_downloader:
self.to_screen(
- '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+ f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
info_dict['fragments'] = list(fragments_to_download)
fd = real_downloader(self.ydl, self.params)
return fd.real_download(filename, info_dict)
args.append([ctx, fragments_to_download, fmt])
- return self.download_and_append_fragments_multiple(*args)
+ return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0)
def _resolve_fragments(self, fragments, ctx):
fragments = fragments(ctx) if callable(fragments) else fragments
return [next(iter(fragments))] if self.params.get('test') else fragments
- def _get_fragments(self, fmt, ctx):
+ def _get_fragments(self, fmt, ctx, extra_query):
fragment_base_url = fmt.get('fragment_base_url')
fragments = self._resolve_fragments(fmt['fragments'], ctx)
@@ -72,9 +76,12 @@ class DashSegmentsFD(FragmentFD):
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
+ if extra_query:
+ fragment_url = update_url_query(fragment_url, extra_query)
yield {
'frag_index': frag_index,
+ 'fragment_count': fragment.get('fragment_count'),
'index': i,
'url': fragment_url,
}
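
dash.py now honors `extra_param_to_segment_url` by parsing it once and merging it into every fragment URL. A stdlib-only sketch of that merge, where `update_url_query` is a stand-in for the helper in `hypervideo_dl.utils`:

```python
import urllib.parse


def update_url_query(url, query):
    # stand-in for hypervideo_dl.utils.update_url_query
    parsed = urllib.parse.urlparse(url)
    qs = urllib.parse.parse_qs(parsed.query)
    qs.update(query)
    return parsed._replace(query=urllib.parse.urlencode(qs, doseq=True)).geturl()


extra_query = urllib.parse.parse_qs('token=abc&sig=123')  # extra_param_to_segment_url
frag_url = update_url_query('https://cdn.example.com/seg-1.m4s?range=0-999', extra_query)
print(frag_url)  # carries range=0-999 plus token and sig
```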
diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py
index b99dc37..75257a7 100644
--- a/hypervideo_dl/downloader/external.py
+++ b/hypervideo_dl/downloader/external.py
@@ -1,5 +1,4 @@
-from __future__ import unicode_literals
-
+import enum
import os.path
import re
import subprocess
@@ -7,30 +6,35 @@ import sys
import time
from .fragment import FragmentFD
-from ..compat import (
- compat_setenv,
- compat_str,
-)
-from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+from ..compat import functools
+from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
+ Popen,
+ RetryManager,
+ _configuration_args,
+ check_executable,
classproperty,
+ cli_bool_option,
cli_option,
cli_valueless_option,
- cli_bool_option,
- _configuration_args,
determine_ext,
- encodeFilename,
encodeArgument,
+ encodeFilename,
handle_youtubedl_headers,
- check_executable,
- Popen,
remove_end,
+ traverse_obj,
)
+class Features(enum.Enum):
+ TO_STDOUT = enum.auto()
+ MULTIPLE_FORMATS = enum.auto()
+
+
class ExternalFD(FragmentFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
- can_download_to_stdout = False
+ SUPPORTED_FEATURES = ()
+ _CAPTURE_STDERR = True
def real_download(self, filename, info_dict):
self.report_destination(filename)
@@ -56,7 +60,7 @@ class ExternalFD(FragmentFD):
}
if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+ self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename)
status.update({
'downloaded_bytes': fsize,
@@ -78,7 +82,7 @@ class ExternalFD(FragmentFD):
def EXE_NAME(cls):
return cls.get_basename()
- @property
+ @functools.cached_property
def exe(self):
return self.EXE_NAME
@@ -94,9 +98,11 @@ class ExternalFD(FragmentFD):
@classmethod
def supports(cls, info_dict):
- return (
- (cls.can_download_to_stdout or not info_dict.get('to_stdout'))
- and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
+ return all((
+ not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
+ '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
+ all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
+ ))
@classmethod
def can_download(cls, info_dict, path=None):
@@ -123,33 +129,28 @@ class ExternalFD(FragmentFD):
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
- p = Popen(cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate_or_kill()
- if p.returncode != 0:
- self.to_stderr(stderr.decode('utf-8', 'replace'))
- return p.returncode
+ _, stderr, returncode = Popen.run(
+ cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
+ if returncode and stderr:
+ self.to_stderr(stderr)
+ return returncode
- fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- count = 0
- while count <= fragment_retries:
- p = Popen(cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate_or_kill()
- if p.returncode == 0:
+ retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+ frag_index=None, fatal=not skip_unavailable_fragments)
+ for retry in retry_manager:
+ _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE)
+ if not returncode:
break
# TODO: Decide whether to retry based on error code
# https://aria2.github.io/manual/en/html/aria2c.html#exit-status
- self.to_stderr(stderr.decode('utf-8', 'replace'))
- count += 1
- if count <= fragment_retries:
- self.to_screen(
- '[%s] Got error. Retrying fragments (attempt %d of %s)...'
- % (self.get_basename(), count, self.format_retries(fragment_retries)))
- if count > fragment_retries:
- if not skip_unavailable_fragments:
- self.report_error('Giving up after %s fragment retries' % fragment_retries)
- return -1
+ if stderr:
+ self.to_stderr(stderr)
+ retry.error = Exception()
+ continue
+ if not skip_unavailable_fragments and retry_manager.error:
+ return -1
decrypt_fragment = self.decrypter(info_dict)
dest, _ = self.sanitize_open(tmpfilename, 'wb')
@@ -157,7 +158,7 @@ class ExternalFD(FragmentFD):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
try:
src, _ = self.sanitize_open(fragment_filename, 'rb')
- except IOError as err:
+ except OSError as err:
if skip_unavailable_fragments and frag_index > 1:
self.report_skip_fragment(frag_index, err)
continue
@@ -174,12 +175,13 @@ class ExternalFD(FragmentFD):
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
+ _CAPTURE_STDERR = False # curl writes the progress to stderr
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += ['--header', f'{key}: {val}']
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
@@ -198,16 +200,6 @@ class CurlFD(ExternalFD):
cmd += ['--', info_dict['url']]
return cmd
- def _call_downloader(self, tmpfilename, info_dict):
- cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
-
- self._debug_cmd(cmd)
-
- # curl writes the progress to stderr so don't capture it.
- p = Popen(cmd)
- p.communicate_or_kill()
- return p.returncode
-
class AxelFD(ExternalFD):
AVAILABLE_OPT = '-V'
@@ -216,7 +208,7 @@ class AxelFD(ExternalFD):
cmd = [self.exe, '-o', tmpfilename]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['-H', '%s: %s' % (key, val)]
+ cmd += ['-H', f'{key}: {val}']
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -229,7 +221,7 @@ class WgetFD(ExternalFD):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += ['--header', f'{key}: {val}']
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
@@ -240,7 +232,7 @@ class WgetFD(ExternalFD):
proxy = self.params.get('proxy')
if proxy:
for var in ('http_proxy', 'https_proxy'):
- cmd += ['--execute', '%s=%s' % (var, proxy)]
+ cmd += ['--execute', f'{var}={proxy}']
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
@@ -260,6 +252,10 @@ class Aria2cFD(ExternalFD):
check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
return all(check_results)
+ @staticmethod
+ def _aria2c_filename(fn):
+ return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
+
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
@@ -271,7 +267,7 @@ class Aria2cFD(ExternalFD):
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += ['--header', f'{key}: {val}']
cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
@@ -288,11 +284,9 @@ class Aria2cFD(ExternalFD):
# https://github.com/aria2/aria2/issues/1373
dn = os.path.dirname(tmpfilename)
if dn:
- if not os.path.isabs(dn):
- dn = '.%s%s' % (os.path.sep, dn)
- cmd += ['--dir', dn + os.path.sep]
+ cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
if 'fragments' not in info_dict:
- cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))]
+ cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
cmd += ['--auto-file-renaming=false']
if 'fragments' in info_dict:
@@ -301,11 +295,11 @@ class Aria2cFD(ExternalFD):
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
- url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
+ url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
- stream.write('\n'.join(url_list).encode('utf-8'))
+ stream.write('\n'.join(url_list).encode())
stream.close()
- cmd += ['-i', url_list_file]
+ cmd += ['-i', self._aria2c_filename(url_list_file)]
else:
cmd += ['--', info_dict['url']]
return cmd
@@ -320,13 +314,13 @@ class HttpieFD(ExternalFD):
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['%s:%s' % (key, val)]
+ cmd += [f'{key}:{val}']
return cmd
class FFmpegFD(ExternalFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
- can_download_to_stdout = True
+ SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)
@classmethod
def available(cls, path=None):
@@ -334,10 +328,6 @@ class FFmpegFD(ExternalFD):
# Fixme: This may be wrong when --ffmpeg-location is used
return FFmpegPostProcessor().available
- @classmethod
- def supports(cls, info_dict):
- return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
-
def on_process_started(self, proc, stdin):
""" Override this in subclasses """
pass
@@ -368,9 +358,11 @@ class FFmpegFD(ExternalFD):
if not self.params.get('verbose'):
args += ['-hide_banner']
- args += info_dict.get('_ffmpeg_args', [])
+ args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
- # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
+    # These exist only for compatibility. Extractors should use
+ # info_dict['downloader_options']['ffmpeg_args'] instead
+ args += info_dict.get('_ffmpeg_args') or []
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
@@ -380,20 +372,15 @@ class FFmpegFD(ExternalFD):
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
- # start_time = info_dict.get('start_time') or 0
- # if start_time:
- # args += ['-ss', compat_str(start_time)]
- # end_time = info_dict.get('end_time')
- # if end_time:
- # args += ['-t', compat_str(end_time - start_time)]
-
- if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]):
- # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
- # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
- headers = handle_youtubedl_headers(info_dict['http_headers'])
- args += [
+ http_headers = None
+ if info_dict.get('http_headers'):
+ youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers'])
+ http_headers = [
+                # Trailing \r\n after each HTTP header is important to prevent a warning from ffmpeg/avconv:
+ # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
'-headers',
- ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+ ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items())
+ ]
env = None
proxy = self.params.get('proxy')
@@ -411,8 +398,8 @@ class FFmpegFD(ExternalFD):
# We could switch to the following code if we are able to detect version properly
# args += ['-http_proxy', proxy]
env = os.environ.copy()
- compat_setenv('HTTP_PROXY', proxy, env=env)
- compat_setenv('http_proxy', proxy, env=env)
+ env['HTTP_PROXY'] = proxy
+ env['http_proxy'] = proxy
protocol = info_dict.get('protocol')
@@ -442,20 +429,31 @@ class FFmpegFD(ExternalFD):
if isinstance(conn, list):
for entry in conn:
args += ['-rtmp_conn', entry]
- elif isinstance(conn, compat_str):
+ elif isinstance(conn, str):
args += ['-rtmp_conn', conn]
+ start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
+
for i, url in enumerate(urls):
+ if http_headers is not None and re.match(r'^https?://', url):
+ args += http_headers
+ if start_time:
+ args += ['-ss', str(start_time)]
+ if end_time:
+ args += ['-t', str(end_time - start_time)]
+
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
- args += ['-c', 'copy']
+ if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
+ args += ['-c', 'copy']
+
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
stream_number = fmt.get('manifest_stream_number', 0)
args.extend(['-map', f'{i}:{stream_number}'])
if self.params.get('test', False):
- args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+ args += ['-fs', str(self._TEST_FILE_SIZE)]
ext = info_dict['ext']
if protocol in ('m3u8', 'm3u8_native'):
@@ -490,24 +488,23 @@ class FFmpegFD(ExternalFD):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
- proc = Popen(args, stdin=subprocess.PIPE, env=env)
- if url in ('-', 'pipe:'):
- self.on_process_started(proc, proc.stdin)
- try:
- retval = proc.wait()
- except BaseException as e:
- # subprocces.run would send the SIGKILL signal to ffmpeg and the
- # mp4 file couldn't be played, but if we ask ffmpeg to quit it
- # produces a file that is playable (this is mostly useful for live
- # streams). Note that Windows is not affected and produces playable
- # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
- if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
- proc.communicate_or_kill(b'q')
- else:
- proc.kill()
- proc.wait()
- raise
- return retval
+ with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
+ if url in ('-', 'pipe:'):
+ self.on_process_started(proc, proc.stdin)
+ try:
+ retval = proc.wait()
+ except BaseException as e:
+            # subprocess.run would send the SIGKILL signal to ffmpeg and the
+ # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+ # produces a file that is playable (this is mostly useful for live
+ # streams). Note that Windows is not affected and produces playable
+ # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
+ proc.communicate_or_kill(b'q')
+ else:
+ proc.kill(timeout=None)
+ raise
+ return retval
class AVconvFD(FFmpegFD):
@@ -520,16 +517,14 @@ _BY_NAME = {
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
}
-_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}
-
def list_external_downloaders():
return sorted(_BY_NAME.keys())
def get_external_downloader(external_downloader):
- """ Given the name of the executable, see whether we support the given
- downloader . """
- # Drop .exe extension on Windows
+ """ Given the name of the executable, see whether we support the given downloader """
bn = os.path.splitext(os.path.basename(external_downloader))[0]
- return _BY_NAME.get(bn, _BY_EXE.get(bn))
+ return _BY_NAME.get(bn) or next((
+ klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
+ ), None)
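
The boolean `can_download_to_stdout` is replaced by a `SUPPORTED_FEATURES` tuple, which lets `supports()` also gate merged (`proto1+proto2`) downloads. The check, reduced to a runnable sketch:

```python
import enum


class Features(enum.Enum):
    TO_STDOUT = enum.auto()
    MULTIPLE_FORMATS = enum.auto()


class ExternalFD:
    SUPPORTED_PROTOCOLS = ('http', 'https')
    SUPPORTED_FEATURES = ()

    @classmethod
    def supports(cls, info_dict):
        return all((
            not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
            '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
            all(p in cls.SUPPORTED_PROTOCOLS for p in info_dict['protocol'].split('+')),
        ))


class FFmpegLikeFD(ExternalFD):
    SUPPORTED_PROTOCOLS = ('http', 'https', 'm3u8')
    SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)


print(ExternalFD.supports({'protocol': 'https+https'}))    # False: no MULTIPLE_FORMATS
print(FFmpegLikeFD.supports({'protocol': 'https+https'}))  # True
```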
diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py
index 0008b7c..306f921 100644
--- a/hypervideo_dl/downloader/f4m.py
+++ b/hypervideo_dl/downloader/f4m.py
@@ -1,23 +1,14 @@
-from __future__ import division, unicode_literals
-
+import base64
import io
import itertools
+import struct
import time
+import urllib.error
+import urllib.parse
from .fragment import FragmentFD
-from ..compat import (
- compat_b64decode,
- compat_etree_fromstring,
- compat_urlparse,
- compat_urllib_error,
- compat_urllib_parse_urlparse,
- compat_struct_pack,
- compat_struct_unpack,
-)
-from ..utils import (
- fix_xml_ampersands,
- xpath_text,
-)
+from ..compat import compat_etree_fromstring
+from ..utils import fix_xml_ampersands, xpath_text
class DataTruncatedError(Exception):
@@ -40,13 +31,13 @@ class FlvReader(io.BytesIO):
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
- return compat_struct_unpack('!Q', self.read_bytes(8))[0]
+ return struct.unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
- return compat_struct_unpack('!I', self.read_bytes(4))[0]
+ return struct.unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
- return compat_struct_unpack('!B', self.read_bytes(1))[0]
+ return struct.unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
@@ -193,7 +184,7 @@ def build_fragments_list(boot_info):
first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']:
- # In some live HDS streams (for example Rai), `fragments_count` is
+ # In some live HDS streams (e.g. Rai), `fragments_count` is
# abnormal and causing out-of-memory errors. It's OK to change the
# number of fragments for live streams as they are updated periodically
if fragments_count == 4294967295 and boot_info['live']:
@@ -208,11 +199,11 @@ def build_fragments_list(boot_info):
def write_unsigned_int(stream, val):
- stream.write(compat_struct_pack('!I', val))
+ stream.write(struct.pack('!I', val))
def write_unsigned_int_24(stream, val):
- stream.write(compat_struct_pack('!I', val)[1:])
+ stream.write(struct.pack('!I', val)[1:])
def write_flv_header(stream):
@@ -261,8 +252,6 @@ class F4mFD(FragmentFD):
A downloader for f4m manifests or AdobeHDS.
"""
- FD_NAME = 'f4m'
-
def _get_unencrypted_media(self, doc):
media = doc.findall(_add_ns('media'))
if not media:
@@ -308,12 +297,12 @@ class F4mFD(FragmentFD):
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
bootstrap_url = node.get('url')
if bootstrap_url:
- bootstrap_url = compat_urlparse.urljoin(
+ bootstrap_url = urllib.parse.urljoin(
base_url, bootstrap_url)
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
- bootstrap = compat_b64decode(node.text)
+ bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url
@@ -343,14 +332,14 @@ class F4mFD(FragmentFD):
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
- base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
+ base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
- metadata = compat_b64decode(metadata_node.text)
+ metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
@@ -378,7 +367,7 @@ class F4mFD(FragmentFD):
if not live:
write_metadata_tag(dest_stream, metadata)
- base_url_parsed = compat_urllib_parse_urlparse(base_url)
+ base_url_parsed = urllib.parse.urlparse(base_url)
self._start_frag_download(ctx, info_dict)
@@ -398,9 +387,10 @@ class F4mFD(FragmentFD):
query.append(info_dict['extra_param_to_segment_url'])
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
try:
- success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
+ success = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
if not success:
return False
+ down_data = self._read_fragment(ctx)
reader = FlvReader(down_data)
while True:
try:
@@ -417,7 +407,7 @@ class F4mFD(FragmentFD):
if box_type == b'mdat':
self._append_fragment(ctx, box_data)
break
- except (compat_urllib_error.HTTPError, ) as err:
+ except urllib.error.HTTPError as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
@@ -434,6 +424,4 @@ class F4mFD(FragmentFD):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
- self._finish_frag_download(ctx, info_dict)
-
- return True
+ return self._finish_frag_download(ctx, info_dict)
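
f4m.py drops the `compat_struct_*` shims in favor of `struct` directly. The reader pattern, runnable in isolation (`!` selects network byte order, i.e. big-endian):

```python
import io
import struct


class FlvReaderSketch(io.BytesIO):
    def read_bytes(self, n):
        data = self.read(n)
        if len(data) < n:
            raise EOFError(f'wanted {n} bytes, got {len(data)}')
        return data

    def read_unsigned_int(self):
        return struct.unpack('!I', self.read_bytes(4))[0]

    def read_unsigned_char(self):
        return struct.unpack('!B', self.read_bytes(1))[0]


r = FlvReaderSketch(b'\x00\x00\x00\x2a\x07')
print(r.read_unsigned_int())   # 42
print(r.read_unsigned_char())  # 7
```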
diff --git a/hypervideo_dl/downloader/fc2.py b/hypervideo_dl/downloader/fc2.py
index 157bcf2..f9763de 100644
--- a/hypervideo_dl/downloader/fc2.py
+++ b/hypervideo_dl/downloader/fc2.py
@@ -1,5 +1,3 @@
-from __future__ import division, unicode_literals
-
import threading
from .common import FileDownloader
@@ -20,6 +18,9 @@ class FC2LiveFD(FileDownloader):
heartbeat_state = [None, 1]
def heartbeat():
+ if heartbeat_state[1] < 0:
+ return
+
try:
heartbeat_state[1] += 1
ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
@@ -38,4 +39,8 @@ class FC2LiveFD(FileDownloader):
'ws': None,
'protocol': 'live_ffmpeg',
})
- return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
+ try:
+ return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
+ finally:
+ # stop heartbeating
+ heartbeat_state[1] = -1
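
The fc2.py fix uses `heartbeat_state[1] = -1` as a stop sentinel so the self-rescheduling heartbeat timer exits once the download returns. The shape of that pattern, with a print standing in for the websocket send:

```python
import threading

heartbeat_state = [None, 1]  # [pending timer, sequence number]


def heartbeat():
    if heartbeat_state[1] < 0:  # -1 means: stop re-arming
        return
    heartbeat_state[1] += 1
    print('heartbeat', heartbeat_state[1])  # ws.send(...) in the real code
    heartbeat_state[0] = threading.Timer(0.1, heartbeat)
    heartbeat_state[0].start()


heartbeat()
try:
    pass  # FFmpegFD(...).download(...) would run here
finally:
    heartbeat_state[1] = -1  # any in-flight timer now exits immediately
```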
diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py
index a991c6d..e61bd0e 100644
--- a/hypervideo_dl/downloader/fragment.py
+++ b/hypervideo_dl/downloader/fragment.py
@@ -1,28 +1,20 @@
-from __future__ import division, unicode_literals
-
+import concurrent.futures
+import contextlib
import http.client
import json
import math
import os
+import struct
import time
-
-try:
- import concurrent.futures
- can_threaded_download = True
-except ImportError:
- can_threaded_download = False
+import urllib.error
from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
-from ..compat import (
- compat_os_name,
- compat_urllib_error,
- compat_struct_pack,
-)
+from ..compat import compat_os_name
from ..utils import (
DownloadError,
- error_to_compat_str,
+ RetryManager,
encodeFilename,
sanitized_Request,
traverse_obj,
@@ -33,9 +25,7 @@ class HttpQuietDownloader(HttpFD):
def to_screen(self, *args, **kargs):
pass
- def report_retry(self, err, count, retries):
- super().to_screen(
- f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')
+ to_console_title = to_screen
class FragmentFD(FileDownloader):
@@ -75,9 +65,9 @@ class FragmentFD(FileDownloader):
"""
def report_retry_fragment(self, err, frag_index, count, retries):
- self.to_screen(
- '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
- % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
+ self.deprecation_warning('hypervideo_dl.downloader.FragmentFD.report_retry_fragment is deprecated. '
+ 'Use hypervideo_dl.downloader.FileDownloader.report_retry instead')
+ return self.report_retry(err, count, retries, frag_index)
def report_skip_fragment(self, frag_index, err=None):
err = f' {err};' if err else ''
@@ -131,7 +121,7 @@ class FragmentFD(FileDownloader):
'request_data': request_data,
'ctx_id': ctx.get('ctx_id'),
}
- success = ctx['dl'].download(fragment_filename, fragment_info_dict)
+ success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False
if fragment_info_dict.get('filetime'):
@@ -140,6 +130,8 @@ class FragmentFD(FileDownloader):
return True
def _read_fragment(self, ctx):
+ if not ctx.get('fragment_filename_sanitized'):
+ return None
try:
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
except FileNotFoundError:
@@ -172,21 +164,13 @@ class FragmentFD(FileDownloader):
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
- self.to_screen(
- '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+ self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
self.report_destination(ctx['filename'])
- dl = HttpQuietDownloader(
- self.ydl,
- {
- 'continuedl': self.params.get('continuedl', True),
- 'quiet': self.params.get('quiet'),
- 'noprogress': True,
- 'ratelimit': self.params.get('ratelimit'),
- 'retries': self.params.get('retries', 0),
- 'nopart': self.params.get('nopart', False),
- 'test': self.params.get('test', False),
- }
- )
+ dl = HttpQuietDownloader(self.ydl, {
+ **self.params,
+ 'noprogress': True,
+ 'test': False,
+ })
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
resume_len = 0
@@ -259,6 +243,9 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'):
return
+ if not total_frags and ctx.get('fragment_count'):
+ state['fragment_count'] = ctx['fragment_count']
+
if ctx_id is not None and s.get('ctx_id') != ctx_id:
return
@@ -308,18 +295,23 @@ class FragmentFD(FileDownloader):
self.try_remove(ytdl_filename)
elapsed = time.time() - ctx['started']
- if ctx['tmpfilename'] == '-':
- downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+ to_file = ctx['tmpfilename'] != '-'
+ if to_file:
+ downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
else:
+ downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+
+ if not downloaded_bytes:
+ if to_file:
+ self.try_remove(ctx['tmpfilename'])
+ self.report_error('The downloaded file is empty')
+ return False
+ elif to_file:
self.try_rename(ctx['tmpfilename'], ctx['filename'])
- if self.params.get('updatetime', True):
- filetime = ctx.get('fragment_filetime')
- if filetime:
- try:
- os.utime(ctx['filename'], (time.time(), filetime))
- except Exception:
- pass
- downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+ filetime = ctx.get('fragment_filetime')
+ if self.params.get('updatetime', True) and filetime:
+ with contextlib.suppress(Exception):
+ os.utime(ctx['filename'], (time.time(), filetime))
self._hook_progress({
'downloaded_bytes': downloaded_bytes,
@@ -331,6 +323,7 @@ class FragmentFD(FileDownloader):
'max_progress': ctx.get('max_progress'),
'progress_idx': ctx.get('progress_idx'),
}, info_dict)
+ return True
def _prepare_external_frag_download(self, ctx):
if 'live' not in ctx:
@@ -342,8 +335,7 @@ class FragmentFD(FileDownloader):
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
- self.to_screen(
- '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+ self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
tmpfilename = self.temp_name(ctx['filename'])
@@ -362,10 +354,12 @@ class FragmentFD(FileDownloader):
return _key_cache[url]
def decrypt_fragment(fragment, frag_content):
+ if frag_content is None:
+ return
decrypt_info = fragment.get('decrypt_info')
if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
return frag_content
- iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
+ iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence'])
decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
@@ -376,7 +370,7 @@ class FragmentFD(FileDownloader):
return decrypt_fragment
- def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None):
+ def download_and_append_fragments_multiple(self, *args, **kwargs):
'''
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list
@@ -384,7 +378,7 @@ class FragmentFD(FileDownloader):
interrupt_trigger = [True]
max_progress = len(args)
if max_progress == 1:
- return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
+ return self.download_and_append_fragments(*args[0], **kwargs)
max_workers = self.params.get('concurrent_fragment_downloads', 1)
if max_progress > 1:
self._prepare_multiline_status(max_progress)
@@ -394,8 +388,7 @@ class FragmentFD(FileDownloader):
ctx['max_progress'] = max_progress
ctx['progress_idx'] = idx
return self.download_and_append_fragments(
- ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func,
- tpe=tpe, interrupt_trigger=interrupt_trigger)
+ ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger)
class FTPE(concurrent.futures.ThreadPoolExecutor):
# has to stop this or it's going to wait on the worker thread itself
@@ -442,18 +435,12 @@ class FragmentFD(FileDownloader):
return result
def download_and_append_fragments(
- self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None,
- tpe=None, interrupt_trigger=None):
- if not interrupt_trigger:
- interrupt_trigger = (True, )
-
- fragment_retries = self.params.get('fragment_retries', 0)
- is_fatal = (
- ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0))
- if self.params.get('skip_unavailable_fragments', True) else (lambda _: True))
+ self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False),
+ pack_func=(lambda content, idx: content), finish_func=None,
+ tpe=None, interrupt_trigger=(True, )):
- if not pack_func:
- pack_func = lambda frag_content, _: frag_content
+ if not self.params.get('skip_unavailable_fragments', True):
+ is_fatal = lambda _: True
def download_fragment(fragment, ctx):
if not interrupt_trigger[0]:
@@ -467,31 +454,25 @@ class FragmentFD(FileDownloader):
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
# Never skip the first fragment
- fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
- while count <= fragment_retries:
- try:
- if self._download_fragment(ctx, fragment['url'], info_dict, headers):
- break
- return
- except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
- # Unavailable (possibly temporary) fragments may be served.
- # First we try to retry then either skip or abort.
- # See https://github.com/ytdl-org/youtube-dl/issues/10165,
- # https://github.com/ytdl-org/youtube-dl/issues/10448).
- count += 1
- ctx['last_error'] = err
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- except DownloadError:
- # Don't retry fragment if error occurred during HTTP downloading
- # itself since it has own retry settings
- if not fatal:
- break
- raise
+ fatal = is_fatal(fragment.get('index') or (frag_index - 1))
- if count > fragment_retries and fatal:
- ctx['dest_stream'].close()
- self.report_error('Giving up after %s fragment retries' % fragment_retries)
+ def error_callback(err, count, retries):
+ if fatal and count > retries:
+ ctx['dest_stream'].close()
+ self.report_retry(err, count, retries, frag_index, fatal)
+ ctx['last_error'] = err
+
+ for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
+ try:
+ ctx['fragment_count'] = fragment.get('fragment_count')
+ if not self._download_fragment(ctx, fragment['url'], info_dict, headers):
+ return
+ except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
+ retry.error = err
+ continue
+ except DownloadError: # has own retry settings
+ if fatal:
+ raise
def append_fragment(frag_content, frag_index, ctx):
if frag_content:
@@ -508,8 +489,7 @@ class FragmentFD(FileDownloader):
max_workers = math.ceil(
self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
- if can_threaded_download and max_workers > 1:
-
+ if max_workers > 1:
def _download_fragment(fragment):
ctx_copy = ctx.copy()
download_fragment(fragment, ctx_copy)
@@ -517,23 +497,36 @@ class FragmentFD(FileDownloader):
self.report_warning('The download speed shown is only that of one thread. This is a known issue and patches are welcome')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
- for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
- ctx['fragment_filename_sanitized'] = frag_filename
- ctx['fragment_index'] = frag_index
- result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
- if not result:
- return False
+ try:
+ for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ ctx.update({
+ 'fragment_filename_sanitized': frag_filename,
+ 'fragment_index': frag_index,
+ })
+ if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
+ return False
+ except KeyboardInterrupt:
+ self._finish_multiline_status()
+ self.report_error(
+                    'Interrupted by user. Waiting for all threads to shut down...', is_error=False, tb=False)
+ pool.shutdown(wait=False)
+ raise
else:
for fragment in fragments:
if not interrupt_trigger[0]:
break
- download_fragment(fragment, ctx)
- result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
+ try:
+ download_fragment(fragment, ctx)
+ result = append_fragment(
+ decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
+ except KeyboardInterrupt:
+ if info_dict.get('is_live'):
+ break
+ raise
if not result:
return False
if finish_func is not None:
ctx['dest_stream'].write(finish_func())
ctx['dest_stream'].flush()
- self._finish_frag_download(ctx, info_dict)
- return True
+ return self._finish_frag_download(ctx, info_dict)
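
fragment.py now wraps the threaded `pool.map` in a `KeyboardInterrupt` handler that calls `pool.shutdown(wait=False)` so Ctrl-C does not hang on the worker join. Roughly (the real code pairs this with an executor subclass whose exit skips the join):

```python
import concurrent.futures


def download_fragment(index):
    return index, f'frag-{index}.part'  # placeholder for the real HTTP download


pool = concurrent.futures.ThreadPoolExecutor(max_workers=2)
try:
    for index, filename in pool.map(download_fragment, range(4)):
        print(f'appending fragment {index} from {filename}')
    pool.shutdown()
except KeyboardInterrupt:
    # stop dispatching queued fragments; don't block joining the workers
    pool.shutdown(wait=False, cancel_futures=True)
    raise
```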
diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py
index f3f32b5..4520edc 100644
--- a/hypervideo_dl/downloader/hls.py
+++ b/hypervideo_dl/downloader/hls.py
@@ -1,23 +1,14 @@
-from __future__ import unicode_literals
-
-import re
-import io
import binascii
+import io
+import re
+import urllib.parse
-from ..downloader import get_suitable_downloader
-from .fragment import FragmentFD
+from . import get_suitable_downloader
from .external import FFmpegFD
-
-from ..compat import (
- compat_pycrypto_AES,
- compat_urlparse,
-)
-from ..utils import (
- parse_m3u8_attributes,
- update_url_query,
- bug_reports_message,
-)
+from .fragment import FragmentFD
from .. import webvtt
+from ..dependencies import Cryptodome_AES
+from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query
class HlsFD(FragmentFD):
@@ -70,12 +61,18 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
- if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s:
- if FFmpegFD.available():
+ if can_download:
+ has_ffmpeg = FFmpegFD.available()
+ no_crypto = not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s
+ if no_crypto and has_ffmpeg:
can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available'
- else:
+ elif no_crypto:
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; '
'Decryption will be performed natively, but will be extremely slow')
+ elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
+ install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
+ message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
+ f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download:
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
@@ -102,8 +99,7 @@ class HlsFD(FragmentFD):
if real_downloader and not real_downloader.supports_manifest(s):
real_downloader = None
if real_downloader:
- self.to_screen(
- '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+ self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
@@ -150,7 +146,7 @@ class HlsFD(FragmentFD):
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
- extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
+ extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
@@ -172,7 +168,7 @@ class HlsFD(FragmentFD):
frag_url = (
line
if re.match(r'^https?://', line)
- else compat_urlparse.urljoin(man_url, line))
+ else urllib.parse.urljoin(man_url, line))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
@@ -197,10 +193,18 @@ class HlsFD(FragmentFD):
frag_url = (
map_info.get('URI')
if re.match(r'^https?://', map_info.get('URI'))
- else compat_urlparse.urljoin(man_url, map_info.get('URI')))
+ else urllib.parse.urljoin(man_url, map_info.get('URI')))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
+ if map_info.get('BYTERANGE'):
+ splitted_byte_range = map_info.get('BYTERANGE').split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+
fragments.append({
'frag_index': frag_index,
'url': frag_url,
@@ -210,14 +214,6 @@ class HlsFD(FragmentFD):
})
media_sequence += 1
- if map_info.get('BYTERANGE'):
- splitted_byte_range = map_info.get('BYTERANGE').split('@')
- sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
- byte_range = {
- 'start': sub_range_start,
- 'end': sub_range_start + int(splitted_byte_range[0]),
- }
-
elif line.startswith('#EXT-X-KEY'):
decrypt_url = decrypt_info.get('URI')
decrypt_info = parse_m3u8_attributes(line[11:])
@@ -225,7 +221,7 @@ class HlsFD(FragmentFD):
if 'IV' in decrypt_info:
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
if not re.match(r'^https?://', decrypt_info['URI']):
- decrypt_info['URI'] = compat_urlparse.urljoin(
+ decrypt_info['URI'] = urllib.parse.urljoin(
man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
@@ -339,7 +335,7 @@ class HlsFD(FragmentFD):
continue
block.write_into(output)
- return output.getvalue().encode('utf-8')
+ return output.getvalue().encode()
def fin_fragments():
dedup_window = extra_state.get('webvtt_dedup_window')
@@ -350,7 +346,7 @@ class HlsFD(FragmentFD):
for cue in dedup_window:
webvtt.CueBlock.from_json(cue).write_into(output)
- return output.getvalue().encode('utf-8')
+ return output.getvalue().encode()
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
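
The relocated `BYTERANGE` handling parses the HLS `<length>[@<offset>]` syntax, continuing from the previous range's end when the offset is omitted. As a standalone function:

```python
def parse_byterange(value, previous_end=0):
    # HLS BYTERANGE is '<length>[@<offset>]'
    length, _, offset = value.partition('@')
    start = int(offset) if offset else previous_end
    return {'start': start, 'end': start + int(length)}


byte_range = parse_byterange('1000@2000')
print(byte_range)                                 # {'start': 2000, 'end': 3000}
print(parse_byterange('500', byte_range['end']))  # {'start': 3000, 'end': 3500}
```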
diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py
index 591a9b0..95c870e 100644
--- a/hypervideo_dl/downloader/http.py
+++ b/hypervideo_dl/downloader/http.py
@@ -1,29 +1,33 @@
-from __future__ import unicode_literals
-
+import http.client
import os
+import random
+import socket
import ssl
import time
-import random
+import urllib.error
from .common import FileDownloader
-from ..compat import (
- compat_urllib_error,
- compat_http_client
-)
from ..utils import (
ContentTooShortError,
+ RetryManager,
+ ThrottledDownload,
+ XAttrMetadataError,
+ XAttrUnavailableError,
encodeFilename,
int_or_none,
parse_http_range,
sanitized_Request,
- ThrottledDownload,
try_call,
write_xattr,
- XAttrMetadataError,
- XAttrUnavailableError,
)
-RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException)
+RESPONSE_READ_EXCEPTIONS = (
+ TimeoutError,
+ socket.timeout, # compat: py < 3.10
+ ConnectionError,
+ ssl.SSLError,
+ http.client.HTTPException
+)
class HttpFD(FileDownloader):
@@ -69,9 +73,6 @@ class HttpFD(FileDownloader):
ctx.is_resume = ctx.resume_len > 0
- count = 0
- retries = self.params.get('retries', 0)
-
class SucceedDownload(Exception):
pass
@@ -134,19 +135,18 @@ class HttpFD(FileDownloader):
if has_range:
content_range = ctx.data.headers.get('Content-Range')
content_range_start, content_range_end, content_len = parse_http_range(content_range)
- if content_range_start is not None and range_start == content_range_start:
- # Content-Range is present and matches requested Range, resume is possible
- accept_content_len = (
+ # Content-Range is present and matches requested Range, resume is possible
+ if range_start == content_range_start and (
# Non-chunked download
not ctx.chunk_size
# Chunked download and requested piece or
# its part is promised to be served
or content_range_end == range_end
- or content_len < range_end)
- if accept_content_len:
- ctx.content_len = content_len
- ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0)
- return
+ or content_len < range_end):
+ ctx.content_len = content_len
+ if content_len or req_end:
+ ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
+ return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
@@ -154,7 +154,7 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
- except (compat_urllib_error.HTTPError, ) as err:
+ except urllib.error.HTTPError as err:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
@@ -162,7 +162,7 @@ class HttpFD(FileDownloader):
ctx.data = self.ydl.urlopen(
sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
- except (compat_urllib_error.HTTPError, ) as err:
+ except urllib.error.HTTPError as err:
if err.code < 500 or err.code >= 600:
raise
else:
@@ -195,7 +195,7 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error
raise
raise RetryDownload(err)
- except compat_urllib_error.URLError as err:
+ except urllib.error.URLError as err:
if isinstance(err.reason, ssl.CertificateError):
raise
raise RetryDownload(err)
@@ -204,6 +204,12 @@ class HttpFD(FileDownloader):
except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err)
+ def close_stream():
+ if ctx.stream is not None:
+ if not ctx.tmpfilename == '-':
+ ctx.stream.close()
+ ctx.stream = None
+
def download():
data_len = ctx.data.info().get('Content-length', None)
@@ -220,10 +226,12 @@ class HttpFD(FileDownloader):
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
- self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ self.to_screen(
+ f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.')
return False
if max_data_len is not None and data_len > max_data_len:
- self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ self.to_screen(
+ f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.')
return False
byte_counter = 0 + ctx.resume_len
@@ -235,12 +243,9 @@ class HttpFD(FileDownloader):
before = start # start measuring
def retry(e):
- to_stdout = ctx.tmpfilename == '-'
- if ctx.stream is not None:
- if not to_stdout:
- ctx.stream.close()
- ctx.stream = None
- ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
+ close_stream()
+ ctx.resume_len = (byte_counter if ctx.tmpfilename == '-'
+ else os.path.getsize(encodeFilename(ctx.tmpfilename)))
raise RetryDownload(e)
while True:
@@ -264,19 +269,19 @@ class HttpFD(FileDownloader):
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
- except (OSError, IOError) as err:
+ except OSError as err:
self.report_error('unable to open for writing: %s' % str(err))
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
- write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+ write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
ctx.stream.write(data_block)
- except (IOError, OSError) as err:
+ except OSError as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
return False
@@ -342,9 +347,7 @@ class HttpFD(FileDownloader):
if data_len is not None and byte_counter != data_len:
err = ContentTooShortError(byte_counter, int(data_len))
- if count <= retries:
- retry(err)
- raise err
+ retry(err)
self.try_rename(ctx.tmpfilename, ctx.filename)
@@ -363,21 +366,20 @@ class HttpFD(FileDownloader):
return True
- while count <= retries:
+ for retry in RetryManager(self.params.get('retries'), self.report_retry):
try:
establish_connection()
return download()
- except RetryDownload as e:
- count += 1
- if count <= retries:
- self.report_retry(e.source_error, count, retries)
- else:
- self.to_screen(f'[download] Got server HTTP error: {e.source_error}')
+ except RetryDownload as err:
+ retry.error = err.source_error
continue
except NextFragment:
+ retry.error = None
+ retry.attempt -= 1
continue
except SucceedDownload:
return True
-
- self.report_error('giving up after %s retries' % retries)
+ except: # noqa: E722
+ close_stream()
+ raise
return False
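
The manual `count`/`retries` bookkeeping in http.py becomes a `for retry in RetryManager(...)` loop where assigning `retry.error` requests another attempt. A bare-bones model of that iteration protocol (the real class in `hypervideo_dl.utils` does more, e.g. sleeping between attempts):

```python
class RetryManagerSketch:
    def __init__(self, retries, error_callback):
        self.retries, self.attempt, self.error = retries or 0, 0, None
        self._callback = error_callback

    def __iter__(self):
        while True:
            self.attempt += 1
            self.error = None
            yield self
            if self.error is None:
                return  # loop body succeeded
            self._callback(self.error, self.attempt, self.retries)
            if self.attempt > self.retries:
                return  # retries exhausted


for retry in RetryManagerSketch(2, lambda e, c, r: print(f'{e!r}: attempt {c} of {r}')):
    if retry.attempt < 2:
        retry.error = ConnectionError('reset')  # schedules another attempt
    else:
        print('succeeded on attempt', retry.attempt)
```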
diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py
index 4d5618c..a157a8a 100644
--- a/hypervideo_dl/downloader/ism.py
+++ b/hypervideo_dl/downloader/ism.py
@@ -1,27 +1,23 @@
-from __future__ import unicode_literals
-
-import time
import binascii
import io
+import struct
+import time
+import urllib.error
from .fragment import FragmentFD
-from ..compat import (
- compat_Struct,
- compat_urllib_error,
-)
+from ..utils import RetryManager
-u8 = compat_Struct('>B')
-u88 = compat_Struct('>Bx')
-u16 = compat_Struct('>H')
-u1616 = compat_Struct('>Hxx')
-u32 = compat_Struct('>I')
-u64 = compat_Struct('>Q')
+u8 = struct.Struct('>B')
+u88 = struct.Struct('>Bx')
+u16 = struct.Struct('>H')
+u1616 = struct.Struct('>Hxx')
+u32 = struct.Struct('>I')
+u64 = struct.Struct('>Q')
-
-s88 = compat_Struct('>bx')
-s16 = compat_Struct('>h')
-s1616 = compat_Struct('>hxx')
-s32 = compat_Struct('>i')
+s88 = struct.Struct('>bx')
+s16 = struct.Struct('>h')
+s1616 = struct.Struct('>hxx')
+s32 = struct.Struct('>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
@@ -142,6 +138,8 @@ def write_piff_header(stream, params):
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
+ if fourcc == 'EC-3':
+ sample_entry_box = box(b'ec-3', sample_entry_payload)
elif stream_type == 'video':
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
@@ -156,7 +154,7 @@ def write_piff_header(stream, params):
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
- codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
+ codec_private_data = binascii.unhexlify(params['codec_private_data'].encode())
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
@@ -235,8 +233,6 @@ class IsmFD(FragmentFD):
Download segments in an ISM manifest
"""
- FD_NAME = 'ism'
-
def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
@@ -252,7 +248,6 @@ class IsmFD(FragmentFD):
'ism_track_written': False,
})
- fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
@@ -260,8 +255,10 @@ class IsmFD(FragmentFD):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
- count = 0
- while count <= fragment_retries:
+
+ retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+ frag_index=frag_index, fatal=not skip_unavailable_fragments)
+ for retry in retry_manager:
try:
success = self._download_fragment(ctx, segment['url'], info_dict)
if not success:
@@ -274,18 +271,13 @@ class IsmFD(FragmentFD):
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content)
- break
- except compat_urllib_error.HTTPError as err:
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- if skip_unavailable_fragments:
- self.report_skip_fragment(frag_index)
+ except urllib.error.HTTPError as err:
+ retry.error = err
continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False
- self._finish_frag_download(ctx, info_dict)
+ if retry_manager.error:
+ if not skip_unavailable_fragments:
+ return False
+ self.report_skip_fragment(frag_index)
- return True
+ return self._finish_frag_download(ctx, info_dict)
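
With compat_Struct gone, ism.py builds its big-endian packers directly from the stdlib struct module; write_piff_header composes them into ISO-BMFF boxes, where each box is a 4-byte big-endian size (header included), a 4-byte type, and a payload. A small self-contained illustration of that layout (the box helper below mirrors the one used in ism.py):

    import struct

    u16 = struct.Struct('>H')   # big-endian unsigned 16-bit
    u32 = struct.Struct('>I')   # big-endian unsigned 32-bit

    def box(box_type, payload):
        # the size field counts the 8-byte header (size + type) plus the payload
        return u32.pack(8 + len(payload)) + box_type + payload

    ftyp = box(b'ftyp', b'isml' + u32.pack(1) + b'piff' + b'iso2')
    assert len(ftyp) == u32.unpack(ftyp[:4])[0]
    assert ftyp[4:8] == b'ftyp'
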
diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py
index c8332c0..170a78d 100644
--- a/hypervideo_dl/downloader/mhtml.py
+++ b/hypervideo_dl/downloader/mhtml.py
@@ -1,24 +1,15 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import io
import quopri
import re
import uuid
from .fragment import FragmentFD
-from ..utils import (
- escapeHTML,
- formatSeconds,
- srt_subtitles_timecode,
- urljoin,
-)
+from ..compat import imghdr
+from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
from ..version import __version__ as YT_DLP_VERSION
class MhtmlFD(FragmentFD):
- FD_NAME = 'mhtml'
-
_STYLESHEET = """\
html, body {
margin: 0;
@@ -62,7 +53,7 @@ body > figure > img {
def _escape_mime(s):
return '=?utf-8?Q?' + (b''.join(
bytes((b,)) if b >= 0x20 else b'=%02X' % b
- for b in quopri.encodestring(s.encode('utf-8'), header=True)
+ for b in quopri.encodestring(s.encode(), header=True)
)).decode('us-ascii') + '?='
def _gen_cid(self, i, fragment, frag_boundary):
@@ -159,7 +150,7 @@ body > figure > img {
length=len(stub),
title=self._escape_mime(title),
stub=stub
- ).encode('utf-8'))
+ ).encode())
extra_state['header_written'] = True
for i, fragment in enumerate(fragments):
@@ -176,21 +167,13 @@ body > figure > img {
continue
frag_content = self._read_fragment(ctx)
- mime_type = b'image/jpeg'
- if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
- mime_type = b'image/png'
- if frag_content.startswith((b'GIF87a', b'GIF89a')):
- mime_type = b'image/gif'
- if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP':
- mime_type = b'image/webp'
-
frag_header = io.BytesIO()
frag_header.write(
b'--%b\r\n' % frag_boundary.encode('us-ascii'))
frag_header.write(
b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
frag_header.write(
- b'Content-type: %b\r\n' % mime_type)
+ b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode())
frag_header.write(
b'Content-length: %u\r\n' % len(frag_content))
frag_header.write(
@@ -203,5 +186,4 @@ body > figure > img {
ctx['dest_stream'].write(
b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
- self._finish_frag_download(ctx, info_dict)
- return True
+ return self._finish_frag_download(ctx, info_dict)
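
The MIME-sniffing rewrite above does more than deduplicate: the removed branch compared frag_content[8:12] against the str 'WEBP' rather than b'WEBP', which is always False on Python 3, so WEBP thumbnails were silently mislabelled as image/jpeg. Delegating to imghdr fixes that while keeping the jpeg fallback. The same detection with the stdlib module (hypervideo imports a vendored copy from ..compat because stdlib imghdr is deprecated and removed in Python 3.13; the vendored variant also accepts h without the positional file argument):

    import imghdr  # stdlib variant; requires the positional `file` argument

    samples = {
        b'\x89PNG\r\n\x1a\n' + b'\x00' * 8: 'png',
        b'GIF89a' + b'\x00' * 8: 'gif',
        b'RIFF\x00\x00\x00\x00WEBP': 'webp',   # detected correctly, unlike the removed code
        b'\x00' * 16: None,                    # unknown -> fall back to jpeg
    }
    for blob, expected in samples.items():
        kind = imghdr.what(None, h=blob)
        assert kind == expected
        print('Content-type: image/%s' % (kind or 'jpeg'))
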
diff --git a/hypervideo_dl/downloader/niconico.py b/hypervideo_dl/downloader/niconico.py
index 521dfec..77ed39e 100644
--- a/hypervideo_dl/downloader/niconico.py
+++ b/hypervideo_dl/downloader/niconico.py
@@ -1,22 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import threading
+from . import get_suitable_downloader
from .common import FileDownloader
-from ..downloader import get_suitable_downloader
-from ..extractor.niconico import NiconicoIE
from ..utils import sanitized_Request
class NiconicoDmcFD(FileDownloader):
""" Downloading niconico douga from DMC with heartbeat """
- FD_NAME = 'niconico_dmc'
-
def real_download(self, filename, info_dict):
- self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+ from ..extractor.niconico import NiconicoIE
+ self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
@@ -54,4 +49,4 @@ class NiconicoDmcFD(FileDownloader):
with heartbeat_lock:
timer[0].cancel()
download_complete = True
- return success
+ return success
diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py
index 90f1acf..0e09525 100644
--- a/hypervideo_dl/downloader/rtmp.py
+++ b/hypervideo_dl/downloader/rtmp.py
@@ -1,18 +1,15 @@
-from __future__ import unicode_literals
-
import os
import re
import subprocess
import time
from .common import FileDownloader
-from ..compat import compat_str
from ..utils import (
+ Popen,
check_executable,
- encodeFilename,
encodeArgument,
+ encodeFilename,
get_exe_version,
- Popen,
)
@@ -94,8 +91,7 @@ class RtmpFD(FileDownloader):
self.to_screen('')
return proc.wait()
except BaseException: # Including KeyboardInterrupt
- proc.kill()
- proc.wait()
+ proc.kill(timeout=None)
raise
url = info_dict['url']
@@ -146,7 +142,7 @@ class RtmpFD(FileDownloader):
if isinstance(conn, list):
for entry in conn:
basic_args += ['--conn', entry]
- elif isinstance(conn, compat_str):
+ elif isinstance(conn, str):
basic_args += ['--conn', conn]
if protocol is not None:
basic_args += ['--protocol', protocol]
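
proc.kill(timeout=None) only works because this hunk also swaps subprocess.Popen for the Popen wrapper in hypervideo_dl.utils, whose kill() can reap the child in the same call. A simplified sketch of such a wrapper (an assumption for illustration, not the actual utils.Popen):

    import subprocess

    class Popen(subprocess.Popen):
        def kill(self, *, timeout=0):
            super().kill()
            if timeout != 0:        # timeout=None means "wait until it exits"
                self.wait(timeout=timeout)
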
diff --git a/hypervideo_dl/downloader/rtsp.py b/hypervideo_dl/downloader/rtsp.py
index 7815d59..e89269f 100644
--- a/hypervideo_dl/downloader/rtsp.py
+++ b/hypervideo_dl/downloader/rtsp.py
@@ -1,13 +1,8 @@
-from __future__ import unicode_literals
-
import os
import subprocess
from .common import FileDownloader
-from ..utils import (
- check_executable,
- encodeFilename,
-)
+from ..utils import check_executable, encodeFilename
class RtspFD(FileDownloader):
@@ -32,7 +27,7 @@ class RtspFD(FileDownloader):
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
+ self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py
index 58e2bce..6837ff1 100644
--- a/hypervideo_dl/downloader/websocket.py
+++ b/hypervideo_dl/downloader/websocket.py
@@ -1,19 +1,12 @@
+import asyncio
+import contextlib
import os
import signal
-import asyncio
import threading
-try:
- import websockets
-except (ImportError, SyntaxError):
- # websockets 3.10 on python 3.6 causes SyntaxError
- # See https://github.com/hypervideo/hypervideo/issues/2633
- has_websockets = False
-else:
- has_websockets = True
-
from .common import FileDownloader
from .external import FFmpegFD
+from ..dependencies import websockets
class FFmpegSinkFD(FileDownloader):
@@ -26,14 +19,12 @@ class FFmpegSinkFD(FileDownloader):
async def call_conn(proc, stdin):
try:
await self.real_connection(stdin, info_dict)
- except (BrokenPipeError, OSError):
+ except OSError:
pass
finally:
- try:
+ with contextlib.suppress(OSError):
stdin.flush()
stdin.close()
- except OSError:
- pass
os.kill(os.getpid(), signal.SIGINT)
class FFmpegStdinFD(FFmpegFD):
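
contextlib.suppress(OSError) is behaviour-preserving here: like the removed try/except, it guards the whole block, so if stdin.flush() raises, stdin.close() is skipped and execution continues after the with statement. A tiny demonstration:

    import contextlib

    class BrokenPipe:
        def flush(self):
            raise OSError('broken pipe')
        def close(self):
            print('close() reached')   # never printed below

    stdin = BrokenPipe()
    with contextlib.suppress(OSError):
        stdin.flush()   # raises -> the rest of the block is skipped
        stdin.close()
    print('continuing after suppress')
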
diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py
index dd21ac8..dfd290a 100644
--- a/hypervideo_dl/downloader/youtube_live_chat.py
+++ b/hypervideo_dl/downloader/youtube_live_chat.py
@@ -1,24 +1,20 @@
-from __future__ import division, unicode_literals
-
import json
import time
+import urllib.error
from .fragment import FragmentFD
-from ..compat import compat_urllib_error
from ..utils import (
- try_get,
+ RegexNotFoundError,
+ RetryManager,
dict_get,
int_or_none,
- RegexNotFoundError,
+ try_get,
)
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
class YoutubeLiveChatFD(FragmentFD):
""" Downloads YouTube live chats fragment by fragment """
- FD_NAME = 'youtube_live_chat'
-
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
@@ -26,7 +22,6 @@ class YoutubeLiveChatFD(FragmentFD):
self.report_warning('Live chat download runs until the livestream ends. '
'If you wish to download the video simultaneously, run a separate hypervideo instance')
- fragment_retries = self.params.get('fragment_retries', 0)
test = self.params.get('test', False)
ctx = {
@@ -35,7 +30,9 @@ class YoutubeLiveChatFD(FragmentFD):
'total_frags': None,
}
- ie = YT_BaseIE(self.ydl)
+ from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+ ie = YoutubeBaseInfoExtractor(self.ydl)
start_time = int(time.time() * 1000)
@@ -54,7 +51,7 @@ class YoutubeLiveChatFD(FragmentFD):
replay_chat_item_action = action['replayChatItemAction']
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
- json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+ json.dumps(action, ensure_ascii=False).encode() + b'\n')
if offset is not None:
continuation = try_get(
live_chat_continuation,
@@ -96,7 +93,7 @@ class YoutubeLiveChatFD(FragmentFD):
'isLive': True,
}
processed_fragment.extend(
- json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+ json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
continuation_data_getters = [
lambda x: x['continuations'][0]['invalidationContinuationData'],
lambda x: x['continuations'][0]['timedContinuationData'],
@@ -112,8 +109,7 @@ class YoutubeLiveChatFD(FragmentFD):
return continuation_id, live_offset, click_tracking_params
def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
- count = 0
- while count <= fragment_retries:
+ for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
try:
success = dl_fragment(url, request_data, headers)
if not success:
@@ -128,21 +124,15 @@ class YoutubeLiveChatFD(FragmentFD):
live_chat_continuation = try_get(
data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
- if info_dict['protocol'] == 'youtube_live_chat_replay':
- if frag_index == 1:
- continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
- else:
- continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
- elif info_dict['protocol'] == 'youtube_live_chat':
- continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
- return True, continuation_id, offset, click_tracking_params
- except compat_urllib_error.HTTPError as err:
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False, None, None, None
+
+ func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
+ or frag_index == 1 and try_refresh_replay_beginning
+ or parse_actions_replay)
+ return (True, *func(live_chat_continuation))
+ except urllib.error.HTTPError as err:
+ retry.error = err
+ continue
+ return False, None, None, None
self._prepare_and_start_frag_download(ctx, info_dict)
@@ -190,7 +180,7 @@ class YoutubeLiveChatFD(FragmentFD):
request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
headers.update({'content-type': 'application/json'})
- fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+ fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
url, frag_index, fragment_request_data, headers)
else:
@@ -201,8 +191,7 @@ class YoutubeLiveChatFD(FragmentFD):
if test:
break
- self._finish_frag_download(ctx, info_dict)
- return True
+ return self._finish_frag_download(ctx, info_dict)
@staticmethod
def parse_live_timestamp(action):
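
The new parser selection in download_and_parse_fragment relies on Python's and/or chaining: because function objects are always truthy, the expression evaluates to the first callable whose guard holds, with parse_actions_replay as the default. In isolation, with zero-argument stand-ins for the real parsers:

    def parse_actions_live(): return 'live'
    def try_refresh_replay_beginning(): return 'replay-start'
    def parse_actions_replay(): return 'replay'

    def pick(protocol, frag_index):
        return (protocol == 'youtube_live_chat' and parse_actions_live
                or frag_index == 1 and try_refresh_replay_beginning
                or parse_actions_replay)

    assert pick('youtube_live_chat', 7) is parse_actions_live
    assert pick('youtube_live_chat_replay', 1) is try_refresh_replay_beginning
    assert pick('youtube_live_chat_replay', 7) is parse_actions_replay
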
diff --git a/hypervideo_dl/extractor/__init__.py b/hypervideo_dl/extractor/__init__.py
index b354842..6bfa4bd 100644
--- a/hypervideo_dl/extractor/__init__.py
+++ b/hypervideo_dl/extractor/__init__.py
@@ -1,33 +1,15 @@
-import os
+from ..compat.compat_utils import passthrough_module
-from ..utils import load_plugins
-
-_LAZY_LOADER = False
-if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
- try:
- from .lazy_extractors import *
- from .lazy_extractors import _ALL_CLASSES
- _LAZY_LOADER = True
- except ImportError:
- pass
-
-if not _LAZY_LOADER:
- from .extractors import *
- _ALL_CLASSES = [
- klass
- for name, klass in globals().items()
- if name.endswith('IE') and name != 'GenericIE'
- ]
- _ALL_CLASSES.append(GenericIE)
-
-_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+passthrough_module(__name__, '.extractors')
+del passthrough_module
def gen_extractor_classes():
""" Return a list of supported extractors.
The order does matter; the first extractor matched is the one handling the URL.
"""
+ from .extractors import _ALL_CLASSES
+
return _ALL_CLASSES
@@ -38,17 +20,23 @@ def gen_extractors():
return [klass() for klass in gen_extractor_classes()]
-def list_extractors(age_limit):
- """
- Return a list of extractors that are suitable for the given age,
- sorted by extractor ID.
- """
+def list_extractor_classes(age_limit=None):
+ """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+ from .generic import GenericIE
+
+ yield from sorted(filter(
+ lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
+ gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
+ yield GenericIE
- return sorted(
- filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
- key=lambda ie: ie.IE_NAME.lower())
+
+def list_extractors(age_limit=None):
+ """Return a list of extractor instances that are suitable for the given age, sorted by extractor name"""
+ return [ie() for ie in list_extractor_classes(age_limit)]
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
- return globals()[ie_name + 'IE']
+ from . import extractors
+
+ return getattr(extractors, f'{ie_name}IE')
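
passthrough_module(__name__, '.extractors') keeps imports such as `from hypervideo_dl.extractor import GenericIE` working even though the classes now live in the new _extractors.py below: unknown attribute lookups on the package are forwarded to that submodule, which is imported only on first access, so plain `import hypervideo_dl.extractor` stays cheap. A simplified equivalent using PEP 562 module-level __getattr__ (an illustration, not hypervideo's actual compat_utils implementation):

    # in a package's __init__.py
    import importlib

    def __getattr__(name):
        # resolved lazily on first access; the submodule is then cached in sys.modules
        extractors = importlib.import_module('.extractors', __package__)
        return getattr(extractors, name)
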
diff --git a/hypervideo_dl/extractor/_extractors.py b/hypervideo_dl/extractor/_extractors.py
new file mode 100644
index 0000000..2fe15f6
--- /dev/null
+++ b/hypervideo_dl/extractor/_extractors.py
@@ -0,0 +1,2354 @@
+# flake8: noqa: F401
+
+from .youtube import ( # Youtube is moved to the top to improve performance
+ YoutubeIE,
+ YoutubeClipIE,
+ YoutubeFavouritesIE,
+ YoutubeNotificationsIE,
+ YoutubeHistoryIE,
+ YoutubeTabIE,
+ YoutubeLivestreamEmbedIE,
+ YoutubePlaylistIE,
+ YoutubeRecommendedIE,
+ YoutubeSearchDateIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
+ YoutubeMusicSearchURLIE,
+ YoutubeSubscriptionsIE,
+ YoutubeStoriesIE,
+ YoutubeTruncatedIDIE,
+ YoutubeTruncatedURLIE,
+ YoutubeYtBeIE,
+ YoutubeYtUserIE,
+ YoutubeWatchLaterIE,
+ YoutubeShortsAudioPivotIE
+)
+
+from .abc import (
+ ABCIE,
+ ABCIViewIE,
+ ABCIViewShowSeriesIE,
+)
+from .abcnews import (
+ AbcNewsIE,
+ AbcNewsVideoIE,
+)
+from .abcotvs import (
+ ABCOTVSIE,
+ ABCOTVSClipsIE,
+)
+from .abematv import (
+ AbemaTVIE,
+ AbemaTVTitleIE,
+)
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
+from .acfun import AcFunVideoIE, AcFunBangumiIE
+from .adn import ADNIE
+from .adobeconnect import AdobeConnectIE
+from .adobetv import (
+ AdobeTVEmbedIE,
+ AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
+ AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import (
+ AENetworksIE,
+ AENetworksCollectionIE,
+ AENetworksShowIE,
+ HistoryTopicIE,
+ HistoryPlayerIE,
+ BiographyIE,
+)
+from .aeonco import AeonCoIE
+from .afreecatv import (
+ AfreecaTVIE,
+ AfreecaTVLiveIE,
+ AfreecaTVUserIE,
+)
+from .agora import (
+ TokFMAuditionIE,
+ TokFMPodcastIE,
+ WyborczaPodcastIE,
+ WyborczaVideoIE,
+)
+from .airmozilla import AirMozillaIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
+from .amara import AmaraIE
+from .alura import (
+ AluraIE,
+ AluraCourseIE
+)
+from .amcnetworks import AMCNetworksIE
+from .amazon import AmazonStoreIE
+from .amazonminitv import (
+ AmazonMiniTVIE,
+ AmazonMiniTVSeasonIE,
+ AmazonMiniTVSeriesIE,
+)
+from .americastestkitchen import (
+ AmericasTestKitchenIE,
+ AmericasTestKitchenSeasonIE,
+)
+from .angel import AngelIE
+from .anvato import AnvatoIE
+from .aol import AolIE
+from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
+from .alsace20tv import (
+ Alsace20TVIE,
+ Alsace20TVEmbedIE,
+)
+from .apa import APAIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
+from .applepodcasts import ApplePodcastsIE
+from .archiveorg import (
+ ArchiveOrgIE,
+ YoutubeWebArchiveIE,
+)
+from .arcpublishing import ArcPublishingIE
+from .arkena import ArkenaIE
+from .ard import (
+ ARDBetaMediathekIE,
+ ARDIE,
+ ARDMediathekIE,
+)
+from .arte import (
+ ArteTVIE,
+ ArteTVEmbedIE,
+ ArteTVPlaylistIE,
+ ArteTVCategoryIE,
+)
+from .arnes import ArnesIE
+from .asiancrush import (
+ AsianCrushIE,
+ AsianCrushPlaylistIE,
+)
+from .atresplayer import AtresPlayerIE
+from .atscaleconf import AtScaleConfEventIE
+from .atttechchannel import ATTTechChannelIE
+from .atvat import ATVAtIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiodraft import (
+ AudiodraftCustomIE,
+ AudiodraftGenericIE,
+)
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .audius import (
+ AudiusIE,
+ AudiusTrackIE,
+ AudiusPlaylistIE,
+ AudiusProfileIE,
+)
+from .awaan import (
+ AWAANIE,
+ AWAANVideoIE,
+ AWAANLiveIE,
+ AWAANSeasonIE,
+)
+from .azmedien import AZMedienIE
+from .baidu import BaiduVideoIE
+from .banbye import (
+ BanByeIE,
+ BanByeChannelIE,
+)
+from .bandaichannel import BandaiChannelIE
+from .bandcamp import (
+ BandcampIE,
+ BandcampAlbumIE,
+ BandcampWeeklyIE,
+ BandcampUserIE,
+)
+from .bannedvideo import BannedVideoIE
+from .bbc import (
+ BBCCoUkIE,
+ BBCCoUkArticleIE,
+ BBCCoUkIPlayerEpisodesIE,
+ BBCCoUkIPlayerGroupIE,
+ BBCCoUkPlaylistIE,
+ BBCIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
+from .beatport import BeatportIE
+from .berufetv import BerufeTVIE
+from .bet import BetIE
+from .bfi import BFIPlayerIE
+from .bfmtv import (
+ BFMTVIE,
+ BFMTVLiveIE,
+ BFMTVArticleIE,
+)
+from .bibeltv import BibelTVIE
+from .bigflix import BigflixIE
+from .bigo import BigoIE
+from .bild import BildIE
+from .bilibili import (
+ BiliBiliIE,
+ BiliBiliBangumiIE,
+ BiliBiliBangumiMediaIE,
+ BiliBiliSearchIE,
+ BilibiliCategoryIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
+ BiliBiliPlayerIE,
+ BilibiliSpaceVideoIE,
+ BilibiliSpaceAudioIE,
+ BilibiliSpacePlaylistIE,
+ BiliIntlIE,
+ BiliIntlSeriesIE,
+ BiliLiveIE,
+)
+from .biobiochiletv import BioBioChileTVIE
+from .bitchute import (
+ BitChuteIE,
+ BitChuteChannelIE,
+)
+from .bitwave import (
+ BitwaveReplayIE,
+ BitwaveStreamIE,
+)
+from .biqle import BIQLEIE
+from .blackboardcollaborate import BlackboardCollaborateIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
+from .blogger import BloggerIE
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bongacams import BongaCamsIE
+from .bostonglobe import BostonGlobeIE
+from .box import BoxIE
+from .booyah import BooyahClipsIE
+from .bpb import BpbIE
+from .br import (
+ BRIE,
+ BRMediathekIE,
+)
+from .bravotv import BravoTVIE
+from .breakcom import BreakIE
+from .breitbart import BreitBartIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .businessinsider import BusinessInsiderIE
+from .bundesliga import BundesligaIE
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .cableav import CableAVIE
+from .callin import CallinIE
+from .caltrans import CaltransIE
+from .cam4 import CAM4IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
+from .cammodels import CamModelsIE
+from .camsoda import CamsodaIE
+from .camtasia import CamtasiaEmbedIE
+from .camwithher import CamWithHerIE
+from .canalalpha import CanalAlphaIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .canvas import (
+ CanvasIE,
+ CanvasEenIE,
+ VrtNUIE,
+ DagelijkseKostIE,
+)
+from .carambatv import (
+ CarambaTVIE,
+ CarambaTVPageIE,
+)
+from .cartoonnetwork import CartoonNetworkIE
+from .cbc import (
+ CBCIE,
+ CBCPlayerIE,
+ CBCGemIE,
+ CBCGemPlaylistIE,
+ CBCGemLiveIE,
+)
+from .cbs import CBSIE
+from .cbslocal import (
+ CBSLocalIE,
+ CBSLocalArticleIE,
+)
+from .cbsinteractive import CBSInteractiveIE
+from .cbsnews import (
+ CBSNewsEmbedIE,
+ CBSNewsIE,
+ CBSNewsLiveVideoIE,
+)
+from .cbssports import (
+ CBSSportsEmbedIE,
+ CBSSportsIE,
+ TwentyFourSevenSportsIE,
+)
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
+from .ccma import CCMAIE
+from .cctv import CCTVIE
+from .cda import CDAIE
+from .cellebrite import CellebriteIE
+from .ceskatelevize import CeskaTelevizeIE
+from .cgtn import CGTNIE
+from .channel9 import Channel9IE
+from .charlierose import CharlieRoseIE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chingari import (
+ ChingariIE,
+ ChingariUserIE,
+)
+from .chirbit import (
+ ChirbitIE,
+ ChirbitProfileIE,
+)
+from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
+from .cinetecamilano import CinetecaMilanoIE
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
+from .ciscowebex import CiscoWebexIE
+from .cjsw import CJSWIE
+from .cliphunter import CliphunterIE
+from .clippit import ClippitIE
+from .cliprs import ClipRsIE
+from .clipsyndicate import ClipsyndicateIE
+from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
+from .cloudy import CloudyIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import (
+ CNBCIE,
+ CNBCVideoIE,
+)
+from .cnn import (
+ CNNIE,
+ CNNBlogsIE,
+ CNNArticleIE,
+ CNNIndonesiaIE,
+)
+from .coub import CoubIE
+from .comedycentral import (
+ ComedyCentralIE,
+ ComedyCentralTVIE,
+)
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+ ViewSourceIE,
+)
+from .condenast import CondeNastIE
+from .contv import CONtvIE
+from .corus import CorusIE
+from .cpac import (
+ CPACIE,
+ CPACPlaylistIE,
+)
+from .cozytv import CozyTVIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .craftsy import CraftsyIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crowdbunker import (
+ CrowdBunkerIE,
+ CrowdBunkerChannelIE,
+)
+from .crunchyroll import (
+ CrunchyrollBetaIE,
+ CrunchyrollBetaShowIE,
+)
+from .cspan import CSpanIE, CSpanCongressIE
+from .ctsnews import CtsNewsIE
+from .ctv import CTVIE
+from .ctvnews import CTVNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+ CuriosityStreamIE,
+ CuriosityStreamCollectionsIE,
+ CuriosityStreamSeriesIE,
+)
+from .cwtv import CWTVIE
+from .cybrary import (
+ CybraryIE,
+ CybraryCourseIE
+)
+from .daftsex import DaftsexIE
+from .dailymail import DailyMailIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionPlaylistIE,
+ DailymotionUserIE,
+)
+from .dailywire import (
+ DailyWireIE,
+ DailyWirePodcastIE,
+)
+from .damtomo import (
+ DamtomoRecordIE,
+ DamtomoVideoIE,
+)
+from .daum import (
+ DaumIE,
+ DaumClipIE,
+ DaumPlaylistIE,
+ DaumUserIE,
+)
+from .daystar import DaystarClipIE
+from .dbtv import DBTVIE
+from .dctp import DctpTvIE
+from .deezer import (
+ DeezerPlaylistIE,
+ DeezerAlbumIE,
+)
+from .democracynow import DemocracynowIE
+from .detik import DetikEmbedIE
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .digg import DiggIE
+from .dotsub import DotsubIE
+from .douyutv import (
+ DouyuShowIE,
+ DouyuTVIE,
+)
+from .dplay import (
+ DPlayIE,
+ DiscoveryPlusIE,
+ HGTVDeIE,
+ GoDiscoveryIE,
+ TravelChannelIE,
+ CookingChannelIE,
+ HGTVUsaIE,
+ FoodNetworkIE,
+ InvestigationDiscoveryIE,
+ DestinationAmericaIE,
+ AmHistoryChannelIE,
+ ScienceChannelIE,
+ DIYNetworkIE,
+ DiscoveryLifeIE,
+ AnimalPlanetIE,
+ TLCIE,
+ MotorTrendIE,
+ MotorTrendOnDemandIE,
+ DiscoveryPlusIndiaIE,
+ DiscoveryNetworksDeIE,
+ DiscoveryPlusItalyIE,
+ DiscoveryPlusItalyShowIE,
+ DiscoveryPlusIndiaShowIE,
+)
+from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import (
+ DRTVIE,
+ DRTVLiveIE,
+)
+from .dtube import DTubeIE
+from .dvtv import DVTVIE
+from .duboku import (
+ DubokuIE,
+ DubokuPlaylistIE
+)
+from .dumpert import DumpertIE
+from .defense import DefenseGouvFrIE
+from .deuxm import (
+ DeuxMIE,
+ DeuxMNewsIE
+)
+from .digitalconcerthall import DigitalConcertHallIE
+from .discovery import DiscoveryIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
+from .dropbox import DropboxIE
+from .dropout import (
+ DropoutSeasonIE,
+ DropoutIE
+)
+from .dw import (
+ DWIE,
+ DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
+from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
+from .egghead import (
+ EggheadCourseIE,
+ EggheadLessonIE,
+)
+from .ehow import EHowIE
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .ellentube import (
+ EllenTubeIE,
+ EllenTubeVideoIE,
+ EllenTubePlaylistIE,
+)
+from .elonet import ElonetIE
+from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
+from .engadget import EngadgetIE
+from .epicon import (
+ EpiconIE,
+ EpiconSeriesIE,
+)
+from .epoch import EpochIE
+from .eporner import EpornerIE
+from .eroprofile import (
+ EroProfileIE,
+ EroProfileAlbumIE,
+)
+from .ertgr import (
+ ERTFlixCodenameIE,
+ ERTFlixIE,
+ ERTWebtvEmbedIE,
+)
+from .escapist import EscapistIE
+from .espn import (
+ ESPNIE,
+ WatchESPNIE,
+ ESPNArticleIE,
+ FiveThirtyEightIE,
+ ESPNCricInfoIE,
+)
+from .esri import EsriVideoIE
+from .europa import EuropaIE
+from .europeantour import EuropeanTourIE
+from .eurosport import EurosportIE
+from .euscreen import EUScreenIE
+from .expotv import ExpoTVIE
+from .expressen import ExpressenIE
+from .extremetube import ExtremeTubeIE
+from .eyedotv import EyedoTVIE
+from .facebook import (
+ FacebookIE,
+ FacebookPluginsVideoIE,
+ FacebookRedirectURLIE,
+ FacebookReelIE,
+)
+from .fancode import (
+ FancodeVodIE,
+ FancodeLiveIE
+)
+
+from .faz import FazIE
+from .fc2 import (
+ FC2IE,
+ FC2EmbedIE,
+ FC2LiveIE,
+)
+from .fczenit import FczenitIE
+from .fifa import FifaIE
+from .filmmodu import FilmmoduIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
+from .filmweb import FilmwebIE
+from .firsttv import FirstTVIE
+from .fivetv import FiveTVIE
+from .flickr import FlickrIE
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .formula1 import Formula1IE
+from .fourtube import (
+ FourTubeIE,
+ PornTubeIE,
+ PornerBrosIE,
+ FuxIE,
+)
+from .fourzerostudio import (
+ FourZeroStudioArchiveIE,
+ FourZeroStudioClipIE,
+)
+from .fox import FOXIE
+from .fox9 import (
+ FOX9IE,
+ FOX9NewsIE,
+)
+from .foxgay import FoxgayIE
+from .foxnews import (
+ FoxNewsIE,
+ FoxNewsArticleIE,
+ FoxNewsVideoIE,
+)
+from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
+from .franceinter import FranceInterIE
+from .francetv import (
+ FranceTVIE,
+ FranceTVSiteIE,
+ FranceTVInfoIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .frontendmasters import (
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+ FrontendMastersCourseIE
+)
+from .freetv import (
+ FreeTvIE,
+ FreeTvMoviesIE,
+)
+from .fujitv import FujiTVFODPlus7IE
+from .funimation import (
+ FunimationIE,
+ FunimationPageIE,
+ FunimationShowIE,
+)
+from .funk import FunkIE
+from .fusion import FusionIE
+from .fuyintv import FuyinTVIE
+from .gab import (
+ GabTVIE,
+ GabIE,
+)
+from .gaia import GaiaIE
+from .gameinformer import GameInformerIE
+from .gamejolt import (
+ GameJoltIE,
+ GameJoltUserIE,
+ GameJoltGameIE,
+ GameJoltGameSoundtrackIE,
+ GameJoltCommunityIE,
+ GameJoltSearchIE,
+)
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gaskrank import GaskrankIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .gedidigital import GediDigitalIE
+from .generic import GenericIE
+from .genius import (
+ GeniusIE,
+ GeniusLyricsIE,
+)
+from .gettr import (
+ GettrIE,
+ GettrStreamingIE,
+)
+from .gfycat import GfycatIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
+from .glide import GlideIE
+from .globo import (
+ GloboIE,
+ GloboArticleIE,
+)
+from .go import GoIE
+from .godtube import GodTubeIE
+from .gofile import GofileIE
+from .golem import GolemIE
+from .goodgame import GoodGameIE
+from .googledrive import (
+ GoogleDriveIE,
+ GoogleDriveFolderIE,
+)
+from .googlepodcasts import (
+ GooglePodcastsIE,
+ GooglePodcastsFeedIE,
+)
+from .googlesearch import GoogleSearchIE
+from .gopro import GoProIE
+from .goplay import GoPlayIE
+from .goshgay import GoshgayIE
+from .gotostage import GoToStageIE
+from .gputechconf import GPUTechConfIE
+from .gronkh import (
+ GronkhIE,
+ GronkhFeedIE,
+ GronkhVodsIE
+)
+from .groupon import GrouponIE
+from .harpodeon import HarpodeonIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .helsinki import HelsinkiIE
+from .hentaistigma import HentaiStigmaIE
+from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
+from .hidive import HiDiveIE
+from .historicfilms import HistoricFilmsIE
+from .hitbox import HitboxIE, HitboxLiveIE
+from .hitrecord import HitRecordIE
+from .holodex import HolodexIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import (
+ HotStarIE,
+ HotStarPrefixIE,
+ HotStarPlaylistIE,
+ HotStarSeasonIE,
+ HotStarSeriesIE,
+)
+from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
+from .hrfensehen import HRFernsehenIE
+from .hrti import (
+ HRTiIE,
+ HRTiPlaylistIE,
+)
+from .hse import (
+ HSEShowIE,
+ HSEProductIE,
+)
+from .genericembeds import (
+ HTML5MediaEmbedIE,
+ QuotedHTMLIE,
+)
+from .huajiao import HuajiaoIE
+from .huya import HuyaLiveIE
+from .huffpost import HuffPostIE
+from .hungama import (
+ HungamaIE,
+ HungamaSongIE,
+ HungamaAlbumPlaylistIE,
+)
+from .hypem import HypemIE
+from .hytale import HytaleIE
+from .icareus import IcareusIE
+from .ichinanalive import (
+ IchinanaLiveIE,
+ IchinanaLiveClipIE,
+)
+from .ign import (
+ IGNIE,
+ IGNVideoIE,
+ IGNArticleIE,
+)
+from .iheart import (
+ IHeartRadioIE,
+ IHeartRadioPodcastIE,
+)
+from .iltalehti import IltalehtiIE
+from .imdb import (
+ ImdbIE,
+ ImdbListIE
+)
+from .imgur import (
+ ImgurIE,
+ ImgurAlbumIE,
+ ImgurGalleryIE,
+)
+from .ina import InaIE
+from .inc import IncIE
+from .indavideo import IndavideoEmbedIE
+from .infoq import InfoQIE
+from .instagram import (
+ InstagramIE,
+ InstagramIOSIE,
+ InstagramUserIE,
+ InstagramTagIE,
+ InstagramStoryIE,
+)
+from .internazionale import InternazionaleIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import (
+ IPrimaIE,
+ IPrimaCNNIE
+)
+from .iqiyi import (
+ IqiyiIE,
+ IqIE,
+ IqAlbumIE
+)
+from .islamchannel import (
+ IslamChannelIE,
+ IslamChannelSeriesIE,
+)
+from .israelnationalnews import IsraelNationalNewsIE
+from .itprotv import (
+ ITProTVIE,
+ ITProTVCourseIE
+)
+from .itv import (
+ ITVIE,
+ ITVBTCCIE,
+)
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .iwara import (
+ IwaraIE,
+ IwaraPlaylistIE,
+ IwaraUserIE,
+)
+from .ixigua import IxiguaIE
+from .izlesene import IzleseneIE
+from .jable import (
+ JableIE,
+ JablePlaylistIE,
+)
+from .jamendo import (
+ JamendoIE,
+ JamendoAlbumIE,
+)
+from .japandiet import (
+ ShugiinItvLiveIE,
+ ShugiinItvLiveRoomIE,
+ ShugiinItvVodIE,
+ SangiinInstructionIE,
+ SangiinIE,
+)
+from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
+from .joj import JojIE
+from .jwplatform import JWPlatformIE
+from .kakao import KakaoIE
+from .kaltura import KalturaIE
+from .kanal2 import Kanal2IE
+from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
+from .keezmovies import KeezMoviesIE
+from .kelbyone import KelbyOneIE
+from .ketnet import KetnetIE
+from .khanacademy import (
+ KhanAcademyIE,
+ KhanAcademyUnitIE,
+)
+from .kicker import KickerIE
+from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
+from .kinopoisk import KinoPoiskIE
+from .kompas import KompasVideoIE
+from .konserthusetplay import KonserthusetPlayIE
+from .koo import KooIE
+from .kth import KTHIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kusi import KUSIIE
+from .kuwo import (
+ KuwoIE,
+ KuwoAlbumIE,
+ KuwoChartIE,
+ KuwoSingerIE,
+ KuwoCategoryIE,
+ KuwoMvIE,
+)
+from .la7 import (
+ LA7IE,
+ LA7PodcastEpisodeIE,
+ LA7PodcastIE,
+)
+from .laola1tv import (
+ Laola1TvEmbedIE,
+ Laola1TvIE,
+ EHFTVIE,
+ ITTFIE,
+)
+from .lastfm import (
+ LastFMIE,
+ LastFMPlaylistIE,
+ LastFMUserIE,
+)
+from .lbry import (
+ LBRYIE,
+ LBRYChannelIE,
+)
+from .lci import LCIIE
+from .lcp import (
+ LcpPlayIE,
+ LcpIE,
+)
+from .lecture2go import Lecture2GoIE
+from .lecturio import (
+ LecturioIE,
+ LecturioCourseIE,
+ LecturioDeCourseIE,
+)
+from .leeco import (
+ LeIE,
+ LePlaylistIE,
+ LetvCloudIE,
+)
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
+from .libraryofcongress import LibraryOfCongressIE
+from .libsyn import LibsynIE
+from .lifenews import (
+ LifeNewsIE,
+ LifeEmbedIE,
+)
+from .likee import (
+ LikeeIE,
+ LikeeUserIE
+)
+from .limelight import (
+ LimelightMediaIE,
+ LimelightChannelIE,
+ LimelightChannelListIE,
+)
+from .line import (
+ LineLiveIE,
+ LineLiveChannelIE,
+)
+from .linkedin import (
+ LinkedInIE,
+ LinkedInLearningIE,
+ LinkedInLearningCourseIE,
+)
+from .linuxacademy import LinuxAcademyIE
+from .liputan6 import Liputan6IE
+from .listennotes import ListenNotesIE
+from .litv import LiTVIE
+from .livejournal import LiveJournalIE
+from .livestream import (
+ LivestreamIE,
+ LivestreamOriginalIE,
+ LivestreamShortenerIE,
+)
+from .livestreamfails import LivestreamfailsIE
+from .lnkgo import (
+ LnkGoIE,
+ LnkIE,
+)
+from .localnews8 import LocalNews8IE
+from .lovehomeporn import LoveHomePornIE
+from .lrt import (
+ LRTVODIE,
+ LRTStreamIE
+)
+from .lynda import (
+ LyndaIE,
+ LyndaCourseIE
+)
+from .m6 import M6IE
+from .magentamusik360 import MagentaMusik360IE
+from .mailru import (
+ MailRuIE,
+ MailRuMusicIE,
+ MailRuMusicSearchIE,
+)
+from .mainstreaming import MainStreamingIE
+from .malltv import MallTVIE
+from .mangomolo import (
+ MangomoloVideoIE,
+ MangomoloLiveIE,
+)
+from .manoto import (
+ ManotoTVIE,
+ ManotoTVShowIE,
+ ManotoTVLiveIE,
+)
+from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
+from .markiza import (
+ MarkizaIE,
+ MarkizaPageIE,
+)
+from .massengeschmacktv import MassengeschmackTVIE
+from .masters import MastersIE
+from .matchtv import MatchTVIE
+from .mdr import MDRIE
+from .medaltv import MedalTVIE
+from .mediaite import MediaiteIE
+from .mediaklikk import MediaKlikkIE
+from .mediaset import (
+ MediasetIE,
+ MediasetShowIE,
+)
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
+from .mediaworksnz import MediaWorksNZVODIE
+from .medici import MediciIE
+from .megaphone import MegaphoneIE
+from .meipai import MeipaiIE
+from .melonvod import MelonVODIE
+from .meta import METAIE
+from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
+from .mgtv import MGTVIE
+from .miaopai import MiaoPaiIE
+from .microsoftstream import MicrosoftStreamIE
+from .microsoftvirtualacademy import (
+ MicrosoftVirtualAcademyIE,
+ MicrosoftVirtualAcademyCourseIE,
+)
+from .microsoftembed import MicrosoftEmbedIE
+from .mildom import (
+ MildomIE,
+ MildomVodIE,
+ MildomClipIE,
+ MildomUserVodIE,
+)
+from .minds import (
+ MindsIE,
+ MindsChannelIE,
+ MindsGroupIE,
+)
+from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
+from .miomio import MioMioIE
+from .mirrativ import (
+ MirrativIE,
+ MirrativUserIE,
+)
+from .mirrorcouk import MirrorCoUKIE
+from .mit import TechTVMITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixch import (
+ MixchIE,
+ MixchArchiveIE,
+)
+from .mixcloud import (
+ MixcloudIE,
+ MixcloudUserIE,
+ MixcloudPlaylistIE,
+)
+from .mlb import (
+ MLBIE,
+ MLBVideoIE,
+ MLBTVIE,
+ MLBArticleIE,
+)
+from .mlssoccer import MLSSoccerIE
+from .mnet import MnetIE
+from .mocha import MochaVideoIE
+from .moevideo import MoeVideoIE
+from .mofosex import (
+ MofosexIE,
+ MofosexEmbedIE,
+)
+from .mojvideo import MojvideoIE
+from .morningstar import MorningstarIE
+from .motherless import (
+ MotherlessIE,
+ MotherlessGroupIE
+)
+from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
+from .moviepilot import MoviepilotIE
+from .moview import MoviewPlayIE
+from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
+from .msn import MSNIE
+from .mtv import (
+ MTVIE,
+ MTVVideoIE,
+ MTVServicesEmbeddedIE,
+ MTVDEIE,
+ MTVJapanIE,
+ MTVItaliaIE,
+ MTVItaliaProgrammaIE,
+)
+from .muenchentv import MuenchenTVIE
+from .murrtube import MurrtubeIE, MurrtubeUserIE
+from .musescore import MuseScoreIE
+from .musicdex import (
+ MusicdexSongIE,
+ MusicdexAlbumIE,
+ MusicdexArtistIE,
+ MusicdexPlaylistIE,
+)
+from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mxplayer import (
+ MxplayerIE,
+ MxplayerShowIE,
+)
+from .mychannels import MyChannelsIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvi import (
+ MyviIE,
+ MyviEmbedIE,
+)
+from .myvideoge import MyVideoGeIE
+from .myvidster import MyVidsterIE
+from .n1 import (
+ N1InfoAssetIE,
+ N1InfoIIE,
+)
+from .nate import (
+ NateIE,
+ NateProgramIE,
+)
+from .nationalgeographic import (
+ NationalGeographicVideoIE,
+ NationalGeographicTVIE,
+)
+from .naver import (
+ NaverIE,
+ NaverLiveIE,
+ NaverNowIE,
+)
+from .nba import (
+ NBAWatchEmbedIE,
+ NBAWatchIE,
+ NBAWatchCollectionIE,
+ NBAEmbedIE,
+ NBAIE,
+ NBAChannelIE,
+)
+from .nbc import (
+ NBCIE,
+ NBCNewsIE,
+ NBCOlympicsIE,
+ NBCOlympicsStreamIE,
+ NBCSportsIE,
+ NBCSportsStreamIE,
+ NBCSportsVPlayerIE,
+ NBCStationsIE,
+)
+from .ndr import (
+ NDRIE,
+ NJoyIE,
+ NDREmbedBaseIE,
+ NDREmbedIE,
+ NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .nebula import (
+ NebulaIE,
+ NebulaSubscriptionsIE,
+ NebulaChannelIE,
+)
+from .nerdcubed import NerdCubedFeedIE
+from .netzkino import NetzkinoIE
+from .neteasemusic import (
+ NetEaseMusicIE,
+ NetEaseMusicAlbumIE,
+ NetEaseMusicSingerIE,
+ NetEaseMusicListIE,
+ NetEaseMusicMvIE,
+ NetEaseMusicProgramIE,
+ NetEaseMusicDjRadioIE,
+)
+from .netverse import (
+ NetverseIE,
+ NetversePlaylistIE,
+)
+from .newgrounds import (
+ NewgroundsIE,
+ NewgroundsPlaylistIE,
+ NewgroundsUserIE,
+)
+from .newspicks import NewsPicksIE
+from .newstube import NewstubeIE
+from .newsy import NewsyIE
+from .nextmedia import (
+ NextMediaIE,
+ NextMediaActionNewsIE,
+ AppleDailyIE,
+ NextTVIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nfb import NFBIE
+from .nfhsnetwork import NFHSNetworkIE
+from .nfl import (
+ NFLIE,
+ NFLArticleIE,
+)
+from .nhk import (
+ NhkVodIE,
+ NhkVodProgramIE,
+ NhkForSchoolBangumiIE,
+ NhkForSchoolSubjectIE,
+ NhkForSchoolProgramListIE,
+)
+from .nhl import NHLIE
+from .nick import (
+ NickIE,
+ NickBrIE,
+ NickDeIE,
+ NickNightIE,
+ NickRuIE,
+)
+from .niconico import (
+ NiconicoIE,
+ NiconicoPlaylistIE,
+ NiconicoUserIE,
+ NiconicoSeriesIE,
+ NiconicoHistoryIE,
+ NicovideoSearchDateIE,
+ NicovideoSearchIE,
+ NicovideoSearchURLIE,
+ NicovideoTagURLIE,
+)
+from .ninecninemedia import (
+ NineCNineMediaIE,
+ CPTwentyFourIE,
+)
+from .ninegag import NineGagIE
+from .ninenow import NineNowIE
+from .nintendo import NintendoIE
+from .nitter import NitterIE
+from .njpwworld import NJPWWorldIE
+from .nobelprize import NobelPrizeIE
+from .nonktube import NonkTubeIE
+from .noodlemagazine import NoodleMagazineIE
+from .noovo import NoovoIE
+from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
+from .nosnl import NOSNLArticleIE
+from .nova import (
+ NovaEmbedIE,
+ NovaIE,
+)
+from .novaplay import NovaPlayIE
+from .nowness import (
+ NownessIE,
+ NownessPlaylistIE,
+ NownessSeriesIE,
+)
+from .noz import NozIE
+from .npo import (
+ AndereTijdenIE,
+ NPOIE,
+ NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
+ SchoolTVIE,
+ HetKlokhuisIE,
+ VPROIE,
+ WNLIE,
+)
+from .npr import NprIE
+from .nrk import (
+ NRKIE,
+ NRKPlaylistIE,
+ NRKSkoleIE,
+ NRKTVIE,
+ NRKTVDirekteIE,
+ NRKRadioPodkastIE,
+ NRKTVEpisodeIE,
+ NRKTVEpisodesIE,
+ NRKTVSeasonIE,
+ NRKTVSeriesIE,
+)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nytimes import (
+ NYTimesIE,
+ NYTimesArticleIE,
+ NYTimesCookingIE,
+)
+from .nuvid import NuvidIE
+from .nzherald import NZHeraldIE
+from .nzz import NZZIE
+from .odatv import OdaTVIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oftv import (
+ OfTVIE,
+ OfTVPlaylistIE
+)
+from .oktoberfesttv import OktoberfestTVIE
+from .olympics import OlympicsReplayIE
+from .on24 import On24IE
+from .ondemandkorea import OnDemandKoreaIE
+from .onefootball import OneFootballIE
+from .onenewsnz import OneNewsNZIE
+from .onet import (
+ OnetIE,
+ OnetChannelIE,
+ OnetMVPIE,
+ OnetPlIE,
+)
+from .onionstudios import OnionStudiosIE
+from .ooyala import (
+ OoyalaIE,
+ OoyalaExternalIE,
+)
+from .opencast import (
+ OpencastIE,
+ OpencastPlaylistIE,
+)
+from .openrec import (
+ OpenRecIE,
+ OpenRecCaptureIE,
+ OpenRecMovieIE,
+)
+from .ora import OraTVIE
+from .orf import (
+ ORFTVthekIE,
+ ORFFM4StoryIE,
+ ORFRadioIE,
+ ORFIPTVIE,
+)
+from .outsidetv import OutsideTVIE
+from .packtpub import (
+ PacktPubIE,
+ PacktPubCourseIE,
+)
+from .palcomp3 import (
+ PalcoMP3IE,
+ PalcoMP3ArtistIE,
+ PalcoMP3VideoIE,
+)
+from .pandoratv import PandoraTVIE
+from .panopto import (
+ PanoptoIE,
+ PanoptoListIE,
+ PanoptoPlaylistIE
+)
+from .paramountplus import (
+ ParamountPlusIE,
+ ParamountPlusSeriesIE,
+)
+from .parler import ParlerIE
+from .parlview import ParlviewIE
+from .patreon import (
+ PatreonIE,
+ PatreonCampaignIE
+)
+from .pbs import PBSIE
+from .pearvideo import PearVideoIE
+from .peekvids import PeekVidsIE, PlayVidsIE
+from .peertube import (
+ PeerTubeIE,
+ PeerTubePlaylistIE,
+)
+from .peertv import PeerTVIE
+from .peloton import (
+ PelotonIE,
+ PelotonLiveIE
+)
+from .people import PeopleIE
+from .performgroup import PerformGroupIE
+from .periscope import (
+ PeriscopeIE,
+ PeriscopeUserIE,
+)
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .piapro import PiaproIE
+from .picarto import (
+ PicartoIE,
+ PicartoVodIE,
+)
+from .piksel import PikselIE
+from .pinkbike import PinkbikeIE
+from .pinterest import (
+ PinterestIE,
+ PinterestCollectionIE,
+)
+from .pixivsketch import (
+ PixivSketchIE,
+ PixivSketchUserIE,
+)
+from .pladform import PladformIE
+from .planetmarathi import PlanetMarathiIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
+from .playfm import PlayFMIE
+from .playplustv import PlayPlusTVIE
+from .plays import PlaysTVIE
+from .playstuff import PlayStuffIE
+from .playsuisse import PlaySuisseIE
+from .playtvak import PlaytvakIE
+from .playvid import PlayvidIE
+from .playwire import PlaywireIE
+from .plutotv import PlutoTVIE
+from .pluralsight import (
+ PluralsightIE,
+ PluralsightCourseIE,
+)
+from .podbayfm import PodbayFMIE, PodbayFMChannelIE
+from .podchaser import PodchaserIE
+from .podomatic import PodomaticIE
+from .pokemon import (
+ PokemonIE,
+ PokemonWatchIE,
+)
+from .pokergo import (
+ PokerGoIE,
+ PokerGoCollectionIE,
+)
+from .polsatgo import PolsatGoIE
+from .polskieradio import (
+ PolskieRadioIE,
+ PolskieRadioCategoryIE,
+ PolskieRadioPlayerIE,
+ PolskieRadioPodcastIE,
+ PolskieRadioPodcastListIE,
+ PolskieRadioRadioKierowcowIE,
+)
+from .popcorntimes import PopcorntimesIE
+from .popcorntv import PopcornTVIE
+from .porn91 import Porn91IE
+from .porncom import PornComIE
+from .pornflip import PornFlipIE
+from .pornhd import PornHdIE
+from .pornhub import (
+ PornHubIE,
+ PornHubUserIE,
+ PornHubPlaylistIE,
+ PornHubPagedVideoListIE,
+ PornHubUserVideosUploadIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .pornez import PornezIE
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
+)
+from .prankcast import PrankCastIE
+from .premiershiprugby import PremiershipRugbyIE
+from .presstv import PressTVIE
+from .projectveritas import ProjectVeritasIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .prx import (
+ PRXStoryIE,
+ PRXSeriesIE,
+ PRXAccountIE,
+ PRXStoriesSearchIE,
+ PRXSeriesSearchIE
+)
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qingting import QingTingIE
+from .qqmusic import (
+ QQMusicIE,
+ QQMusicSingerIE,
+ QQMusicAlbumIE,
+ QQMusicToplistIE,
+ QQMusicPlaylistIE,
+)
+from .r7 import (
+ R7IE,
+ R7ArticleIE,
+)
+from .radiko import RadikoIE, RadikoRadioIE
+from .radiocanada import (
+ RadioCanadaIE,
+ RadioCanadaAudioVideoIE,
+)
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiobremen import RadioBremenIE
+from .radiofrance import FranceCultureIE, RadioFranceIE
+from .radiozet import RadioZetPodcastIE
+from .radiokapital import (
+ RadioKapitalIE,
+ RadioKapitalShowIE,
+)
+from .radlive import (
+ RadLiveIE,
+ RadLiveChannelIE,
+ RadLiveSeasonIE,
+)
+from .rai import (
+ RaiPlayIE,
+ RaiPlayLiveIE,
+ RaiPlayPlaylistIE,
+ RaiPlaySoundIE,
+ RaiPlaySoundLiveIE,
+ RaiPlaySoundPlaylistIE,
+ RaiNewsIE,
+ RaiSudtirolIE,
+ RaiIE,
+)
+from .raywenderlich import (
+ RayWenderlichIE,
+ RayWenderlichCourseIE,
+)
+from .rbmaradio import RBMARadioIE
+from .rcs import (
+ RCSIE,
+ RCSEmbedsIE,
+ RCSVariousIE,
+)
+from .rcti import (
+ RCTIPlusIE,
+ RCTIPlusSeriesIE,
+ RCTIPlusTVIE,
+)
+from .rds import RDSIE
+from .redbee import ParliamentLiveUKIE, RTBFIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullEmbedIE,
+ RedBullTVRrnContentIE,
+ RedBullIE,
+)
+from .reddit import RedditIE
+from .redgifs import (
+ RedGifsIE,
+ RedGifsSearchIE,
+ RedGifsUserIE,
+)
+from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
+from .rentv import (
+ RENTVIE,
+ RENTVArticleIE,
+)
+from .restudy import RestudyIE
+from .reuters import ReutersIE
+from .reverbnation import ReverbNationIE
+from .rice import RICEIE
+from .rmcdecouverte import RMCDecouverteIE
+from .rockstargames import RockstarGamesIE
+from .rokfin import (
+ RokfinIE,
+ RokfinStackIE,
+ RokfinChannelIE,
+ RokfinSearchIE,
+)
+from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
+from .rottentomatoes import RottenTomatoesIE
+from .rozhlas import RozhlasIE
+from .rte import RteIE, RteRadioIE
+from .rtlnl import (
+ RtlNlIE,
+ RTLLuTeleVODIE,
+ RTLLuArticleIE,
+ RTLLuLiveIE,
+ RTLLuRadioIE,
+)
+from .rtl2 import (
+ RTL2IE,
+ RTL2YouIE,
+ RTL2YouSeriesIE,
+)
+from .rtnews import (
+ RTNewsIE,
+ RTDocumentryIE,
+ RTDocumentryPlaylistIE,
+ RuptlyIE,
+)
+from .rtp import RTPIE
+from .rtrfm import RTRFMIE
+from .rts import RTSIE
+from .rtve import (
+ RTVEALaCartaIE,
+ RTVEAudioIE,
+ RTVELiveIE,
+ RTVEInfantilIE,
+ RTVETelevisionIE,
+)
+from .rtvnh import RTVNHIE
+from .rtvs import RTVSIE
+from .rtvslo import RTVSLOIE
+from .ruhd import RUHDIE
+from .rule34video import Rule34VideoIE
+from .rumble import (
+ RumbleEmbedIE,
+ RumbleChannelIE,
+)
+from .rutube import (
+ RutubeIE,
+ RutubeChannelIE,
+ RutubeEmbedIE,
+ RutubeMovieIE,
+ RutubePersonIE,
+ RutubePlaylistIE,
+ RutubeTagsIE,
+)
+from .glomex import (
+ GlomexIE,
+ GlomexEmbedIE,
+)
+from .megatvcom import (
+ MegaTVComIE,
+ MegaTVComEmbedIE,
+)
+from .ant1newsgr import (
+ Ant1NewsGrWatchIE,
+ Ant1NewsGrArticleIE,
+ Ant1NewsGrEmbedIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .ruv import (
+ RuvIE,
+ RuvSpilaIE
+)
+from .safari import (
+ SafariIE,
+ SafariApiIE,
+ SafariCourseIE,
+)
+from .saitosan import SaitosanIE
+from .samplefocus import SampleFocusIE
+from .sapo import SapoIE
+from .savefrom import SaveFromIE
+from .sbs import SBSIE
+from .screen9 import Screen9IE
+from .screencast import ScreencastIE
+from .screencastify import ScreencastifyIE
+from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import (
+ ScrippsNetworksWatchIE,
+ ScrippsNetworksIE,
+)
+from .scte import (
+ SCTEIE,
+ SCTECourseIE,
+)
+from .scrolller import ScrolllerIE
+from .seeker import SeekerIE
+from .senategov import SenateISVPIE, SenateGovIE
+from .sendtonews import SendtoNewsIE
+from .servus import ServusIE
+from .sevenplus import SevenPlusIE
+from .sexu import SexuIE
+from .seznamzpravy import (
+ SeznamZpravyIE,
+ SeznamZpravyArticleIE,
+)
+from .shahid import (
+ ShahidIE,
+ ShahidShowIE,
+)
+from .shared import (
+ SharedIE,
+ VivoIE,
+)
+from .sharevideos import ShareVideosEmbedIE
+from .shemaroome import ShemarooMeIE
+from .showroomlive import ShowRoomLiveIE
+from .simplecast import (
+ SimplecastIE,
+ SimplecastEpisodeIE,
+ SimplecastPodcastIE,
+)
+from .sina import SinaIE
+from .sixplay import SixPlayIE
+from .skeb import SkebIE
+from .skyit import (
+ SkyItPlayerIE,
+ SkyItVideoIE,
+ SkyItVideoLiveIE,
+ SkyItIE,
+ SkyItArteIE,
+ CieloTVItIE,
+ TV8ItIE,
+)
+from .skylinewebcams import SkylineWebcamsIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
+from .skynewsau import SkyNewsAUIE
+from .sky import (
+ SkyNewsIE,
+ SkyNewsStoryIE,
+ SkySportsIE,
+ SkySportsNewsIE,
+)
+from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
+from .slutload import SlutloadIE
+from .smotrim import SmotrimIE
+from .snotr import SnotrIE
+from .sohu import SohuIE
+from .sonyliv import (
+ SonyLIVIE,
+ SonyLIVSeriesIE,
+)
+from .soundcloud import (
+ SoundcloudEmbedIE,
+ SoundcloudIE,
+ SoundcloudSetIE,
+ SoundcloudRelatedIE,
+ SoundcloudUserIE,
+ SoundcloudTrackStationIE,
+ SoundcloudPlaylistIE,
+ SoundcloudSearchIE,
+)
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
+from .southpark import (
+ SouthParkIE,
+ SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
+ SouthParkLatIE,
+ SouthParkNlIE
+)
+from .sovietscloset import (
+ SovietsClosetIE,
+ SovietsClosetPlaylistIE
+)
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
+from .spankwire import SpankwireIE
+from .spiegel import SpiegelIE
+from .spike import (
+ BellatorIE,
+ ParamountNetworkIE,
+)
+from .startrek import StarTrekIE
+from .stitcher import (
+ StitcherIE,
+ StitcherShowIE,
+)
+from .sport5 import Sport5IE
+from .sportbox import SportBoxIE
+from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+ SpotifyIE,
+ SpotifyShowIE,
+)
+from .spreaker import (
+ SpreakerIE,
+ SpreakerPageIE,
+ SpreakerShowIE,
+ SpreakerShowPageIE,
+)
+from .springboardplatform import SpringboardPlatformIE
+from .sprout import SproutIE
+from .srgssr import (
+ SRGSSRIE,
+ SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .stanfordoc import StanfordOpenClassroomIE
+from .startv import StarTVIE
+from .steam import (
+ SteamIE,
+ SteamCommunityBroadcastIE,
+)
+from .storyfire import (
+ StoryFireIE,
+ StoryFireUserIE,
+ StoryFireSeriesIE,
+)
+from .streamable import StreamableIE
+from .streamanity import StreamanityIE
+from .streamcloud import StreamcloudIE
+from .streamcz import StreamCZIE
+from .streamff import StreamFFIE
+from .streetvoice import StreetVoiceIE
+from .stretchinternet import StretchInternetIE
+from .stripchat import StripchatIE
+from .stv import STVPlayerIE
+from .substack import SubstackIE
+from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
+from .svt import (
+ SVTIE,
+ SVTPageIE,
+ SVTPlayIE,
+ SVTSeriesIE,
+)
+from .swearnet import SwearnetEpisodeIE
+from .swrmediathek import SWRMediathekIE
+from .syvdk import SYVDKIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .tass import TassIE
+from .tbs import TBSIE
+from .tdslifeway import TDSLifewayIE
+from .teachable import (
+ TeachableIE,
+ TeachableCourseIE,
+)
+from .teachertube import (
+ TeacherTubeIE,
+ TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import TeamcocoIE
+from .teamtreehouse import TeamTreeHouseIE
+from .techtalks import TechTalksIE
+from .ted import (
+ TedEmbedIE,
+ TedPlaylistIE,
+ TedSeriesIE,
+ TedTalkIE,
+)
+from .tele5 import Tele5IE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telegram import TelegramEmbedIE
+from .telemb import TeleMBIE
+from .telemundo import TelemundoIE
+from .telequebec import (
+ TeleQuebecIE,
+ TeleQuebecSquatIE,
+ TeleQuebecEmissionIE,
+ TeleQuebecLiveIE,
+ TeleQuebecVideoIE,
+)
+from .teletask import TeleTaskIE
+from .telewebion import TelewebionIE
+from .tempo import TempoIE
+from .tencent import (
+ IflixEpisodeIE,
+ IflixSeriesIE,
+ VQQSeriesIE,
+ VQQVideoIE,
+ WeTvEpisodeIE,
+ WeTvSeriesIE,
+)
+from .tennistv import TennisTVIE
+from .tenplay import TenPlayIE
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .tfo import TFOIE
+from .theholetv import TheHoleTvIE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+ ThePlatformIE,
+ ThePlatformFeedIE,
+)
+from .thestar import TheStarIE
+from .thesun import TheSunIE
+from .theta import (
+ ThetaVideoIE,
+ ThetaStreamIE,
+)
+from .theweatherchannel import TheWeatherChannelIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
+from .threespeak import (
+ ThreeSpeakIE,
+ ThreeSpeakUserIE,
+)
+from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+ TikTokIE,
+ TikTokUserIE,
+ TikTokSoundIE,
+ TikTokEffectIE,
+ TikTokTagIE,
+ TikTokVMIE,
+ DouyinIE,
+)
+from .tinypic import TinyPicIE
+from .tmz import TMZIE
+from .tnaflix import (
+ TNAFlixNetworkEmbedIE,
+ TNAFlixIE,
+ EMPFlixIE,
+ MovieFapIE,
+)
+from .toggle import (
+ ToggleIE,
+ MeWatchIE,
+)
+from .toggo import (
+ ToggoIE,
+)
+from .tokentube import (
+ TokentubeIE,
+ TokentubeChannelIE
+)
+from .tonline import TOnlineIE
+from .toongoggles import ToonGogglesIE
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .triller import (
+ TrillerIE,
+ TrillerUserIE,
+)
+from .trilulilu import TriluliluIE
+from .trovo import (
+ TrovoIE,
+ TrovoVodIE,
+ TrovoChannelVodIE,
+ TrovoChannelClipIE,
+)
+from .trueid import TrueIDIE
+from .trunews import TruNewsIE
+from .truth import TruthIE
+from .trutv import TruTVIE
+from .tube8 import Tube8IE
+from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE
+from .tubitv import (
+ TubiTvIE,
+ TubiTvShowIE,
+)
+from .tumblr import TumblrIE
+from .tunein import (
+ TuneInClipIE,
+ TuneInStationIE,
+ TuneInProgramIE,
+ TuneInTopicIE,
+ TuneInShortenerIE,
+)
+from .tunepk import TunePkIE
+from .turbo import TurboIE
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+ KatsomoIE,
+ MTVUutisetArticleIE,
+)
+from .tv24ua import (
+ TV24UAVideoIE,
+)
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
+)
+from .tv2hu import (
+ TV2HuIE,
+ TV2HuSeriesIE,
+)
+from .tv4 import TV4IE
+from .tv5mondeplus import TV5MondePlusIE
+from .tv5unis import (
+ TV5UnisVideoIE,
+ TV5UnisIE,
+)
+from .tva import (
+ TVAIE,
+ QubIE,
+)
+from .tvanouvelles import (
+ TVANouvellesIE,
+ TVANouvellesArticleIE,
+)
+from .tvc import (
+ TVCIE,
+ TVCArticleIE,
+)
+from .tver import TVerIE
+from .tvigle import TvigleIE
+from .tviplayer import TVIPlayerIE
+from .tvland import TVLandIE
+from .tvn24 import TVN24IE
+from .tvnet import TVNetIE
+from .tvnoe import TVNoeIE
+from .tvnow import (
+ TVNowIE,
+ TVNowFilmIE,
+ TVNowNewIE,
+ TVNowSeasonIE,
+ TVNowAnnualIE,
+ TVNowShowIE,
+)
+from .tvopengr import (
+ TVOpenGrWatchIE,
+ TVOpenGrEmbedIE,
+)
+from .tvp import (
+ TVPEmbedIE,
+ TVPIE,
+ TVPStreamIE,
+ TVPVODSeriesIE,
+ TVPVODVideoIE,
+)
+from .tvplay import (
+ TVPlayIE,
+ ViafreeIE,
+ TVPlayHomeIE,
+)
+from .tvplayer import TVPlayerIE
+from .tweakers import TweakersIE
+from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import (
+ TwitCastingIE,
+ TwitCastingLiveIE,
+ TwitCastingUserIE,
+)
+from .twitch import (
+ TwitchVodIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
+ TwitchStreamIE,
+ TwitchClipsIE,
+)
+from .twitter import (
+ TwitterCardIE,
+ TwitterIE,
+ TwitterAmplifyIE,
+ TwitterBroadcastIE,
+ TwitterSpacesIE,
+ TwitterShortenerIE,
+)
+from .udemy import (
+ UdemyIE,
+ UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .ufctv import (
+ UFCTVIE,
+ UFCArabiaIE,
+)
+from .ukcolumn import UkColumnIE
+from .uktvplay import UKTVPlayIE
+from .digiteka import DigitekaIE
+from .dlive import (
+ DLiveVODIE,
+ DLiveStreamIE,
+)
+from .drooble import DroobleIE
+from .umg import UMGDeIE
+from .unistra import UnistraIE
+from .unity import UnityIE
+from .unscripted import UnscriptedNewsVideoIE
+from .unsupported import KnownDRMIE, KnownPiracyIE
+from .uol import UOLIE
+from .uplynk import (
+ UplynkIE,
+ UplynkPreplayIE,
+)
+from .urort import UrortIE
+from .urplay import URPlayIE
+from .usanetwork import USANetworkIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import (
+ UstudioIE,
+ UstudioEmbedIE,
+)
+from .utreon import UtreonIE
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
+from .veo import VeoIE
+from .veoh import (
+ VeohIE,
+ VeohUserIE
+)
+from .vesti import VestiIE
+from .vevo import (
+ VevoIE,
+ VevoPlaylistIE,
+)
+from .vgtv import (
+ BTArticleIE,
+ BTVestlendingenIE,
+ VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+ ViceIE,
+ ViceArticleIE,
+ ViceShowIE,
+)
+from .vidbit import VidbitIE
+from .viddler import ViddlerIE
+from .videa import VideaIE
+from .videocampus_sachsen import (
+ VideocampusSachsenIE,
+ ViMPPlaylistIE,
+)
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videomore import (
+ VideomoreIE,
+ VideomoreVideoIE,
+ VideomoreSeasonIE,
+)
+from .videopress import VideoPressIE
+from .vidio import (
+ VidioIE,
+ VidioPremierIE,
+ VidioLiveIE
+)
+from .vidlii import VidLiiIE
+from .viewlift import (
+ ViewLiftIE,
+ ViewLiftEmbedIE,
+)
+from .viidea import ViideaIE
+from .vimeo import (
+ VimeoIE,
+ VimeoAlbumIE,
+ VimeoChannelIE,
+ VimeoGroupsIE,
+ VimeoLikesIE,
+ VimeoOndemandIE,
+ VimeoProIE,
+ VimeoReviewIE,
+ VimeoUserIE,
+ VimeoWatchLaterIE,
+ VHXEmbedIE,
+)
+from .vimm import (
+ VimmIE,
+ VimmRecordingIE,
+)
+from .vimple import VimpleIE
+from .vine import (
+ VineIE,
+ VineUserIE,
+)
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
+from .viqeo import ViqeoIE
+from .viu import (
+ ViuIE,
+ ViuPlaylistIE,
+ ViuOTTIE,
+)
+from .vk import (
+ VKIE,
+ VKUserVideosIE,
+ VKWallPostIE,
+)
+from .vlive import (
+ VLiveIE,
+ VLivePostIE,
+ VLiveChannelIE,
+)
+from .vodlocker import VodlockerIE
+from .vodpl import VODPlIE
+from .vodplatform import VODPlatformIE
+from .voicerepublic import VoiceRepublicIE
+from .voicy import (
+ VoicyIE,
+ VoicyChannelIE,
+)
+from .voot import (
+ VootIE,
+ VootSeriesIE,
+)
+from .voxmedia import (
+ VoxMediaVolumeIE,
+ VoxMediaIE,
+)
+from .vrt import VRTIE
+from .vrak import VrakIE
+from .vrv import (
+ VRVIE,
+ VRVSeriesIE,
+)
+from .vshare import VShareIE
+from .vtm import VTMIE
+from .medialaan import MedialaanIE
+from .vuclip import VuClipIE
+from .vupload import VuploadIE
+from .vvvvid import (
+ VVVVIDIE,
+ VVVVIDShowIE,
+)
+from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
+from .wakanim import WakanimIE
+from .walla import WallaIE
+from .washingtonpost import (
+ WashingtonPostIE,
+ WashingtonPostArticleIE,
+)
+from .wasdtv import (
+ WASDTVStreamIE,
+ WASDTVRecordIE,
+ WASDTVClipIE,
+)
+from .wat import WatIE
+from .watchbox import WatchBoxIE
+from .watchindianporn import WatchIndianPornIE
+from .wdr import (
+ WDRIE,
+ WDRPageIE,
+ WDRElefantIE,
+ WDRMobileIE,
+)
+from .webcaster import (
+ WebcasterIE,
+ WebcasterFeedIE,
+)
+from .webofstories import (
+ WebOfStoriesIE,
+ WebOfStoriesPlaylistIE,
+)
+from .weibo import (
+ WeiboIE,
+ WeiboMobileIE
+)
+from .weiqitv import WeiqiTVIE
+from .wikimedia import WikimediaIE
+from .willow import WillowIE
+from .wimtv import WimTVIE
+from .whowatch import WhoWatchIE
+from .wistia import (
+ WistiaIE,
+ WistiaPlaylistIE,
+ WistiaChannelIE,
+)
+from .wordpress import (
+ WordpressPlaylistEmbedIE,
+ WordpressMiniAudioPlayerEmbedIE,
+)
+from .worldstarhiphop import WorldStarHipHopIE
+from .wppilot import (
+ WPPilotIE,
+ WPPilotChannelsIE,
+)
+from .wsj import (
+ WSJIE,
+ WSJArticleIE,
+)
+from .wwe import WWEIE
+from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+ XHamsterIE,
+ XHamsterEmbedIE,
+ XHamsterUserIE,
+)
+from .xiami import (
+ XiamiSongIE,
+ XiamiAlbumIE,
+ XiamiArtistIE,
+ XiamiCollectionIE
+)
+from .ximalaya import (
+ XimalayaIE,
+ XimalayaAlbumIE
+)
+from .xinpianchang import XinpianchangIE
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
+from .xvideos import XVideosIE
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+ YahooIE,
+ YahooSearchIE,
+ YahooGyaOPlayerIE,
+ YahooGyaOIE,
+ YahooJapanNewsIE,
+)
+from .yandexdisk import YandexDiskIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+ YandexMusicArtistTracksIE,
+ YandexMusicArtistAlbumsIE,
+)
+from .yandexvideo import (
+ YandexVideoIE,
+ YandexVideoPreviewIE,
+ ZenYandexIE,
+ ZenYandexChannelIE,
+)
+from .yapfiles import YapFilesIE
+from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
+from .yle_areena import YleAreenaIE
+from .ynet import YnetIE
+from .youjizz import YouJizzIE
+from .youku import (
+ YoukuIE,
+ YoukuShowIE,
+)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
+from .youporn import YouPornIE
+from .yourporn import YourPornIE
+from .yourupload import YourUploadIE
+from .zapiks import ZapiksIE
+from .zattoo import (
+ BBVTVIE,
+ BBVTVLiveIE,
+ BBVTVRecordingsIE,
+ EinsUndEinsTVIE,
+ EinsUndEinsTVLiveIE,
+ EinsUndEinsTVRecordingsIE,
+ EWETVIE,
+ EWETVLiveIE,
+ EWETVRecordingsIE,
+ GlattvisionTVIE,
+ GlattvisionTVLiveIE,
+ GlattvisionTVRecordingsIE,
+ MNetTVIE,
+ MNetTVLiveIE,
+ MNetTVRecordingsIE,
+ NetPlusTVIE,
+ NetPlusTVLiveIE,
+ NetPlusTVRecordingsIE,
+ OsnatelTVIE,
+ OsnatelTVLiveIE,
+ OsnatelTVRecordingsIE,
+ QuantumTVIE,
+ QuantumTVLiveIE,
+ QuantumTVRecordingsIE,
+ SaltTVIE,
+ SaltTVLiveIE,
+ SaltTVRecordingsIE,
+ SAKTVIE,
+ SAKTVLiveIE,
+ SAKTVRecordingsIE,
+ VTXTVIE,
+ VTXTVLiveIE,
+ VTXTVRecordingsIE,
+ WalyTVIE,
+ WalyTVLiveIE,
+ WalyTVRecordingsIE,
+ ZattooIE,
+ ZattooLiveIE,
+ ZattooMoviesIE,
+ ZattooRecordingsIE,
+)
+from .zdf import ZDFIE, ZDFChannelIE
+from .zee5 import (
+ Zee5IE,
+ Zee5SeriesIE,
+)
+from .zeenews import ZeeNewsIE
+from .zhihu import ZhihuIE
+from .zingmp3 import (
+ ZingMp3IE,
+ ZingMp3AlbumIE,
+ ZingMp3ChartHomeIE,
+ ZingMp3WeekChartIE,
+ ZingMp3ChartMusicVideoIE,
+ ZingMp3UserIE,
+)
+from .zoom import ZoomIE
+from .zype import ZypeIE
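
The block above is the tail of hypervideo_dl/extractor/_extractors.py, a flat import module that doubles as the extractor registry. A minimal sketch, assuming only the naming convention visible here (classes whose names end in IE), of how such a registry can be enumerated and probed against a URL; the class names below are stand-ins, not hypervideo's real loader:

    import re

    class SketchInfoExtractor:
        _VALID_URL = None

        @classmethod
        def suitable(cls, url):
            # each extractor advertises the URLs it handles via _VALID_URL
            return cls._VALID_URL is not None and re.match(cls._VALID_URL, url) is not None

    class ZoomSketchIE(SketchInfoExtractor):
        _VALID_URL = r'https?://(?:[^.]+\.)?zoom\.us/rec(?:ording)?/'

    def gen_extractor_classes(namespace):
        # mirrors the convention in _extractors.py: public extractors are
        # exactly the attributes whose names end in 'IE'
        return [v for k, v in namespace.items()
                if k.endswith('IE') and isinstance(v, type)]

    for ie in gen_extractor_classes(dict(globals())):
        print(ie.__name__, ie.suitable('https://zoom.us/rec/play/abc'))  # ZoomSketchIE True
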
diff --git a/hypervideo_dl/extractor/abc.py b/hypervideo_dl/extractor/abc.py
index 6fe195e..0ca76b8 100644
--- a/hypervideo_dl/extractor/abc.py
+++ b/hypervideo_dl/extractor/abc.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import hashlib
import hmac
import re
@@ -157,8 +155,6 @@ class ABCIE(InfoExtractor):
'format_id': format_id
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._og_search_title(webpage),
@@ -223,7 +219,6 @@ class ABCIViewIE(InfoExtractor):
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
if formats:
break
- self._sort_formats(formats)
subtitles = {}
src_vtt = stream.get('captions', {}).get('src-vtt')
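
Several hunks in this and the following files delete bare `self._sort_formats(formats)` lines: upstream moved format sorting out of the extractors and into the core, so the explicit calls became redundant. A minimal stand-in sketch (not the project's actual format-selection logic) of ordering a list of format dicts like the ones these extractors return:

    # stand-in sorter; real selection weighs many more fields (codec,
    # protocol, source preference) plus the user's format options
    formats = [
        {'format_id': 'hls-low', 'height': 360, 'tbr': 800},
        {'format_id': 'hls-high', 'height': 1080, 'tbr': 4500},
        {'format_id': 'hls-mid', 'height': 720, 'tbr': 2200},
    ]

    def sort_key(f):
        # ascending: worst first, best last, the order yt-dlp-style lists keep
        return (f.get('height') or 0, f.get('tbr') or 0)

    formats.sort(key=sort_key)
    print([f['format_id'] for f in formats])  # ['hls-low', 'hls-mid', 'hls-high']
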
diff --git a/hypervideo_dl/extractor/abcnews.py b/hypervideo_dl/extractor/abcnews.py
index 296b8ce..a57295b 100644
--- a/hypervideo_dl/extractor/abcnews.py
+++ b/hypervideo_dl/extractor/abcnews.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .amp import AMPIE
from .common import InfoExtractor
from ..utils import (
diff --git a/hypervideo_dl/extractor/abcotvs.py b/hypervideo_dl/extractor/abcotvs.py
index 5bff466..6dca19d 100644
--- a/hypervideo_dl/extractor/abcotvs.py
+++ b/hypervideo_dl/extractor/abcotvs.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -82,7 +78,6 @@ class ABCOTVSIE(InfoExtractor):
'url': mp4_url,
'width': 640,
})
- self._sort_formats(formats)
image = video.get('image') or {}
@@ -123,7 +118,6 @@ class ABCOTVSClipsIE(InfoExtractor):
title = video_data['title']
formats = self._extract_m3u8_formats(
video_data['videoURL'].split('?')[0], video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/abematv.py b/hypervideo_dl/extractor/abematv.py
index 27b7d86..80046af 100644
--- a/hypervideo_dl/extractor/abematv.py
+++ b/hypervideo_dl/extractor/abematv.py
@@ -1,42 +1,41 @@
-import io
-import json
-import time
+import base64
+import binascii
+import functools
import hashlib
import hmac
+import io
+import json
import re
import struct
-from base64 import urlsafe_b64encode
-from binascii import unhexlify
+import time
+import urllib.parse
+import urllib.request
+import urllib.response
+import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
-from ..compat import (
- compat_urllib_response,
- compat_urllib_parse_urlparse,
- compat_urllib_request,
-)
from ..utils import (
ExtractorError,
- decode_base,
+ bytes_to_intlist,
+ decode_base_n,
int_or_none,
- random_uuidv4,
+ intlist_to_bytes,
+ OnDemandPagedList,
request_to_url,
time_seconds,
- update_url_query,
traverse_obj,
- intlist_to_bytes,
- bytes_to_intlist,
- urljoin,
+ update_url_query,
)
-
# NOTE: network handler related code is a temporary measure until the network stack overhaul PRs are merged (#2861/#2862)
+
def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ assert isinstance(ydl._opener, urllib.request.OpenerDirector)
ydl._opener.add_handler(handler)
@@ -49,7 +48,7 @@ def remove_opener(ydl, handler):
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
opener = ydl._opener
- assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ assert isinstance(ydl._opener, urllib.request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
@@ -99,20 +98,20 @@ def remove_opener(ydl, handler):
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
-class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
+class AbemaLicenseHandler(urllib.request.BaseHandler):
handler_order = 499
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
def __init__(self, ie: 'AbemaTVIE'):
- # the protcol that this should really handle is 'abematv-license://'
+ # the protocol that this should really handle is 'abematv-license://'
# abematv_license_open is just a placeholder for development purposes
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
self.ie = ie
def _get_videokey_from_ticket(self, ticket):
- to_show = self.ie._downloader.params.get('verbose', False)
+ to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show)
license_response = self.ie._download_json(
@@ -126,11 +125,11 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
'Content-Type': 'application/json',
})
- res = decode_base(license_response['k'], self.STRTABLE)
+ res = decode_base_n(license_response['k'], table=self.STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
h = hmac.new(
- unhexlify(self.HKEY),
+ binascii.unhexlify(self.HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest())
@@ -139,84 +138,22 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
def abematv_license_open(self, url):
url = request_to_url(url)
- ticket = compat_urllib_parse_urlparse(url).netloc
+ ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
- return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={
+ return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data),
}, url=url, code=200)
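
AbemaLicenseHandler above hooks a custom abematv-license:// scheme into urllib so the downloader can fetch decryption keys through the normal URL-opening machinery. A self-contained sketch of the same technique with a made-up demo-license:// scheme; the handler and payload are illustrative, only the urllib calls are real:

    import io
    import urllib.request
    import urllib.response

    class DemoLicenseHandler(urllib.request.BaseHandler):
        handler_order = 499  # sort before the default handlers (order 500)

        def demo_license_open(self, req):
            # pretend key retrieval; the real handler fetches and decrypts
            payload = b'decrypted-key-bytes'
            return urllib.response.addinfourl(
                io.BytesIO(payload), headers={'Content-Length': len(payload)},
                url=req.get_full_url(), code=200)

    # urllib dispatches '<scheme>_open', so a scheme containing '-' must be
    # bound under its literal name, much as AbemaLicenseHandler's __init__ does
    setattr(DemoLicenseHandler, 'demo-license_open',
            DemoLicenseHandler.demo_license_open)

    opener = urllib.request.build_opener(DemoLicenseHandler())
    print(opener.open('demo-license://ticket123').read())  # b'decrypted-key-bytes'
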
class AbemaTVBaseIE(InfoExtractor):
- def _extract_breadcrumb_list(self, webpage, video_id):
- for jld in re.finditer(
- r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
- webpage):
- jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
- if jsonld:
- if jsonld.get('@type') != 'BreadcrumbList':
- continue
- trav = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
- if trav:
- return trav
- return []
-
-
-class AbemaTVIE(AbemaTVBaseIE):
- _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
- _NETRC_MACHINE = 'abematv'
- _TESTS = [{
- 'url': 'https://abema.tv/video/episode/194-25_s2_p1',
- 'info_dict': {
- 'id': '194-25_s2_p1',
- 'title': '第1話 「チーズケーキ」 「モーニング再び」',
- 'series': '異世界食堂2',
- 'series_number': 2,
- 'episode': '第1話 「チーズケーキ」 「モーニング再び」',
- 'episode_number': 1,
- },
- 'skip': 'expired',
- }, {
- 'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
- 'info_dict': {
- 'id': 'E8tvAnMJ7a9a5d',
- 'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
- 'series': 'ゆるキャン△ SEASON2',
- 'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
- 'series_number': 2,
- 'episode_number': 1,
- 'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
- },
- 'skip': 'expired',
- }, {
- 'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
- 'info_dict': {
- 'id': 'E8tvAnMJ7a9a5d',
- 'title': '第5話『光射す』',
- 'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
- 'thumbnail': r're:https://hayabusa\.io/.+',
- 'series': '相棒',
- 'episode': '第5話『光射す』',
- },
- 'skip': 'expired',
- }, {
- 'url': 'https://abema.tv/now-on-air/abema-anime',
- 'info_dict': {
- 'id': 'abema-anime',
- # this varies
- # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
- 'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
- 'is_live': True,
- },
- 'skip': 'Not supported until hypervideo implements native live downloader OR AbemaTV can start a local HTTP server',
- }]
_USERTOKEN = None
_DEVICE_ID = None
- _TIMETABLE = None
_MEDIATOKEN = None
_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
- def _generate_aks(self, deviceid):
+ @classmethod
+ def _generate_aks(cls, deviceid):
deviceid = deviceid.encode('utf-8')
# add 1 hour, then drop the minutes and seconds
ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
@@ -227,7 +164,7 @@ class AbemaTVIE(AbemaTVBaseIE):
def mix_once(nonce):
nonlocal tmp
- h = hmac.new(self._SECRETKEY, digestmod=hashlib.sha256)
+ h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
h.update(nonce)
tmp = h.digest()
@@ -238,22 +175,22 @@ class AbemaTVIE(AbemaTVBaseIE):
def mix_twist(nonce):
nonlocal tmp
- mix_once(urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
+ mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
- mix_once(self._SECRETKEY)
+ mix_once(cls._SECRETKEY)
mix_tmp(time_struct.tm_mon)
mix_twist(deviceid)
mix_tmp(time_struct.tm_mday % 5)
mix_twist(ts_1hour_str)
mix_tmp(time_struct.tm_hour % 5)
- return urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
+ return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
def _get_device_token(self):
if self._USERTOKEN:
return self._USERTOKEN
- self._DEVICE_ID = random_uuidv4()
+ AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
aks = self._generate_aks(self._DEVICE_ID)
user_data = self._download_json(
'https://api.abema.io/v1/users', None, note='Authorizing',
@@ -264,7 +201,7 @@ class AbemaTVIE(AbemaTVBaseIE):
headers={
'Content-Type': 'application/json',
})
- self._USERTOKEN = user_data['token']
+ AbemaTVBaseIE._USERTOKEN = user_data['token']
# don't allow the handler to be added more than once, even though this is guarded against
remove_opener(self._downloader, AbemaLicenseHandler)
@@ -276,7 +213,7 @@ class AbemaTVIE(AbemaTVBaseIE):
if not invalidate and self._MEDIATOKEN:
return self._MEDIATOKEN
- self._MEDIATOKEN = self._download_json(
+ AbemaTVBaseIE._MEDIATOKEN = self._download_json(
'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
query={
'osName': 'android',
@@ -286,11 +223,82 @@ class AbemaTVIE(AbemaTVBaseIE):
'appId': 'tv.abema',
'appVersion': '3.27.1'
}, headers={
- 'Authorization': 'bearer ' + self._get_device_token()
+ 'Authorization': f'bearer {self._get_device_token()}',
})['token']
return self._MEDIATOKEN
+ def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
+ return self._download_json(
+ f'https://api.abema.io/{endpoint}', video_id, query=query or {},
+ note=note,
+ headers={
+ 'Authorization': f'bearer {self._get_device_token()}',
+ })
+
+ def _extract_breadcrumb_list(self, webpage, video_id):
+ for jld in re.finditer(
+ r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
+ webpage):
+ jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
+ if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
+ continue
+ items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
+ if items:
+ return items
+ return []
+
+
+class AbemaTVIE(AbemaTVBaseIE):
+ _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
+ _NETRC_MACHINE = 'abematv'
+ _TESTS = [{
+ 'url': 'https://abema.tv/video/episode/194-25_s2_p1',
+ 'info_dict': {
+ 'id': '194-25_s2_p1',
+ 'title': '第1話 「チーズケーキ」 「モーニング再び」',
+ 'series': '異世界食堂2',
+ 'series_number': 2,
+ 'episode': '第1話 「チーズケーキ」 「モーニング再び」',
+ 'episode_number': 1,
+ },
+ 'skip': 'expired',
+ }, {
+ 'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
+ 'info_dict': {
+ 'id': 'E8tvAnMJ7a9a5d',
+ 'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
+ 'series': 'ゆるキャン△ SEASON2',
+ 'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
+ 'series_number': 2,
+ 'episode_number': 1,
+ 'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
+ },
+ 'skip': 'expired',
+ }, {
+ 'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
+ 'info_dict': {
+ 'id': 'E8tvAnMJ7a9a5d',
+ 'title': '第5話『光射す』',
+ 'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
+ 'thumbnail': r're:https://hayabusa\.io/.+',
+ 'series': '相棒',
+ 'episode': '第5話『光射す』',
+ },
+ 'skip': 'expired',
+ }, {
+ 'url': 'https://abema.tv/now-on-air/abema-anime',
+ 'info_dict': {
+ 'id': 'abema-anime',
+ # this varies
+ # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
+ 'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
+ 'is_live': True,
+ },
+ 'skip': 'Not supported until hypervideo implements native live downloader OR AbemaTV can start a local HTTP server',
+ }]
+ _TIMETABLE = None
+
def _perform_login(self, username, password):
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
@@ -303,18 +311,18 @@ class AbemaTVIE(AbemaTVBaseIE):
method: username,
'password': password
}).encode('utf-8'), headers={
- 'Authorization': 'bearer ' + self._get_device_token(),
+ 'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
- self._USERTOKEN = login_response['token']
+ AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
def _real_extract(self, url):
# starting download using infojson from this extractor is undefined behavior,
- # and never be fixed in the future; you must trigger downloads by directly specifing URL.
+ # and will never be fixed; you must trigger downloads by directly specifying the URL.
# (unless there's a way to hook before downloading by extractor)
video_id, video_type = self._match_valid_url(url).group('id', 'type')
headers = {
@@ -357,7 +365,7 @@ class AbemaTVIE(AbemaTVBaseIE):
# read breadcrumb on top of page
breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
if breadcrumb:
- # breadcrumb list translates to: (example is 1st test for this IE)
+ # breadcrumb list translates to: (e.g. 1st test for this IE)
# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
# hence this works
info['series'] = breadcrumb[-2]
@@ -444,6 +452,7 @@ class AbemaTVIE(AbemaTVBaseIE):
class AbemaTVTitleIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
+ _PAGE_SIZE = 25
_TESTS = [{
'url': 'https://abema.tv/video/title/90-1597',
@@ -459,18 +468,39 @@ class AbemaTVTitleIE(AbemaTVBaseIE):
'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
},
'playlist_mincount': 16,
+ }, {
+ 'url': 'https://abema.tv/video/title/25-102',
+ 'info_dict': {
+ 'id': '25-102',
+ 'title': 'ソードアート・オンライン アリシゼーション',
+ },
+ 'playlist_mincount': 24,
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ def _fetch_page(self, playlist_id, series_version, page):
+ programs = self._call_api(
+ f'v1/video/series/{playlist_id}/programs', playlist_id,
+ note=f'Downloading page {page + 1}',
+ query={
+ 'seriesVersion': series_version,
+ 'offset': str(page * self._PAGE_SIZE),
+ 'order': 'seq',
+ 'limit': str(self._PAGE_SIZE),
+ })
+ yield from (
+ self.url_result(f'https://abema.tv/video/episode/{x}')
+ for x in traverse_obj(programs, ('programs', ..., 'id'), default=[]))
- playlist_title, breadcrumb = None, self._extract_breadcrumb_list(webpage, video_id)
- if breadcrumb:
- playlist_title = breadcrumb[-1]
+ def _entries(self, playlist_id, series_version):
+ return OnDemandPagedList(
+ functools.partial(self._fetch_page, playlist_id, series_version),
+ self._PAGE_SIZE)
- playlist = [
- self.url_result(urljoin('https://abema.tv/', mobj.group(1)))
- for mobj in re.finditer(r'<li\s*class=".+?EpisodeList.+?"><a\s*href="(/[^"]+?)"', webpage)]
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
- return self.playlist_result(playlist, playlist_title=playlist_title, playlist_id=video_id)
+ return self.playlist_result(
+ self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
+ playlist_title=series_info.get('title'),
+ playlist_description=series_info.get('content'))
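
The rewritten AbemaTVTitleIE above switches from scraping an episode list out of the HTML to paging through the series API with OnDemandPagedList. A hedged sketch of that pattern, with a stand-in class that mimics the utility's laziness (one page fetched per "request", only as entries are consumed):

    import functools

    class OnDemandPagedListSketch:
        # stand-in for hypervideo's OnDemandPagedList, illustration only
        def __init__(self, pagefunc, pagesize):
            self._pagefunc = pagefunc
            self._pagesize = pagesize

        def __iter__(self):
            page = 0
            while True:
                entries = list(self._pagefunc(page))
                yield from entries
                if len(entries) < self._pagesize:
                    return  # short page means we reached the end
                page += 1

    FAKE_API = [f'episode-{i}' for i in range(7)]  # pretend remote series

    def fetch_page(series_id, page, page_size=3):
        # one "request" per page, like _fetch_page in AbemaTVTitleIE
        start = page * page_size
        return FAKE_API[start:start + page_size]

    entries = OnDemandPagedListSketch(functools.partial(fetch_page, 'demo'), 3)
    print(list(entries))  # all 7 episodes, fetched 3 at a time
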
diff --git a/hypervideo_dl/extractor/academicearth.py b/hypervideo_dl/extractor/academicearth.py
index 3409550..d9691cb 100644
--- a/hypervideo_dl/extractor/academicearth.py
+++ b/hypervideo_dl/extractor/academicearth.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/acast.py b/hypervideo_dl/extractor/acast.py
index 63587c5..f2f828f 100644
--- a/hypervideo_dl/extractor/acast.py
+++ b/hypervideo_dl/extractor/acast.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/acfun.py b/hypervideo_dl/extractor/acfun.py
new file mode 100644
index 0000000..dc57929
--- /dev/null
+++ b/hypervideo_dl/extractor/acfun.py
@@ -0,0 +1,199 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ format_field,
+ int_or_none,
+ traverse_obj,
+ parse_codecs,
+ parse_qs,
+)
+
+
+class AcFunVideoBaseIE(InfoExtractor):
+ def _extract_metadata(self, video_id, video_info):
+ playjson = self._parse_json(video_info['ksPlayJson'], video_id)
+
+ formats, subtitles = [], {}
+ for video in traverse_obj(playjson, ('adaptationSet', 0, 'representation')):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video['url'], video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ for f in fmts:
+ f.update({
+ 'fps': float_or_none(video.get('frameRate')),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'tbr': float_or_none(video.get('avgBitrate')),
+ **parse_codecs(video.get('codecs', ''))
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': float_or_none(video_info.get('durationMillis'), 1000),
+ 'timestamp': int_or_none(video_info.get('uploadTime'), 1000),
+ 'http_headers': {'Referer': 'https://www.acfun.cn/'},
+ }
+
+
+class AcFunVideoIE(AcFunVideoBaseIE):
+ _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.acfun.cn/v/ac35457073',
+ 'info_dict': {
+ 'id': '35457073',
+ 'ext': 'mp4',
+ 'duration': 174.208,
+ 'timestamp': 1656403967,
+ 'title': '1 8 岁 现 状',
+ 'description': '“赶紧回去!班主任查班了!”',
+ 'uploader': '锤子game',
+ 'uploader_id': '51246077',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
+ 'upload_date': '20220628',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'tags': list,
+ },
+ }, {
+ # example for len(video_list) > 1
+ 'url': 'https://www.acfun.cn/v/ac35468952_2',
+ 'info_dict': {
+ 'id': '35468952_2',
+ 'ext': 'mp4',
+ 'title': '【动画剧集】Rocket & Groot Season 1(2022)/火箭浣熊与格鲁特第1季 P02 S01E02 十拿九穩',
+ 'duration': 90.459,
+ 'uploader': '比令',
+ 'uploader_id': '37259967',
+ 'upload_date': '20220629',
+ 'timestamp': 1656479962,
+ 'tags': list,
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
+ 'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ json_all = self._search_json(r'window.videoInfo\s*=', webpage, 'videoInfo', video_id)
+
+ title = json_all.get('title')
+ video_list = json_all.get('videoList') or []
+ video_internal_id = traverse_obj(json_all, ('currentVideoInfo', 'id'))
+ if video_internal_id and len(video_list) > 1:
+ part_idx, part_video_info = next(
+ (idx + 1, v) for (idx, v) in enumerate(video_list)
+ if v['id'] == video_internal_id)
+ title = f'{title} P{part_idx:02d} {part_video_info["title"]}'
+
+ return {
+ **self._extract_metadata(video_id, json_all['currentVideoInfo']),
+ 'title': title,
+ 'thumbnail': json_all.get('coverUrl'),
+ 'description': json_all.get('description'),
+ 'uploader': traverse_obj(json_all, ('user', 'name')),
+ 'uploader_id': traverse_obj(json_all, ('user', 'href')),
+ 'tags': traverse_obj(json_all, ('tagList', ..., 'name')),
+ 'view_count': int_or_none(json_all.get('viewCount')),
+ 'like_count': int_or_none(json_all.get('likeCountShow')),
+ 'comment_count': int_or_none(json_all.get('commentCountShow')),
+ }
+
+
+class AcFunBangumiIE(AcFunVideoBaseIE):
+ _VALID_URL = r'https?://www\.acfun\.cn/bangumi/(?P<id>aa[_\d]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1745457?ac=2',
+ 'info_dict': {
+ 'id': 'aa6002917_36188_1745457__2',
+ 'ext': 'mp4',
+ 'title': '【7月】租借女友 水原千鹤角色曲『DATE』特别PV',
+ 'upload_date': '20200916',
+ 'timestamp': 1600243813,
+ 'duration': 92.091,
+ },
+ }, {
+ 'url': 'https://www.acfun.cn/bangumi/aa5023171_36188_1750645',
+ 'info_dict': {
+ 'id': 'aa5023171_36188_1750645',
+ 'ext': 'mp4',
+ 'title': '红孩儿之趴趴蛙寻石记 第5话 ',
+ 'duration': 760.0,
+ 'season': '红孩儿之趴趴蛙寻石记',
+ 'season_id': 5023171,
+ 'season_number': 1, # series has only 1 season
+ 'episode': 'Episode 5',
+ 'episode_number': 5,
+ 'upload_date': '20181223',
+ 'timestamp': 1545552185,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'https://www.acfun.cn/bangumi/aa6065485_36188_1885061',
+ 'info_dict': {
+ 'id': 'aa6065485_36188_1885061',
+ 'ext': 'mp4',
+ 'title': '叽歪老表(第二季) 第5话 坚不可摧',
+ 'season': '叽歪老表(第二季)',
+ 'season_number': 2,
+ 'season_id': 6065485,
+ 'episode': '坚不可摧',
+ 'episode_number': 5,
+ 'upload_date': '20220324',
+ 'timestamp': 1648082786,
+ 'duration': 105.002,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ ac_idx = parse_qs(url).get('ac', [None])[-1]
+ video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}'
+
+ webpage = self._download_webpage(url, video_id)
+ json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id)
+
+ if ac_idx:
+ video_info = json_bangumi_data['hlVideoInfo']
+ return {
+ **self._extract_metadata(video_id, video_info),
+ 'title': video_info.get('title'),
+ }
+
+ video_info = json_bangumi_data['currentVideoInfo']
+
+ season_id = json_bangumi_data.get('bangumiId')
+ season_number = season_id and next((
+ idx for idx, v in enumerate(json_bangumi_data.get('relatedBangumis') or [], 1)
+ if v.get('id') == season_id), 1)
+
+ json_bangumi_list = self._search_json(
+ r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False)
+ video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
+ episode_number = video_internal_id and next((
+ idx for idx, v in enumerate(json_bangumi_list.get('items') or [], 1)
+ if v.get('videoId') == video_internal_id), None)
+
+ return {
+ **self._extract_metadata(video_id, video_info),
+ 'title': json_bangumi_data.get('showTitle'),
+ 'thumbnail': json_bangumi_data.get('image'),
+ 'season': json_bangumi_data.get('bangumiTitle'),
+ 'season_id': season_id,
+ 'season_number': season_number,
+ 'episode': json_bangumi_data.get('title'),
+ 'episode_number': episode_number,
+ 'comment_count': int_or_none(json_bangumi_data.get('commentCount')),
+ }
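
AcFun's extractors read their metadata from JSON blobs assigned to window.videoInfo / window.bangumiData in the page source via _search_json. A rough stand-in using only the standard library; the real _search_json balances nested braces properly, while this sketch leans on the trailing semicolon to terminate the match:

    import json
    import re

    webpage = '''<script>
    window.videoInfo = {"title": "demo", "videoList": [{"id": 1}]};
    </script>'''

    def search_js_json(assignment_re, html):
        # grab the object literal assigned to a JS variable
        m = re.search(assignment_re + r'\s*(\{.*?\})\s*;', html, re.DOTALL)
        return json.loads(m.group(1)) if m else None

    info = search_js_json(r'window\.videoInfo\s*=', webpage)
    print(info['title'], len(info['videoList']))  # demo 1
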
diff --git a/hypervideo_dl/extractor/adn.py b/hypervideo_dl/extractor/adn.py
index fca6e60..e0c18c8 100644
--- a/hypervideo_dl/extractor/adn.py
+++ b/hypervideo_dl/extractor/adn.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import binascii
import json
@@ -31,30 +28,34 @@ from ..utils import (
class ADNIE(InfoExtractor):
- IE_DESC = 'Anime Digital Network'
- _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
- 'md5': '0319c99885ff5547565cacb4f3f9348d',
+ IE_DESC = 'Animation Digital Network'
+ _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
+ 'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
- 'id': '7778',
+ 'id': '9841',
'ext': 'mp4',
- 'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
- 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
- 'series': 'Blue Exorcist - Kyôto Saga',
- 'duration': 1467,
- 'release_date': '20170106',
+ 'title': 'Fruits Basket - Episode 1',
+ 'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
+ 'series': 'Fruits Basket',
+ 'duration': 1437,
+ 'release_date': '20190405',
'comment_count': int,
'average_rating': float,
- 'season_number': 2,
- 'episode': 'Début des hostilités',
+ 'season_number': 1,
+ 'episode': 'À ce soir !',
'episode_number': 1,
- }
- }
+ },
+ 'skip': 'Only available in region (FR, ...)',
+ }, {
+ 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+ 'only_matching': True,
+ }]
- _NETRC_MACHINE = 'animedigitalnetwork'
- _BASE_URL = 'http://animedigitalnetwork.fr'
- _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+ _NETRC_MACHINE = 'animationdigitalnetwork'
+ _BASE = 'animationdigitalnetwork.fr'
+ _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
@@ -78,14 +79,14 @@ class ADNIE(InfoExtractor):
if subtitle_location:
enc_subtitles = self._download_webpage(
subtitle_location, video_id, 'Downloading subtitles data',
- fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
+ fatal=False, headers={'Origin': 'https://' + self._BASE})
if not enc_subtitles:
return None
- # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
+ # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
compat_b64decode(enc_subtitles[24:]),
- binascii.unhexlify(self._K + 'ab9f52f5baae7c72'),
+ binascii.unhexlify(self._K + '7fac1178830cfe0c'),
compat_b64decode(enc_subtitles[:24])))
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
if not subtitles_json:
@@ -234,7 +235,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
- self._sort_formats(formats)
video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id,
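
The ADN hunk above decrypts subtitles whose base64 envelope packs a 16-byte IV into the first 24 characters (base64 of 16 bytes, including the == padding) followed by the AES-CBC ciphertext, keyed by two concatenated hex halves. A sketch of the envelope split and PKCS#7 unpadding with the standard library only; the AES step itself is elided (hypervideo uses its bundled aes_cbc_decrypt_bytes), and the IV, ciphertext, and key below are fakes:

    import base64
    import binascii

    def split_envelope(enc_subtitles, key_hex):
        iv = base64.b64decode(enc_subtitles[:24])    # b64 of 16 bytes is 24 chars
        ciphertext = base64.b64decode(enc_subtitles[24:])
        key = binascii.unhexlify(key_hex)            # 32 hex chars -> 16-byte key
        return iv, ciphertext, key

    def unpad_pkcs7(data):
        # the last byte states how many padding bytes were appended (1..16)
        return data[:-data[-1]]

    envelope = (base64.b64encode(b'0123456789abcdef').decode()   # fake IV
                + base64.b64encode(b'fake-ciphertext!').decode())
    iv, ct, key = split_envelope(envelope, '00112233445566778899aabbccddeeff')
    print(len(iv), len(ct), len(key))                    # 16 16 16
    print(unpad_pkcs7(b'subtitles...\x04\x04\x04\x04'))  # b'subtitles...'
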
diff --git a/hypervideo_dl/extractor/adobeconnect.py b/hypervideo_dl/extractor/adobeconnect.py
index e2e6f93..8963b12 100644
--- a/hypervideo_dl/extractor/adobeconnect.py
+++ b/hypervideo_dl/extractor/adobeconnect.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
diff --git a/hypervideo_dl/extractor/adobepass.py b/hypervideo_dl/extractor/adobepass.py
index 5d98301..e5944f7 100644
--- a/hypervideo_dl/extractor/adobepass.py
+++ b/hypervideo_dl/extractor/adobepass.py
@@ -1,26 +1,20 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import getpass
import json
import re
import time
+import urllib.error
import xml.etree.ElementTree as etree
from .common import InfoExtractor
-from ..compat import (
- compat_kwargs,
- compat_urlparse,
- compat_getpass
-)
+from ..compat import compat_urlparse
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
unescapeHTML,
- urlencode_postdata,
unified_timestamp,
- ExtractorError,
- NO_DEFAULT,
+ urlencode_postdata,
)
-
MSO_INFO = {
'DTV': {
'name': 'DIRECTV',
@@ -1350,10 +1344,15 @@ MSO_INFO = {
'username_field': 'username',
'password_field': 'password',
},
+ 'AlticeOne': {
+ 'name': 'Optimum TV',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
}
-class AdobePassIE(InfoExtractor):
+class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MVPD_CACHE = 'ap-mvpd'
@@ -1365,7 +1364,7 @@ class AdobePassIE(InfoExtractor):
headers.update(kwargs.get('headers', {}))
kwargs['headers'] = headers
return super(AdobePassIE, self)._download_webpage_handle(
- *args, **compat_kwargs(kwargs))
+ *args, **kwargs)
@staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating):
@@ -1434,32 +1433,34 @@ class AdobePassIE(InfoExtractor):
guid = xml_text(resource, 'guid') if '<' in resource else resource
count = 0
while count < 2:
- requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+ requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
authn_token = None
if not authn_token:
- # TODO add support for other TV Providers
mso_id = self.get_param('ap_mso')
- if not mso_id:
- raise_mvpd_required()
- username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
- if not username or not password:
- raise_mvpd_required()
- mso_info = MSO_INFO[mso_id]
+ if mso_id:
+ username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
+ if not username or not password:
+ raise_mvpd_required()
+ mso_info = MSO_INFO[mso_id]
- provider_redirect_page_res = self._download_webpage_handle(
- self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
- 'Downloading Provider Redirect Page', query={
- 'noflash': 'true',
- 'mso_id': mso_id,
- 'requestor_id': requestor_id,
- 'no_iframe': 'false',
- 'domain_name': 'adobe.com',
- 'redirect_url': url,
- })
+ provider_redirect_page_res = self._download_webpage_handle(
+ self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
+ 'Downloading Provider Redirect Page', query={
+ 'noflash': 'true',
+ 'mso_id': mso_id,
+ 'requestor_id': requestor_id,
+ 'no_iframe': 'false',
+ 'domain_name': 'adobe.com',
+ 'redirect_url': url,
+ })
+ elif not self._cookies_passed:
+ raise_mvpd_required()
- if mso_id == 'Comcast_SSO':
+ if not mso_id:
+ pass
+ elif mso_id == 'Comcast_SSO':
# Comcast page flow varies by video site and whether you
# are on Comcast's network.
provider_redirect_page, urlh = provider_redirect_page_res
@@ -1507,7 +1508,7 @@ class AdobePassIE(InfoExtractor):
'send_confirm_link': False,
'send_token': True
}))
- philo_code = compat_getpass('Type auth code you have received [Return]: ')
+ philo_code = getpass.getpass('Type auth code you have received [Return]: ')
self._download_webpage(
'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
'token': philo_code
@@ -1709,25 +1710,30 @@ class AdobePassIE(InfoExtractor):
mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password
}
- if mso_id == 'Cablevision':
+ if mso_id in ('Cablevision', 'AlticeOne'):
form_data['_eventId_proceed'] = ''
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
if mso_id != 'Rogers':
post_form(mvpd_confirm_page_res, 'Confirming Login')
- session = self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
- 'Retrieving Session', data=urlencode_postdata({
- '_method': 'GET',
- 'requestor_id': requestor_id,
- }), headers=mvpd_headers)
+ try:
+ session = self._download_webpage(
+ self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
+ 'Retrieving Session', data=urlencode_postdata({
+ '_method': 'GET',
+ 'requestor_id': requestor_id,
+ }), headers=mvpd_headers)
+ except ExtractorError as e:
+ if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
+ raise_mvpd_required()
+ raise
if '<pendingLogout' in session:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
@@ -1743,14 +1749,14 @@ class AdobePassIE(InfoExtractor):
'userMeta': '1',
}), headers=mvpd_headers)
if '<pendingLogout' in authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
if '<error' in authorize:
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
mvpd_headers.update({
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
@@ -1766,7 +1772,7 @@ class AdobePassIE(InfoExtractor):
'hashed_guid': 'false',
}), headers=mvpd_headers)
if '<pendingLogout' in short_authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
return short_authorize
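
A recurring change in adobepass.py is the move from self._downloader.cache to self.cache for persisting MVPD tokens. A hedged sketch of that load-check-refresh pattern; CacheSketch is a dict-backed stand-in, not hypervideo's on-disk cache:

    import time

    class CacheSketch:
        # dict-backed stand-in for the persistent cache
        def __init__(self):
            self._store = {}

        def load(self, section, key):
            return self._store.get((section, key))

        def store(self, section, key, value):
            self._store[(section, key)] = value

    cache = CacheSketch()
    _MVPD_CACHE = 'ap-mvpd'

    def is_expired(token):
        return token['expires'] <= time.time()

    def get_authn_token(requestor_id, fetch):
        requestor_info = cache.load(_MVPD_CACHE, requestor_id) or {}
        token = requestor_info.get('authn_token')
        if token and is_expired(token):
            token = None                  # stale: force a fresh authentication
        if not token:
            token = fetch()               # the network round-trip lives here
            requestor_info['authn_token'] = token
            cache.store(_MVPD_CACHE, requestor_id, requestor_info)
        return token

    fresh = lambda: {'value': 'abc', 'expires': time.time() + 3600}
    print(get_authn_token('NBC', fresh)['value'])  # abc, cached for next call
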
diff --git a/hypervideo_dl/extractor/adobetv.py b/hypervideo_dl/extractor/adobetv.py
index 3cfa1ff..d1525a1 100644
--- a/hypervideo_dl/extractor/adobetv.py
+++ b/hypervideo_dl/extractor/adobetv.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import functools
import re
@@ -72,7 +70,6 @@ class AdobeTVBaseIE(InfoExtractor):
})
s3_extracted = True
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
@@ -234,6 +231,7 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
class AdobeTVVideoIE(AdobeTVBaseIE):
IE_NAME = 'adobetv:video'
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']
_TEST = {
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
@@ -270,7 +268,6 @@ class AdobeTVVideoIE(AdobeTVBaseIE):
'width': int_or_none(source.get('width') or None),
'url': source_src,
})
- self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one
diff --git a/hypervideo_dl/extractor/adultswim.py b/hypervideo_dl/extractor/adultswim.py
index c97cfc1..bd29eb4 100644
--- a/hypervideo_dl/extractor/adultswim.py
+++ b/hypervideo_dl/extractor/adultswim.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .turner import TurnerBaseIE
@@ -183,7 +180,6 @@ class AdultSwimIE(TurnerBaseIE):
info['subtitles'].setdefault('en', []).append({
'url': asset_url,
})
- self._sort_formats(info['formats'])
return info
else:
diff --git a/hypervideo_dl/extractor/aenetworks.py b/hypervideo_dl/extractor/aenetworks.py
index 8025de5..d7c4010 100644
--- a/hypervideo_dl/extractor/aenetworks.py
+++ b/hypervideo_dl/extractor/aenetworks.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .theplatform import ThePlatformIE
from ..utils import (
ExtractorError,
@@ -12,7 +8,7 @@ from ..utils import (
)
-class AENetworksBaseIE(ThePlatformIE):
+class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_BASE_URL_REGEX = r'''(?x)https?://
(?:(?:www|play|watch)\.)?
(?P<domain>
@@ -32,14 +28,17 @@ class AENetworksBaseIE(ThePlatformIE):
}
def _extract_aen_smil(self, smil_url, video_id, auth=None):
- query = {'mbr': 'true'}
+ query = {
+ 'mbr': 'true',
+ 'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+ }
if auth:
query['auth'] = auth
TP_SMIL_QUERY = [{
'assetTypes': 'high_video_ak',
- 'switch': 'hls_high_ak'
+ 'switch': 'hls_high_ak',
}, {
- 'assetTypes': 'high_video_s3'
+ 'assetTypes': 'high_video_s3',
}, {
'assetTypes': 'high_video_s3',
'switch': 'hls_high_fastly',
@@ -63,7 +62,6 @@ class AENetworksBaseIE(ThePlatformIE):
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if last_e and not formats:
raise last_e
- self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
@@ -305,7 +303,6 @@ class HistoryTopicIE(AENetworksBaseIE):
class HistoryPlayerIE(AENetworksBaseIE):
IE_NAME = 'history:player'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
- _TESTS = []
def _real_extract(self, url):
domain, video_id = self._match_valid_url(url).groups()
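
_extract_aen_smil above tries a list of assetTypes query variants in order, accumulating formats and keeping only the last error so it can be raised if every variant fails. A compact sketch of that fallback loop, with a stand-in fetch function in place of the SMIL request:

    QUERY_VARIANTS = [
        {'assetTypes': 'high_video_ak', 'switch': 'hls_high_ak'},
        {'assetTypes': 'high_video_s3'},
        {'assetTypes': 'high_video_s3', 'switch': 'hls_high_fastly'},
    ]

    def fetch(query):
        # pretend only the fastly variant is reachable from here
        if query.get('switch') != 'hls_high_fastly':
            raise RuntimeError(f'GeoRestricted: {query["assetTypes"]}')
        return [{'format_id': 'hls-1080'}]

    formats, last_e = [], None
    for query in QUERY_VARIANTS:
        try:
            formats.extend(fetch(query))
        except RuntimeError as e:
            last_e = e     # remember the failure, keep trying variants
            continue
    if last_e and not formats:
        raise last_e       # every variant failed
    print(formats)         # [{'format_id': 'hls-1080'}]
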
diff --git a/hypervideo_dl/extractor/aeonco.py b/hypervideo_dl/extractor/aeonco.py
new file mode 100644
index 0000000..4655862
--- /dev/null
+++ b/hypervideo_dl/extractor/aeonco.py
@@ -0,0 +1,40 @@
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+
+
+class AeonCoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aeon\.co/videos/(?P<id>[^/?]+)'
+ _TESTS = [{
+ 'url': 'https://aeon.co/videos/raw-solar-storm-footage-is-the-punk-rock-antidote-to-sleek-james-webb-imagery',
+ 'md5': 'e5884d80552c9b6ea8d268a258753362',
+ 'info_dict': {
+ 'id': '1284717',
+ 'ext': 'mp4',
+ 'title': 'Brilliant Noise',
+ 'thumbnail': 'https://i.vimeocdn.com/video/21006315-1a1e49da8b07fd908384a982b4ba9ff0268c509a474576ebdf7b1392f4acae3b-d_960',
+ 'uploader': 'Semiconductor',
+ 'uploader_id': 'semiconductor',
+ 'uploader_url': 'https://vimeo.com/semiconductor',
+ 'duration': 348
+ }
+ }, {
+ 'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
+ 'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
+ 'info_dict': {
+ 'id': '728595228',
+ 'ext': 'mp4',
+ 'title': 'Wrought',
+ 'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
+ 'uploader': 'Biofilm Productions',
+ 'uploader_id': 'user140352216',
+ 'uploader_url': 'https://vimeo.com/user140352216',
+ 'duration': 1344
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
+ vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
+ return self.url_result(vimeo_url, VimeoIE)
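
AeonCoIE does no media extraction of its own: it finds the embedded Vimeo ID, smuggles the required Referer into the player URL, and delegates with url_result. A sketch of the smuggling mechanics, assuming the fragment-encoded JSON convention used by the yt-dlp family of utilities:

    import json
    import urllib.parse

    def smuggle_url(url, data):
        # append extra data as a URL fragment the receiving IE strips off
        return url + '#__youtubedl_smuggle=' + urllib.parse.quote(json.dumps(data))

    def unsmuggle_url(smug_url, default=None):
        if '#__youtubedl_smuggle=' not in smug_url:
            return smug_url, default
        url, _, frag = smug_url.partition('#__youtubedl_smuggle=')
        return url, json.loads(urllib.parse.unquote(frag))

    smuggled = smuggle_url('https://player.vimeo.com/video/1284717',
                           {'http_headers': {'Referer': 'https://aeon.co'}})
    url, data = unsmuggle_url(smuggled)
    print(url, data['http_headers']['Referer'])
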
diff --git a/hypervideo_dl/extractor/afreecatv.py b/hypervideo_dl/extractor/afreecatv.py
index 77f0e3c..9276fe7 100644
--- a/hypervideo_dl/extractor/afreecatv.py
+++ b/hypervideo_dl/extractor/afreecatv.py
@@ -1,14 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import functools
import re
from .common import InfoExtractor
-from ..compat import compat_xpath
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
date_from_str,
determine_ext,
- ExtractorError,
int_or_none,
qualities,
traverse_obj,
@@ -280,7 +278,7 @@ class AfreecaTVIE(InfoExtractor):
else:
raise ExtractorError('Unable to download video info')
- video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
+ video_element = video_xml.findall('./track/video')[-1]
if video_element is None or video_element.text is None:
raise ExtractorError(
'Video %s does not exist' % video_id, expected=True)
@@ -310,7 +308,7 @@ class AfreecaTVIE(InfoExtractor):
if not video_url:
entries = []
- file_elements = video_element.findall(compat_xpath('./file'))
+ file_elements = video_element.findall('./file')
one = len(file_elements) == 1
for file_num, file_element in enumerate(file_elements, start=1):
file_url = url_or_none(file_element.text)
@@ -340,7 +338,6 @@ class AfreecaTVIE(InfoExtractor):
}]
if not formats and not self.get_param('ignore_no_formats'):
continue
- self._sort_formats(formats)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
@@ -382,7 +379,7 @@ class AfreecaTVIE(InfoExtractor):
return info
-class AfreecaTVLiveIE(AfreecaTVIE):
+class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'afreecatv:live'
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
@@ -466,8 +463,6 @@ class AfreecaTVLiveIE(AfreecaTVIE):
'quality': quality_key(quality_str),
})
- self._sort_formats(formats)
-
station_info = self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
query={'szBjId': broadcaster_id}, fatal=False,
@@ -482,3 +477,57 @@ class AfreecaTVLiveIE(AfreecaTVIE):
'formats': formats,
'is_live': True,
}
+
+
+class AfreecaTVUserIE(InfoExtractor):
+ IE_NAME = 'afreecatv:user'
+ _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'
+ _TESTS = [{
+ 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ryuryu24',
+ 'title': 'ryuryu24 - review',
+ },
+ 'playlist_count': 218,
+ }, {
+ 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'parang1995',
+ 'title': 'parang1995 - highlight',
+ },
+ 'playlist_count': 997,
+ }, {
+ 'url': 'https://bj.afreecatv.com/ryuryu24/vods',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ryuryu24',
+ 'title': 'ryuryu24 - all',
+ },
+ 'playlist_count': 221,
+ }, {
+ 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ryuryu24',
+ 'title': 'ryuryu24 - balloonclip',
+ },
+ 'playlist_count': 0,
+ }]
+ _PER_PAGE = 60
+
+ def _fetch_page(self, user_id, user_type, page):
+ page += 1
+ info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id,
+ query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
+ note=f'Downloading {user_type} video page {page}')
+ for item in info['data']:
+ yield self.url_result(
+ f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
+
+ def _real_extract(self, url):
+ user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')
+ user_type = user_type or 'all'
+ entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE)
+ return self.playlist_result(entries, user_id, f'{user_id} - {user_type}')
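
The new AfreecaTVUserIE pulls both the user ID and the optional vods slug from a single _VALID_URL match with named groups, defaulting to 'all' when the slug is absent. A small runnable illustration of that dispatch:

    import re

    VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?'

    for url in ('https://bj.afreecatv.com/ryuryu24/vods/review',
                'https://bj.afreecatv.com/ryuryu24/vods'):
        m = re.match(VALID_URL, url)
        user_id, user_type = m.group('id', 'slug_type')
        print(user_id, user_type or 'all')
    # ryuryu24 review
    # ryuryu24 all
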
diff --git a/hypervideo_dl/extractor/agora.py b/hypervideo_dl/extractor/agora.py
new file mode 100644
index 0000000..abb2d3f
--- /dev/null
+++ b/hypervideo_dl/extractor/agora.py
@@ -0,0 +1,251 @@
+import functools
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ int_or_none,
+ month_by_name,
+ parse_duration,
+ try_call,
+)
+
+
+class WyborczaVideoIE(InfoExtractor):
+ # this ID is not an article ID; it has to be extracted from the article
+ _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
+ IE_NAME = 'wyborcza:video'
+ _TESTS = [{
+ 'url': 'wyborcza:video:26207634',
+ 'info_dict': {
+ 'id': '26207634',
+ 'ext': 'mp4',
+ 'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
+ 'description': ' ',
+ 'uploader': 'Dorota Roman',
+ 'duration': 2474,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ }, {
+ 'url': 'https://wyborcza.pl/video/26207634',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wyborcza.pl/api-video/26207634',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)
+
+ formats = []
+ base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
+ for quality in ('standard', 'high'):
+ if not meta['files'].get(quality):
+ continue
+ formats.append({
+ 'url': base_url + meta['files'][quality],
+ 'height': int_or_none(
+ self._search_regex(
+ r'p(\d+)[a-z]+\.mp4$', meta['files'][quality],
+ 'mp4 video height', default=None)),
+ 'format_id': quality,
+ })
+ if meta['files'].get('dash'):
+ formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': meta.get('title'),
+ 'description': meta.get('lead'),
+ 'uploader': meta.get('signature'),
+ 'thumbnail': meta.get('imageUrl'),
+ 'duration': meta.get('duration'),
+ }
+
+
+class WyborczaPodcastIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?(?:
+ wyborcza\.pl/podcast(?:/0,172673\.html)?|
+ wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
+ )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
+ '''
+ _TESTS = [{
+ 'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
+ 'info_dict': {
+ 'id': '100720',
+ 'ext': 'mp3',
+ 'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
+ 'uploader': 'Michał Nogaś ',
+ 'upload_date': '20210117',
+ 'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
+ 'duration': 3684.0,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ }, {
+ 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
+ 'info_dict': {
+ 'id': '100673',
+ 'ext': 'mp3',
+ 'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
+ 'uploader': 'Agnieszka Urazińska ',
+ 'upload_date': '20210115',
+ 'description': 'md5:c161dc035f8dbb60077011fc41274899',
+ 'duration': 1803.0,
+ 'thumbnail': r're:https://.+\.jpg',
+ },
+ }, {
+ 'url': 'https://wyborcza.pl/podcast',
+ 'info_dict': {
+ 'id': '334',
+ 'title': 'Gościnnie: Wyborcza, 8:10',
+ 'series': 'Gościnnie: Wyborcza, 8:10',
+ },
+ 'playlist_mincount': 370,
+ }, {
+ 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
+ 'info_dict': {
+ 'id': '395',
+ 'title': 'Gościnnie: Wysokie Obcasy',
+ 'series': 'Gościnnie: Wysokie Obcasy',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _real_extract(self, url):
+ podcast_id = self._match_id(url)
+
+ if not podcast_id: # playlist
+ podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334'
+ return self.url_result(TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id)
+
+ meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id,
+ query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None})
+
+ day, month, year = self._search_regex(r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
+ 'upload date', group=(1, 2, 3), default=(None, None, None))
+ return {
+ 'id': podcast_id,
+ 'url': meta['url'],
+ 'title': meta.get('title'),
+ 'description': meta.get('description'),
+ 'thumbnail': meta.get('imageUrl'),
+ 'duration': parse_duration(meta.get('duration')),
+ 'uploader': meta.get('author'),
+ 'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'),
+ }
+
+
+class TokFMPodcastIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
+ IE_NAME = 'tokfm:podcast'
+ _TESTS = [{
+ 'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
+ 'info_dict': {
+ 'id': '91275',
+ 'ext': 'aac',
+ 'title': 'md5:a9b15488009065556900169fb8061cce',
+ 'episode': 'md5:a9b15488009065556900169fb8061cce',
+ 'series': 'Analizy',
+ },
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+
+ # if this endpoint breaks, see the API below, though it returns a lot of useless data
+ # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
+ metadata = self._download_json(
+ f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
+ if not metadata:
+ raise ExtractorError('No such podcast', expected=True)
+ metadata = metadata[0]
+
+ formats = []
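+ # episodes are offered in both aac and mp3; probe each format explicitly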
+ for ext in ('aac', 'mp3'):
+ url_data = self._download_json(
+ f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
+ media_id, f'Downloading podcast {ext} URL')
+ # the API returns the default mp3 link even when aac is requested; check the extension to avoid duplicate mp3 entries
+ if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
+ formats.append({
+ 'url': url_data['link_ssl'],
+ 'ext': ext,
+ 'vcodec': 'none',
+ 'acodec': ext,
+ })
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'title': metadata.get('podcast_name'),
+ 'series': metadata.get('series_name'),
+ 'episode': metadata.get('podcast_name'),
+ }
+
+
+class TokFMAuditionIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
+ IE_NAME = 'tokfm:audition'
+ _TESTS = [{
+ 'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
+ 'info_dict': {
+ 'id': '218',
+ 'title': 'Analizy',
+ 'series': 'Analizy',
+ },
+ 'playlist_count': 1635,
+ }]
+
+ _PAGE_SIZE = 30
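+ # the Agora API appears to reject default user agents, so present a mobile browser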
+ _HEADERS = {
+ 'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
+ }
+
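+ # also used by WyborczaPodcastIE to route bare playlist pages here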
+ @staticmethod
+ def _create_url(id):
+ return f'https://audycje.tokfm.pl/audycja/{id}'
+
+ def _real_extract(self, url):
+ audition_id = self._match_id(url)
+
+ data = self._download_json(
+ f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
+ audition_id, 'Downloading audition metadata', headers=self._HEADERS)
+ if not data:
+ raise ExtractorError('No such audition', expected=True)
+ data = data[0]
+
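+ # page through episodes lazily, _PAGE_SIZE at a time, as the playlist is consumed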
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, audition_id, data), self._PAGE_SIZE)
+
+ return {
+ '_type': 'playlist',
+ 'id': audition_id,
+ 'title': data.get('series_name'),
+ 'series': data.get('series_name'),
+ 'entries': entries,
+ }
+
+ def _fetch_page(self, audition_id, data, page):
+ for retry in self.RetryManager():
+ podcast_page = self._download_json(
+ f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
+ audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
+ if not podcast_page:
+ retry.error = ExtractorError('Agora returned empty page', expected=True)
+
+ for podcast in podcast_page:
+ yield {
+ '_type': 'url_transparent',
+ 'url': podcast['podcast_sharing_url'],
+ 'ie_key': TokFMPodcastIE.ie_key(),
+ 'title': podcast.get('podcast_name'),
+ 'episode': podcast.get('podcast_name'),
+ 'description': podcast.get('podcast_description'),
+ 'timestamp': int_or_none(podcast.get('podcast_timestamp')),
+ 'series': data.get('series_name'),
+ }
diff --git a/hypervideo_dl/extractor/airmozilla.py b/hypervideo_dl/extractor/airmozilla.py
index 9e38136..669556b 100644
--- a/hypervideo_dl/extractor/airmozilla.py
+++ b/hypervideo_dl/extractor/airmozilla.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/aliexpress.py b/hypervideo_dl/extractor/aliexpress.py
index 9722fe9..2e83f2e 100644
--- a/hypervideo_dl/extractor/aliexpress.py
+++ b/hypervideo_dl/extractor/aliexpress.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/aljazeera.py b/hypervideo_dl/extractor/aljazeera.py
index 7bcdb7a..124bab0 100644
--- a/hypervideo_dl/extractor/aljazeera.py
+++ b/hypervideo_dl/extractor/aljazeera.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/allocine.py b/hypervideo_dl/extractor/allocine.py
index 403a277..2d342cf 100644
--- a/hypervideo_dl/extractor/allocine.py
+++ b/hypervideo_dl/extractor/allocine.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -115,8 +112,6 @@ class AllocineIE(InfoExtractor):
})
duration, view_count, timestamp = [None] * 3
- self._sort_formats(formats)
-
return {
'id': video_id,
'display_id': display_id,
diff --git a/hypervideo_dl/extractor/alphaporno.py b/hypervideo_dl/extractor/alphaporno.py
index 3a6d99f..8d5b472 100644
--- a/hypervideo_dl/extractor/alphaporno.py
+++ b/hypervideo_dl/extractor/alphaporno.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
diff --git a/hypervideo_dl/extractor/alsace20tv.py b/hypervideo_dl/extractor/alsace20tv.py
index 4aae6fe..ea3332e 100644
--- a/hypervideo_dl/extractor/alsace20tv.py
+++ b/hypervideo_dl/extractor/alsace20tv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -25,7 +22,6 @@ class Alsace20TVBaseIE(InfoExtractor):
self._extract_smil_formats(fmt_url, video_id, fatal=False)
if '/smil:_' in fmt_url
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
- self._sort_formats(formats)
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
diff --git a/hypervideo_dl/extractor/alura.py b/hypervideo_dl/extractor/alura.py
index d2e2df2..bfe066b 100644
--- a/hypervideo_dl/extractor/alura.py
+++ b/hypervideo_dl/extractor/alura.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -66,8 +63,6 @@ class AluraIE(InfoExtractor):
f['height'] = int('720' if m.group('res') == 'hd' else '480')
formats.extend(video_format)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_title,
@@ -116,7 +111,7 @@ class AluraIE(InfoExtractor):
raise ExtractorError('Unable to log in')
-class AluraCourseIE(AluraIE):
+class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<id>[^/]+)'
_LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
diff --git a/hypervideo_dl/extractor/amara.py b/hypervideo_dl/extractor/amara.py
index 61d4695..5018710 100644
--- a/hypervideo_dl/extractor/amara.py
+++ b/hypervideo_dl/extractor/amara.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
diff --git a/hypervideo_dl/extractor/amazon.py b/hypervideo_dl/extractor/amazon.py
index 07b1b18..4d31706 100644
--- a/hypervideo_dl/extractor/amazon.py
+++ b/hypervideo_dl/extractor/amazon.py
@@ -1,6 +1,5 @@
-# coding: utf-8
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import ExtractorError, int_or_none
class AmazonStoreIE(InfoExtractor):
@@ -10,7 +9,7 @@ class AmazonStoreIE(InfoExtractor):
'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
'info_dict': {
'id': 'B098XNCHLD',
- 'title': 'md5:5f3194dbf75a8dcfc83079bd63a2abed',
+ 'title': 'md5:dae240564cbb2642170c02f7f0d7e472',
},
'playlist_mincount': 1,
'playlist': [{
@@ -19,28 +18,44 @@ class AmazonStoreIE(InfoExtractor):
'ext': 'mp4',
'title': 'mcdodo usb c cable 100W 5a',
'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 34,
},
}]
}, {
'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3',
'info_dict': {
'id': 'B0863TXGM3',
- 'title': 'md5:b0bde4881d3cfd40d63af19f7898b8ff',
+ 'title': 'md5:d1d3352428f8f015706c84b31e132169',
},
'playlist_mincount': 4,
}, {
'url': 'https://www.amazon.com/dp/B0845NXCXF/',
'info_dict': {
'id': 'B0845NXCXF',
- 'title': 'md5:2145cd4e3c7782f1ee73649a3cff1171',
+ 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e',
},
'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ',
+ 'info_dict': {
+ 'id': 'B08WX337PQ',
+ 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e',
+ },
+ 'playlist_mincount': 1,
}]
def _real_extract(self, url):
id = self._match_id(url)
- webpage = self._download_webpage(url, id)
- data_json = self._parse_json(self._html_search_regex(r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'(.*)\'\)', webpage, 'data'), id)
+
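+ # the data blob is sometimes missing from the served page; retry until it parses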
+ for retry in self.RetryManager():
+ webpage = self._download_webpage(url, id)
+ try:
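+ # the embedded JSON double-escapes unicode sequences (\\uXXXX); collapse them before parsing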
+ data_json = self._search_json(
+ r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
+ transform_source=lambda x: x.replace(R'\\u', R'\u'))
+ except ExtractorError as e:
+ retry.error = e
+
entries = [{
'id': video['marketPlaceID'],
'url': video['url'],
@@ -50,4 +65,4 @@ class AmazonStoreIE(InfoExtractor):
'height': int_or_none(video.get('videoHeight')),
'width': int_or_none(video.get('videoWidth')),
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
- return self.playlist_result(entries, playlist_id=id, playlist_title=data_json['title'])
+ return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
diff --git a/hypervideo_dl/extractor/amazonminitv.py b/hypervideo_dl/extractor/amazonminitv.py
new file mode 100644
index 0000000..7309968
--- /dev/null
+++ b/hypervideo_dl/extractor/amazonminitv.py
@@ -0,0 +1,290 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
+
+
+class AmazonMiniTVBaseIE(InfoExtractor):
+ def _real_initialize(self):
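+ # loading the miniTV page sets the guest session-id cookie that the API calls rely on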
+ self._download_webpage(
+ 'https://www.amazon.in/minitv', None,
+ note='Fetching guest session cookies')
+ AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
+
+ def _call_api(self, asin, data=None, note=None):
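+ # GraphQL queries (with a payload) go to /graphql; playback info is fetched from /prs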
+ device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
+ if data:
+ data['variables'].update({
+ 'contentType': 'VOD',
+ 'sessionIdToken': self.session_id,
+ **device,
+ })
+
+ resp = self._download_json(
+ f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
+ asin, note=note, headers={'Content-Type': 'application/json'},
+ data=json.dumps(data).encode() if data else None,
+ query=None if data else {
+ 'deviceType': 'A1WMMUXPCUJL4N',
+ 'contentId': asin,
+ **device,
+ })
+
+ if resp.get('errors'):
+ raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
+ elif not data:
+ return resp
+ return resp['data'][data['operationName']]
+
+
+class AmazonMiniTVIE(AmazonMiniTVBaseIE):
+ _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
+ 'ext': 'mp4',
+ 'title': 'May I Kiss You?',
+ 'language': 'Hindi',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:a549bfc747973e04feb707833474e59d',
+ 'release_timestamp': 1644710400,
+ 'release_date': '20220213',
+ 'duration': 846,
+ 'chapters': 'count:2',
+ 'series': 'Couple Goals',
+ 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
+ 'season': 'Season 3',
+ 'season_number': 3,
+ 'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
+ 'episode': 'May I Kiss You?',
+ 'episode_number': 2,
+ 'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
+ },
+ }, {
+ 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
+ 'ext': 'mp4',
+ 'title': 'Jahaan',
+ 'language': 'Hindi',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:05eb765a77bf703f322f120ec6867339',
+ 'release_timestamp': 1647475200,
+ 'release_date': '20220317',
+ 'duration': 783,
+ 'chapters': [],
+ },
+ }, {
+ 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
+ 'only_matching': True,
+ }, {
+ 'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
+ 'only_matching': True,
+ }, {
+ 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
+ 'only_matching': True,
+ }]
+
+ _GRAPHQL_QUERY_CONTENT = '''
+query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
+ content(
+ applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
+ contentId: $contentId
+ contentType: $contentType
+ ) {
+ contentId
+ name
+ ... on Episode {
+ contentId
+ vodType
+ name
+ images
+ description {
+ synopsis
+ contentLengthInSeconds
+ }
+ publicReleaseDateUTC
+ audioTracks
+ seasonId
+ seriesId
+ seriesName
+ seasonNumber
+ episodeNumber
+ timecode {
+ endCreditsTime
+ }
+ }
+ ... on MovieContent {
+ contentId
+ vodType
+ name
+ description {
+ synopsis
+ contentLengthInSeconds
+ }
+ images
+ publicReleaseDateUTC
+ audioTracks
+ }
+ }
+}'''
+
+ def _real_extract(self, url):
+ asin = f'amzn1.dv.gti.{self._match_id(url)}'
+ prs = self._call_api(asin, note='Downloading playback info')
+
+ formats, subtitles = [], {}
+ for type_, asset in prs['playbackAssets'].items():
+ if not traverse_obj(asset, 'manifestUrl'):
+ continue
+ if type_ == 'hls':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=type_, fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ elif type_ == 'dash':
+ mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
+ asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
+ formats.extend(mpd_fmts)
+ subtitles = self._merge_subtitles(subtitles, mpd_subs)
+ else:
+ self.report_warning(f'Unknown asset type: {type_}')
+
+ title_info = self._call_api(
+ asin, note='Downloading title info', data={
+ 'operationName': 'content',
+ 'variables': {'contentId': asin},
+ 'query': self._GRAPHQL_QUERY_CONTENT,
+ })
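+ # endCreditsTime and publicReleaseDateUTC come back in milliseconds; convert to seconds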
+ credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
+ is_episode = title_info.get('vodType') == 'EPISODE'
+
+ return {
+ 'id': asin,
+ 'title': title_info.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'language': traverse_obj(title_info, ('audioTracks', 0)),
+ 'thumbnails': [{
+ 'id': type_,
+ 'url': url,
+ } for type_, url in (title_info.get('images') or {}).items()],
+ 'description': traverse_obj(title_info, ('description', 'synopsis')),
+ 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
+ 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
+ 'chapters': [{
+ 'start_time': credits_time,
+ 'title': 'End Credits',
+ }] if credits_time else [],
+ 'series': title_info.get('seriesName'),
+ 'series_id': title_info.get('seriesId'),
+ 'season_number': title_info.get('seasonNumber'),
+ 'season_id': title_info.get('seasonId'),
+ 'episode': title_info.get('name') if is_episode else None,
+ 'episode_number': title_info.get('episodeNumber'),
+ 'episode_id': asin if is_episode else None,
+ }
+
+
+class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
+ IE_NAME = 'amazonminitv:season'
+ _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
+ IE_DESC = 'Amazon MiniTV Season, "amazonminitv:season:" prefix'
+ _TESTS = [{
+ 'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+ 'playlist_mincount': 6,
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+ },
+ }, {
+ 'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+ 'only_matching': True,
+ }]
+
+ _GRAPHQL_QUERY = '''
+query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
+ getEpisodes(
+ applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
+ episodeOrSeasonId: $episodeOrSeasonId
+ ) {
+ episodes {
+ ... on Episode {
+ contentId
+ name
+ images
+ seriesName
+ seasonId
+ seriesId
+ seasonNumber
+ episodeNumber
+ description {
+ synopsis
+ contentLengthInSeconds
+ }
+ publicReleaseDateUTC
+ }
+ }
+ }
+}
+'''
+
+ def _entries(self, asin):
+ season_info = self._call_api(
+ asin, note='Downloading season info', data={
+ 'operationName': 'getEpisodes',
+ 'variables': {'episodeOrSeasonId': asin},
+ 'query': self._GRAPHQL_QUERY,
+ })
+
+ for episode in season_info['episodes']:
+ yield self.url_result(
+ f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
+
+ def _real_extract(self, url):
+ asin = f'amzn1.dv.gti.{self._match_id(url)}'
+ return self.playlist_result(self._entries(asin), asin)
+
+
+class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
+ IE_NAME = 'amazonminitv:series'
+ _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
+ 'playlist_mincount': 3,
+ 'info_dict': {
+ 'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
+ },
+ }, {
+ 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
+ 'only_matching': True,
+ }]
+
+ _GRAPHQL_QUERY = '''
+query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
+ getSeasons(
+ applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
+ episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
+ ) {
+ seasons {
+ seasonId
+ }
+ }
+}
+'''
+
+ def _entries(self, asin):
+ season_info = self._call_api(
+ asin, note='Downloading series info', data={
+ 'operationName': 'getSeasons',
+ 'variables': {'episodeOrSeasonOrSeriesId': asin},
+ 'query': self._GRAPHQL_QUERY,
+ })
+
+ for season in season_info['seasons']:
+ yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
+
+ def _real_extract(self, url):
+ asin = f'amzn1.dv.gti.{self._match_id(url)}'
+ return self.playlist_result(self._entries(asin), asin)
diff --git a/hypervideo_dl/extractor/amcnetworks.py b/hypervideo_dl/extractor/amcnetworks.py
index e38e215..c58bc7b 100644
--- a/hypervideo_dl/extractor/amcnetworks.py
+++ b/hypervideo_dl/extractor/amcnetworks.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .theplatform import ThePlatformIE
@@ -12,7 +9,7 @@ from ..utils import (
)
-class AMCNetworksIE(ThePlatformIE):
+class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
@@ -109,7 +106,6 @@ class AMCNetworksIE(ThePlatformIE):
media_url = update_url_query(media_url, query)
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)
- self._sort_formats(formats)
thumbnails = []
thumbnail_urls = [properties.get('imageDesktop')]
diff --git a/hypervideo_dl/extractor/americastestkitchen.py b/hypervideo_dl/extractor/americastestkitchen.py
index 6e6099a..abda55d 100644
--- a/hypervideo_dl/extractor/americastestkitchen.py
+++ b/hypervideo_dl/extractor/americastestkitchen.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -14,7 +11,7 @@ from ..utils import (
class AmericasTestKitchenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -22,15 +19,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5b400b9ee338f922cb06450c',
'title': 'Japanese Suppers',
'ext': 'mp4',
+ 'display_id': 'weeknight-japanese-suppers',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
- 'thumbnail': r're:^https?://',
- 'timestamp': 1523318400,
- 'upload_date': '20180410',
- 'release_date': '20180410',
- 'series': "America's Test Kitchen",
- 'season_number': 18,
+ 'timestamp': 1523304000,
+ 'upload_date': '20180409',
+ 'release_date': '20180409',
+ 'series': 'America\'s Test Kitchen',
+ 'season': 'Season 18',
'episode': 'Japanese Suppers',
+ 'season_number': 18,
'episode_number': 15,
+ 'duration': 1376,
+ 'thumbnail': r're:^https?://',
+ 'average_rating': 0,
+ 'view_count': int,
},
'params': {
'skip_download': True,
@@ -43,15 +45,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5fbe8c61bda2010001c6763b',
'title': 'Simple Chicken Dinner',
'ext': 'mp4',
+ 'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
- 'thumbnail': r're:^https?://',
- 'timestamp': 1610755200,
- 'upload_date': '20210116',
- 'release_date': '20210116',
- 'series': "America's Test Kitchen",
- 'season_number': 21,
+ 'timestamp': 1610737200,
+ 'upload_date': '20210115',
+ 'release_date': '20210115',
+ 'series': 'America\'s Test Kitchen',
+ 'season': 'Season 21',
'episode': 'Simple Chicken Dinner',
+ 'season_number': 21,
'episode_number': 3,
+ 'duration': 1397,
+ 'thumbnail': r're:^https?://',
+ 'view_count': int,
+ 'average_rating': 0,
},
'params': {
'skip_download': True,
@@ -60,10 +67,10 @@ class AmericasTestKitchenIE(InfoExtractor):
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
}, {
- 'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
+ 'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
'only_matching': True,
}, {
- 'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
+ 'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
'only_matching': True,
}]
@@ -93,7 +100,7 @@ class AmericasTestKitchenIE(InfoExtractor):
class AmericasTestKitchenSeasonIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
_TESTS = [{
# ATK Season
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@@ -104,7 +111,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
'playlist_count': 13,
}, {
# Cooks Country Season
- 'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+ 'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
'info_dict': {
'id': 'season_12',
'title': 'Season 12',
@@ -113,17 +120,17 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
}]
def _real_extract(self, url):
- show_name, season_number = self._match_valid_url(url).groups()
+ show_path, season_number = self._match_valid_url(url).group('show', 'id')
season_number = int(season_number)
- slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+ slug = 'cco' if show_path == '/cookscountry' else 'atk'
season = 'Season %d' % season_number
season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
season, headers={
- 'Origin': 'https://www.%s.com' % show_name,
+ 'Origin': 'https://www.americastestkitchen.com',
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={
@@ -139,12 +146,12 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
def entries():
for episode in (season_search.get('hits') or []):
- search_url = episode.get('search_url')
+ search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
if not search_url:
continue
yield {
'_type': 'url',
- 'url': 'https://www.%s.com%s' % (show_name, search_url),
+ 'url': f'https://www.americastestkitchen.com{show_path or ""}{search_url}',
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
'title': episode.get('title'),
'description': episode.get('description'),
diff --git a/hypervideo_dl/extractor/amp.py b/hypervideo_dl/extractor/amp.py
index 24c684c..b0cbd77 100644
--- a/hypervideo_dl/extractor/amp.py
+++ b/hypervideo_dl/extractor/amp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -13,7 +10,7 @@ from ..utils import (
)
-class AMPIE(InfoExtractor):
+class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
feed = self._download_json(
@@ -87,8 +84,6 @@ class AMPIE(InfoExtractor):
'ext': ext,
})
- self._sort_formats(formats)
-
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
return {
diff --git a/hypervideo_dl/extractor/angel.py b/hypervideo_dl/extractor/angel.py
new file mode 100644
index 0000000..306b365
--- /dev/null
+++ b/hypervideo_dl/extractor/angel.py
@@ -0,0 +1,56 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import url_or_none, merge_dicts
+
+
+class AngelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?angel\.com/watch/(?P<series>[^/?#]+)/episode/(?P<id>[\w-]+)/season-(?P<season_number>\d+)/episode-(?P<episode_number>\d+)/(?P<title>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.angel.com/watch/tuttle-twins/episode/2f3d0382-ea82-4cdc-958e-84fbadadc710/season-1/episode-1/when-laws-give-you-lemons',
+ 'md5': '4734e5cfdd64a568e837246aa3eaa524',
+ 'info_dict': {
+ 'id': '2f3d0382-ea82-4cdc-958e-84fbadadc710',
+ 'ext': 'mp4',
+ 'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
+ 'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
+ 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
+ 'duration': 1359.0
+ }
+ }, {
+ 'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
+ 'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
+ 'info_dict': {
+ 'id': '8dfb714d-bca5-4812-8125-24fb9514cd10',
+ 'ext': 'mp4',
+ 'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
+ 'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
+ 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
+ 'duration': 3276.0
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ json_ld = self._search_json_ld(webpage, video_id)
+
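+ # the JSON-LD url field points at the HLS master playlist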
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ json_ld.pop('url'), video_id, note='Downloading HD m3u8 information')
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+ # Angel uses Cloudinary in the background and supports image transformations.
+ # We strip these transformations from the URL to return the source file.
+ base_thumbnail_url = url_or_none(self._og_search_thumbnail(webpage)) or json_ld.pop('thumbnails')
+ if base_thumbnail_url:
+ info_dict['thumbnail'] = re.sub(r'(/upload)/.+(/angel-app/.+)$', r'\1\2', base_thumbnail_url)
+
+ return merge_dicts(info_dict, json_ld)
diff --git a/hypervideo_dl/extractor/animelab.py b/hypervideo_dl/extractor/animelab.py
deleted file mode 100644
index 1c2cc47..0000000
--- a/hypervideo_dl/extractor/animelab.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-from ..utils import (
- ExtractorError,
- urlencode_postdata,
- int_or_none,
- str_or_none,
- determine_ext,
-)
-
-from ..compat import compat_HTTPError
-
-
-class AnimeLabBaseIE(InfoExtractor):
- _LOGIN_URL = 'https://www.animelab.com/login'
- _NETRC_MACHINE = 'animelab'
- _LOGGED_IN = False
-
- def _is_logged_in(self, login_page=None):
- if not self._LOGGED_IN:
- if not login_page:
- login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
- AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page
- return self._LOGGED_IN
-
- def _perform_login(self, username, password):
- if self._is_logged_in():
- return
-
- login_form = {
- 'email': username,
- 'password': password,
- }
-
- try:
- response = self._download_webpage(
- self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
- data=urlencode_postdata(login_form),
- headers={'Content-Type': 'application/x-www-form-urlencoded'})
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
- raise
-
- if not self._is_logged_in(response):
- raise ExtractorError('Unable to login (cannot verify if logged in)')
-
- def _real_initialize(self):
- if not self._is_logged_in():
- self.raise_login_required('Login is required to access any AnimeLab content')
-
-
-class AnimeLabIE(AnimeLabBaseIE):
- _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
-
- # the following tests require authentication, but a free account will suffice
- # just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file
- # or you can set 'username' and 'password' there
- # the tests also select a specific format so that the same video is downloaded
- # regardless of whether the user is premium or not (needs testing on a premium account)
- _TEST = {
- 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
- 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
- 'info_dict': {
- 'id': '383',
- 'ext': 'mp4',
- 'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
- 'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
- 'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
- 'series': 'Fullmetal Alchemist: Brotherhood',
- 'episode': 'Signs of a Counteroffensive',
- 'episode_number': 42,
- 'duration': 1469,
- 'season': 'Season 1',
- 'season_number': 1,
- 'season_id': '38',
- },
- 'params': {
- 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
- },
- 'skip': 'All AnimeLab content requires authentication',
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- # unfortunately we can get different URLs for the same formats
- # e.g. if we are using a "free" account so no dubs available
- # (so _remove_duplicate_formats is not effective)
- # so we use a dictionary as a workaround
- formats = {}
- for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
- 'https://www.animelab.com/player/%s/dubbed'):
- actual_url = language_option_url % display_id
- webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
-
- video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
- position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
-
- raw_data = video_collection[position]['videoEntry']
-
- video_id = str_or_none(raw_data['id'])
-
- # create a title from many sources (while grabbing other info)
- # TODO use more fallback sources to get some of these
- series = raw_data.get('showTitle')
- video_type = raw_data.get('videoEntryType', {}).get('name')
- episode_number = raw_data.get('episodeNumber')
- episode_name = raw_data.get('name')
-
- title_parts = (series, video_type, episode_number, episode_name)
- if None not in title_parts:
- title = '%s - %s %s - %s' % title_parts
- else:
- title = episode_name
-
- description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
-
- duration = int_or_none(raw_data.get('duration'))
-
- thumbnail_data = raw_data.get('images', [])
- thumbnails = []
- for thumbnail in thumbnail_data:
- for instance in thumbnail['imageInstances']:
- image_data = instance.get('imageInfo', {})
- thumbnails.append({
- 'id': str_or_none(image_data.get('id')),
- 'url': image_data.get('fullPath'),
- 'width': image_data.get('width'),
- 'height': image_data.get('height'),
- })
-
- season_data = raw_data.get('season', {}) or {}
- season = str_or_none(season_data.get('name'))
- season_number = int_or_none(season_data.get('seasonNumber'))
- season_id = str_or_none(season_data.get('id'))
-
- for video_data in raw_data['videoList']:
- current_video_list = {}
- current_video_list['language'] = video_data.get('language', {}).get('languageCode')
-
- is_hardsubbed = video_data.get('hardSubbed')
-
- for video_instance in video_data['videoInstances']:
- httpurl = video_instance.get('httpUrl')
- url = httpurl if httpurl else video_instance.get('rtmpUrl')
- if url is None:
- # this video format is unavailable to the user (not premium etc.)
- continue
-
- current_format = current_video_list.copy()
-
- format_id_parts = []
-
- format_id_parts.append(str_or_none(video_instance.get('id')))
-
- if is_hardsubbed is not None:
- if is_hardsubbed:
- format_id_parts.append('yeshardsubbed')
- else:
- format_id_parts.append('nothardsubbed')
-
- format_id_parts.append(current_format['language'])
-
- format_id = '_'.join([x for x in format_id_parts if x is not None])
-
- ext = determine_ext(url)
- if ext == 'm3u8':
- for format_ in self._extract_m3u8_formats(
- url, video_id, m3u8_id=format_id, fatal=False):
- formats[format_['format_id']] = format_
- continue
- elif ext == 'mpd':
- for format_ in self._extract_mpd_formats(
- url, video_id, mpd_id=format_id, fatal=False):
- formats[format_['format_id']] = format_
- continue
-
- current_format['url'] = url
- quality_data = video_instance.get('videoQuality')
- if quality_data:
- quality = quality_data.get('name') or quality_data.get('description')
- else:
- quality = None
-
- height = None
- if quality:
- height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
-
- if height is None:
- self.report_warning('Could not get height of video')
- else:
- current_format['height'] = height
- current_format['format_id'] = format_id
-
- formats[current_format['format_id']] = current_format
-
- formats = list(formats.values())
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'series': series,
- 'episode': episode_name,
- 'episode_number': int_or_none(episode_number),
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'formats': formats,
- 'season': season,
- 'season_number': season_number,
- 'season_id': season_id,
- }
-
-
-class AnimeLabShowsIE(AnimeLabBaseIE):
- _VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'https://www.animelab.com/shows/attack-on-titan',
- 'info_dict': {
- 'id': '45',
- 'title': 'Attack on Titan',
- 'description': 'md5:989d95a2677e9309368d5cf39ba91469',
- },
- 'playlist_count': 59,
- 'skip': 'All AnimeLab content requires authentication',
- }
-
- def _real_extract(self, url):
- _BASE_URL = 'http://www.animelab.com'
- _SHOWS_API_URL = '/api/videoentries/show/videos/'
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
-
- show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
- show_data = self._parse_json(show_data_str, display_id)
-
- show_id = str_or_none(show_data.get('id'))
- title = show_data.get('name')
- description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')
-
- entries = []
- for season in show_data['seasons']:
- season_id = season['id']
- get_data = urlencode_postdata({
- 'seasonId': season_id,
- 'limit': 1000,
- })
- # despite using urlencode_postdata, we are sending a GET request
- target_url = _BASE_URL + _SHOWS_API_URL + show_id + "?" + get_data.decode('utf-8')
- response = self._download_webpage(
- target_url,
- None, 'Season id %s' % season_id)
-
- season_data = self._parse_json(response, display_id)
-
- for video_data in season_data['list']:
- entries.append(self.url_result(
- _BASE_URL + '/player/' + video_data['slug'], 'AnimeLab',
- str_or_none(video_data.get('id')), video_data.get('name')
- ))
-
- return {
- '_type': 'playlist',
- 'id': show_id,
- 'title': title,
- 'description': description,
- 'entries': entries,
- }
-
-# TODO implement myqueue
diff --git a/hypervideo_dl/extractor/animeondemand.py b/hypervideo_dl/extractor/animeondemand.py
deleted file mode 100644
index 2e674d5..0000000
--- a/hypervideo_dl/extractor/animeondemand.py
+++ /dev/null
@@ -1,284 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- determine_ext,
- extract_attributes,
- ExtractorError,
- join_nonempty,
- url_or_none,
- urlencode_postdata,
- urljoin,
-)
-
-
-class AnimeOnDemandIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
- _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
- _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
- _NETRC_MACHINE = 'animeondemand'
- # German-speaking countries of Europe
- _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
- _TESTS = [{
- # jap, OmU
- 'url': 'https://www.anime-on-demand.de/anime/161',
- 'info_dict': {
- 'id': '161',
- 'title': 'Grimgar, Ashes and Illusions (OmU)',
- 'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
- },
- 'playlist_mincount': 4,
- }, {
- # Film wording is used instead of Episode, ger/jap, Dub/OmU
- 'url': 'https://www.anime-on-demand.de/anime/39',
- 'only_matching': True,
- }, {
- # Episodes without titles, jap, OmU
- 'url': 'https://www.anime-on-demand.de/anime/162',
- 'only_matching': True,
- }, {
- # ger/jap, Dub/OmU, account required
- 'url': 'https://www.anime-on-demand.de/anime/169',
- 'only_matching': True,
- }, {
- # Full length film, non-series, ger/jap, Dub/OmU, account required
- 'url': 'https://www.anime-on-demand.de/anime/185',
- 'only_matching': True,
- }, {
- # Flash videos
- 'url': 'https://www.anime-on-demand.de/anime/12',
- 'only_matching': True,
- }]
-
- def _perform_login(self, username, password):
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login page')
-
- if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
- self.raise_geo_restricted(
- '%s is only available in German-speaking countries of Europe' % self.IE_NAME)
-
- login_form = self._form_hidden_inputs('new_user', login_page)
-
- login_form.update({
- 'user[login]': username,
- 'user[password]': password,
- })
-
- post_url = self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
- 'post url', default=self._LOGIN_URL, group='url')
-
- if not post_url.startswith('http'):
- post_url = urljoin(self._LOGIN_URL, post_url)
-
- response = self._download_webpage(
- post_url, None, 'Logging in',
- data=urlencode_postdata(login_form), headers={
- 'Referer': self._LOGIN_URL,
- })
-
- if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
- error = self._search_regex(
- r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
- response, 'error', default=None, group='error')
- if error:
- raise ExtractorError('Unable to login: %s' % error, expected=True)
- raise ExtractorError('Unable to log in')
-
- def _real_extract(self, url):
- anime_id = self._match_id(url)
-
- webpage = self._download_webpage(url, anime_id)
-
- if 'data-playlist=' not in webpage:
- self._download_webpage(
- self._APPLY_HTML5_URL, anime_id,
- 'Activating HTML5 beta', 'Unable to apply HTML5 beta')
- webpage = self._download_webpage(url, anime_id)
-
- csrf_token = self._html_search_meta(
- 'csrf-token', webpage, 'csrf token', fatal=True)
-
- anime_title = self._html_search_regex(
- r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
- webpage, 'anime name')
- anime_description = self._html_search_regex(
- r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
- webpage, 'anime description', default=None)
-
- def extract_info(html, video_id, num=None):
- title, description = [None] * 2
- formats = []
-
- for input_ in re.findall(
- r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
- attributes = extract_attributes(input_)
- title = attributes.get('data-dialog-header')
- playlist_urls = []
- for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
- playlist_url = attributes.get(playlist_key)
- if isinstance(playlist_url, compat_str) and re.match(
- r'/?[\da-zA-Z]+', playlist_url):
- playlist_urls.append(attributes[playlist_key])
- if not playlist_urls:
- continue
-
- lang = attributes.get('data-lang')
- lang_note = attributes.get('value')
-
- for playlist_url in playlist_urls:
- kind = self._search_regex(
- r'videomaterialurl/\d+/([^/]+)/',
- playlist_url, 'media kind', default=None)
- format_id = join_nonempty(lang, kind) if lang or kind else str(num)
- format_note = join_nonempty(kind, lang_note, delim=', ')
- item_id_list = []
- if format_id:
- item_id_list.append(format_id)
- item_id_list.append('videomaterial')
- playlist = self._download_json(
- urljoin(url, playlist_url), video_id,
- 'Downloading %s JSON' % ' '.join(item_id_list),
- headers={
- 'X-Requested-With': 'XMLHttpRequest',
- 'X-CSRF-Token': csrf_token,
- 'Referer': url,
- 'Accept': 'application/json, text/javascript, */*; q=0.01',
- }, fatal=False)
- if not playlist:
- continue
- stream_url = url_or_none(playlist.get('streamurl'))
- if stream_url:
- rtmp = re.search(
- r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
- stream_url)
- if rtmp:
- formats.append({
- 'url': rtmp.group('url'),
- 'app': rtmp.group('app'),
- 'play_path': rtmp.group('playpath'),
- 'page_url': url,
- 'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
- 'rtmp_real_time': True,
- 'format_id': 'rtmp',
- 'ext': 'flv',
- })
- continue
- start_video = playlist.get('startvideo', 0)
- playlist = playlist.get('playlist')
- if not playlist or not isinstance(playlist, list):
- continue
- playlist = playlist[start_video]
- title = playlist.get('title')
- if not title:
- continue
- description = playlist.get('description')
- for source in playlist.get('sources', []):
- file_ = source.get('file')
- if not file_:
- continue
- ext = determine_ext(file_)
- format_id = join_nonempty(
- lang, kind,
- 'hls' if ext == 'm3u8' else None,
- 'dash' if source.get('type') == 'video/dash' or ext == 'mpd' else None)
- if ext == 'm3u8':
- file_formats = self._extract_m3u8_formats(
- file_, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
- elif source.get('type') == 'video/dash' or ext == 'mpd':
- continue
- file_formats = self._extract_mpd_formats(
- file_, video_id, mpd_id=format_id, fatal=False)
- else:
- continue
- for f in file_formats:
- f.update({
- 'language': lang,
- 'format_note': format_note,
- })
- formats.extend(file_formats)
-
- return {
- 'title': title,
- 'description': description,
- 'formats': formats,
- }
-
- def extract_entries(html, video_id, common_info, num=None):
- info = extract_info(html, video_id, num)
-
- if info['formats']:
- self._sort_formats(info['formats'])
- f = common_info.copy()
- f.update(info)
- yield f
-
- # Extract teaser/trailer only when full episode is not available
- if not info['formats']:
- m = re.search(
- r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
- html)
- if m:
- f = common_info.copy()
- f.update({
- 'id': '%s-%s' % (f['id'], m.group('kind').lower()),
- 'title': m.group('title'),
- 'url': urljoin(url, m.group('href')),
- })
- yield f
-
- def extract_episodes(html):
- for num, episode_html in enumerate(re.findall(
- r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
- episodebox_title = self._search_regex(
- (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
- r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
- episode_html, 'episodebox title', default=None, group='title')
- if not episodebox_title:
- continue
-
- episode_number = int(self._search_regex(
- r'(?:Episode|Film)\s*(\d+)',
- episodebox_title, 'episode number', default=num))
- episode_title = self._search_regex(
- r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
- episodebox_title, 'episode title', default=None)
-
- video_id = 'episode-%d' % episode_number
-
- common_info = {
- 'id': video_id,
- 'series': anime_title,
- 'episode': episode_title,
- 'episode_number': episode_number,
- }
-
- for e in extract_entries(episode_html, video_id, common_info):
- yield e
-
- def extract_film(html, video_id):
- common_info = {
- 'id': anime_id,
- 'title': anime_title,
- 'description': anime_description,
- }
- for e in extract_entries(html, video_id, common_info):
- yield e
-
- def entries():
- has_episodes = False
- for e in extract_episodes(webpage):
- has_episodes = True
- yield e
-
- if not has_episodes:
- for e in extract_film(webpage, anime_id):
- yield e
-
- return self.playlist_result(
- entries(), anime_id, anime_title, anime_description)
diff --git a/hypervideo_dl/extractor/ant1newsgr.py b/hypervideo_dl/extractor/ant1newsgr.py
index 1075b46..7b384b2 100644
--- a/hypervideo_dl/extractor/ant1newsgr.py
+++ b/hypervideo_dl/extractor/ant1newsgr.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
import urllib.parse
from .common import InfoExtractor
@@ -10,7 +6,6 @@ from ..utils import (
ExtractorError,
determine_ext,
scale_thumbnails_to_max_format_width,
- unescapeHTML,
)
@@ -24,7 +19,6 @@ class Ant1NewsGrBaseIE(InfoExtractor):
raise ExtractorError('no source found for %s' % video_id)
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
- self._sort_formats(formats)
thumbnails = scale_thumbnails_to_max_format_width(
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
return {
@@ -94,7 +88,7 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
- embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
+ embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
return self.playlist_from_matches(
@@ -107,6 +101,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
IE_DESC = 'ant1news.gr embedded videos'
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_API_PATH = '/news/templates/data/jsonPlayer'
_TESTS = [{
@@ -120,16 +115,6 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- _EMBED_URL_RE = rf'{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
- _EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_EMBED_URL_RE})(?P=_q1)'
- for mobj in re.finditer(_EMBED_RE, webpage):
- url = unescapeHTML(mobj.group('url'))
- if not cls.suitable(url):
- continue
- yield url
-
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/hypervideo_dl/extractor/anvato.py b/hypervideo_dl/extractor/anvato.py
index 686d453..79bfe41 100644
--- a/hypervideo_dl/extractor/anvato.py
+++ b/hypervideo_dl/extractor/anvato.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import hashlib
import json
@@ -10,38 +7,68 @@ import time
from .common import InfoExtractor
from ..aes import aes_encrypt
-from ..compat import compat_str
from ..utils import (
bytes_to_intlist,
determine_ext,
- intlist_to_bytes,
int_or_none,
+ intlist_to_bytes,
join_nonempty,
+ smuggle_url,
strip_jsonp,
+ traverse_obj,
unescapeHTML,
unsmuggle_url,
)
-# This import causes a ModuleNotFoundError on some systems for unknown reason.
-# See issues:
-# https://github.com/hypervideo/hypervideo/issues/35
-# https://github.com/ytdl-org/youtube-dl/issues/27449
-# https://github.com/animelover1984/youtube-dl/issues/17
-try:
- from .anvato_token_generator import NFLTokenGenerator
-except ImportError:
- NFLTokenGenerator = None
-
def md5_text(s):
- if not isinstance(s, compat_str):
- s = compat_str(s)
- return hashlib.md5(s.encode('utf-8')).hexdigest()
+ return hashlib.md5(str(s).encode()).hexdigest()
class AnvatoIE(InfoExtractor):
_VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
+ _API_BASE_URL = 'https://tkx.mp.lura.live/rest/v2'
+ _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
+ _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
+
+ _TESTS = [{
+ # from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
+ 'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
+ 'md5': '921919dab3cd0b849ff3d624831ae3e2',
+ 'info_dict': {
+ 'id': '899441',
+ 'ext': 'mp4',
+ 'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
+ 'description': 'md5:85e05a3cc163f8c344340f220521136d',
+ 'upload_date': '20201215',
+ 'timestamp': 1608009755,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': 'NFL',
+ 'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
+ 'Player Highlights', 'Cleveland Browns', 'league'],
+ 'duration': 157,
+ 'categories': ['Entertainment', 'Game', 'Highlights'],
+ },
+ }, {
+ # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
+ 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
+ 'md5': '837718bcfb3a7778d022f857f7a9b19e',
+ 'info_dict': {
+ 'id': '8032455',
+ 'ext': 'mp4',
+ 'title': '99-year-old woman learns to fly plane in Torrance, checks off bucket list dream',
+ 'description': 'md5:0a12bab8159445e78f52a297a35c6609',
+ 'upload_date': '20220928',
+ 'timestamp': 1664408881,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': 'LIN',
+ 'tags': ['video', 'news', '5live'],
+ 'duration': 155,
+ 'categories': ['News'],
+ },
+ }]
+
# Copied from anvplayer.min.js
_ANVACK_TABLE = {
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
@@ -214,86 +241,74 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
}
- _TOKEN_GENERATORS = {
- 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
+ def _generate_nfl_token(self, anvack, mcp_id):
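+ # two-step handshake: fetch an OAuth bearer token via client credentials,
+ # then exchange it for an Anvato media token through NFL's Shield GraphQL API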
+ reroute = self._download_json(
+ 'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
+ headers={'X-Domain-Id': 100}, note='Fetching token info')
+ token_type = reroute.get('token_type') or 'Bearer'
+ auth_token = f'{token_type} {reroute["access_token"]}'
+ response = self._download_json(
+ 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
+ 'query': '''{
+ viewer {
+ mediaToken(anvack: "%s", id: %s) {
+ token
}
+ }
+}''' % (anvack, mcp_id),
+ }).encode(), headers={
+ 'Authorization': auth_token,
+ 'Content-Type': 'application/json',
+ }, note='Fetching NFL API token')
+ return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
- _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
-
- _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
- _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
-
- _TESTS = [{
- # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
- 'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
- 'info_dict': {
- 'id': '4465496',
- 'ext': 'mp4',
- 'title': 'VIDEO: Humpback whale breaches right next to NH boat',
- 'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
- 'duration': 22,
- 'timestamp': 1534855680,
- 'upload_date': '20180821',
- 'uploader': 'ANV',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
- 'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
- 'only_matching': True,
- }]
-
- def __init__(self, *args, **kwargs):
- super(AnvatoIE, self).__init__(*args, **kwargs)
- self.__server_time = None
+ _TOKEN_GENERATORS = {
+ 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
+ }
def _server_time(self, access_key, video_id):
- if self.__server_time is not None:
- return self.__server_time
-
- self.__server_time = int(self._download_json(
- self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
- note='Fetching server time')['server_time'])
+ return int_or_none(traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
+ note='Fetching server time', fatal=False), 'server_time')) or int(time.time())
- return self.__server_time
-
- def _api_prefix(self, access_key):
- return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
-
- def _get_video_json(self, access_key, video_id):
+ def _get_video_json(self, access_key, video_id, extracted_token):
# See et() in anvplayer.min.js, which is an alias of getVideoJSON()
- video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
+ video_data_url = f'{self._API_BASE_URL}/mcp/video/{video_id}?anvack={access_key}'
server_time = self._server_time(access_key, video_id)
- input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
+ input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
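+ # AES-encrypt the first 64 chars with the hard-coded player key; the result becomes the X-Anvato-Adst-Auth query parameter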
auth_secret = intlist_to_bytes(aes_encrypt(
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
-
- video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
+ query = {
+ 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
+ 'rtyp': 'fp',
+ }
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
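+ # pseudo-random 30-character request id, mirroring what anvplayer.min.js generates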
api = {
'anvrid': anvrid,
'anvts': server_time,
}
- if self._TOKEN_GENERATORS.get(access_key) is not None:
- api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
+ if extracted_token is not None:
+ api['anvstk2'] = extracted_token
+ elif self._TOKEN_GENERATORS.get(access_key) is not None:
+ api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
+ elif self._ANVACK_TABLE.get(access_key) is not None:
+ api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
else:
- api['anvstk'] = md5_text('%s|%s|%d|%s' % (
- access_key, anvrid, server_time,
- self._ANVACK_TABLE.get(access_key, self._API_KEY)))
+ api['anvstk2'] = 'default'
return self._download_json(
- video_data_url, video_id, transform_source=strip_jsonp,
- data=json.dumps({'api': api}).encode('utf-8'))
+ video_data_url, video_id, transform_source=strip_jsonp, query=query,
+ data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
- def _get_anvato_videos(self, access_key, video_id):
- video_data = self._get_video_json(access_key, video_id)
+ def _get_anvato_videos(self, access_key, video_id, token):
+ video_data = self._get_video_json(access_key, video_id, token)
formats = []
for published_url in video_data['published_urls']:
- video_url = published_url['embed_url']
+ video_url = published_url.get('embed_url')
+ if not video_url:
+ continue
media_format = published_url.get('format')
ext = determine_ext(video_url)
@@ -308,15 +323,27 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr or None,
}
- if media_format == 'm3u8' and tbr is not None:
+ vtt_subs, hls_subs = {}, {}
+ if media_format == 'vtt':
+ _, vtt_subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, m3u8_id='vtt', fatal=False)
+ continue
+ elif media_format == 'm3u8' and tbr is not None:
a_format.update({
'format_id': join_nonempty('hls', tbr),
'ext': 'mp4',
})
elif media_format == 'm3u8-variant' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
+ # For some videos the initial m3u8 URL returns JSON instead
+ manifest_json = self._download_json(
+ video_url, video_id, note='Downloading manifest JSON', errnote=False)
+ if manifest_json:
+ video_url = manifest_json.get('master_m3u8')
+ if not video_url:
+ continue
+ hls_fmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)
+ formats.extend(hls_fmts)
continue
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
@@ -327,8 +354,6 @@ class AnvatoIE(InfoExtractor):
})
formats.append(a_format)
- self._sort_formats(formats)
-
subtitles = {}
for caption in video_data.get('captions', []):
a_caption = {
@@ -336,6 +361,7 @@ class AnvatoIE(InfoExtractor):
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
}
subtitles.setdefault(caption['language'], []).append(a_caption)
+ subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
return {
'id': video_id,
@@ -352,30 +378,19 @@ class AnvatoIE(InfoExtractor):
'subtitles': subtitles,
}
- @staticmethod
- def _extract_urls(ie, webpage, video_id):
- entries = []
- for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
- anvplayer_data = ie._parse_json(
- mobj.group('anvp'), video_id, transform_source=unescapeHTML,
- fatal=False)
- if not anvplayer_data:
- continue
- video = anvplayer_data.get('video')
- if not isinstance(video, compat_str) or not video.isdigit():
- continue
- access_key = anvplayer_data.get('accessKey')
- if not access_key:
- mcp = anvplayer_data.get('mcp')
- if mcp:
- access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
- mcp.lower())
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ for mobj in re.finditer(cls._ANVP_RE, webpage):
+ anvplayer_data = json.loads(unescapeHTML(mobj.group('anvp'))) or {}
+ video_id, access_key = anvplayer_data.get('video'), anvplayer_data.get('accessKey')
if not access_key:
+ access_key = cls._MCP_TO_ACCESS_KEY_TABLE.get((anvplayer_data.get('mcp') or '').lower())
+ if not (video_id or '').isdigit() or not access_key:
continue
- entries.append(ie.url_result(
- 'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
- video_id=video))
- return entries
+ url = f'anvato:{access_key}:{video_id}'
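+ # pass any token found in the player data through to _real_extract via smuggle_url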
+ if anvplayer_data.get('token'):
+ url = smuggle_url(url, {'token': anvplayer_data['token']})
+ yield cls.url_result(url, AnvatoIE, video_id)
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(
@@ -383,7 +398,7 @@ class AnvatoIE(InfoExtractor):
self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
video_id)
return self._get_anvato_videos(
- anvplayer_data['accessKey'], anvplayer_data['video'])
+ anvplayer_data['accessKey'], anvplayer_data['video'], 'default') # cbslocal token = 'default'
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -391,9 +406,7 @@ class AnvatoIE(InfoExtractor):
'countries': smuggled_data.get('geo_countries'),
})
- mobj = self._match_valid_url(url)
- access_key, video_id = mobj.group('access_key_or_mcp', 'id')
+ access_key, video_id = self._match_valid_url(url).group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:
- access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
- access_key) or access_key
- return self._get_anvato_videos(access_key, video_id)
+ access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(access_key) or access_key
+ return self._get_anvato_videos(access_key, video_id, smuggled_data.get('token'))
diff --git a/hypervideo_dl/extractor/anvato_token_generator/__init__.py b/hypervideo_dl/extractor/anvato_token_generator/__init__.py
deleted file mode 100644
index 6e223db..0000000
--- a/hypervideo_dl/extractor/anvato_token_generator/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from __future__ import unicode_literals
-
-from .nfl import NFLTokenGenerator
-
-__all__ = [
- 'NFLTokenGenerator',
-]
diff --git a/hypervideo_dl/extractor/anvato_token_generator/common.py b/hypervideo_dl/extractor/anvato_token_generator/common.py
deleted file mode 100644
index b959a90..0000000
--- a/hypervideo_dl/extractor/anvato_token_generator/common.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from __future__ import unicode_literals
-
-
-class TokenGenerator:
- def generate(self, anvack, mcp_id):
- raise NotImplementedError('This method must be implemented by subclasses')
diff --git a/hypervideo_dl/extractor/anvato_token_generator/nfl.py b/hypervideo_dl/extractor/anvato_token_generator/nfl.py
deleted file mode 100644
index 97a2b24..0000000
--- a/hypervideo_dl/extractor/anvato_token_generator/nfl.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import TokenGenerator
-
-
-class NFLTokenGenerator(TokenGenerator):
- _AUTHORIZATION = None
-
- def generate(ie, anvack, mcp_id):
- if not NFLTokenGenerator._AUTHORIZATION:
- reroute = ie._download_json(
- 'https://api.nfl.com/v1/reroute', mcp_id,
- data=b'grant_type=client_credentials',
- headers={'X-Domain-Id': 100})
- NFLTokenGenerator._AUTHORIZATION = '%s %s' % (reroute.get('token_type') or 'Bearer', reroute['access_token'])
- return ie._download_json(
- 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
- 'query': '''{
- viewer {
- mediaToken(anvack: "%s", id: %s) {
- token
- }
- }
-}''' % (anvack, mcp_id),
- }).encode(), headers={
- 'Authorization': NFLTokenGenerator._AUTHORIZATION,
- 'Content-Type': 'application/json',
- })['data']['viewer']['mediaToken']['token']
diff --git a/hypervideo_dl/extractor/aol.py b/hypervideo_dl/extractor/aol.py
index 4766a2c..6949ca9 100644
--- a/hypervideo_dl/extractor/aol.py
+++ b/hypervideo_dl/extractor/aol.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .yahoo import YahooIE
@@ -12,7 +9,7 @@ from ..utils import (
)
-class AolIE(YahooIE):
+class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'aol.com'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
@@ -122,7 +119,6 @@ class AolIE(YahooIE):
'height': int_or_none(qs.get('h', [None])[0]),
})
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/apa.py b/hypervideo_dl/extractor/apa.py
index 1736cdf..1ea0b1d 100644
--- a/hypervideo_dl/extractor/apa.py
+++ b/hypervideo_dl/extractor/apa.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -13,6 +8,7 @@ from ..utils import (
class APAIE(InfoExtractor):
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1']
_TESTS = [{
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
@@ -33,14 +29,6 @@ class APAIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
- webpage)]
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id, base_url = mobj.group('id', 'base_url')
@@ -84,7 +72,6 @@ class APAIE(InfoExtractor):
'format_id': format_id,
'height': height,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/aparat.py b/hypervideo_dl/extractor/aparat.py
index 1057233..4a989d8 100644
--- a/hypervideo_dl/extractor/aparat.py
+++ b/hypervideo_dl/extractor/aparat.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
get_element_by_id,
@@ -13,6 +10,7 @@ from ..utils import (
class AparatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+ _EMBED_REGEX = [r'<iframe .*?src="(?P<url>http://www\.aparat\.com/video/[^"]+)"']
_TESTS = [{
'url': 'http://www.aparat.com/v/wP8On',
@@ -75,7 +73,6 @@ class AparatIE(InfoExtractor):
r'(\d+)[pP]', label or '', 'height',
default=None)),
})
- self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, default={})
diff --git a/hypervideo_dl/extractor/appleconnect.py b/hypervideo_dl/extractor/appleconnect.py
index 494f833..d00b0f9 100644
--- a/hypervideo_dl/extractor/appleconnect.py
+++ b/hypervideo_dl/extractor/appleconnect.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
str_to_int,
diff --git a/hypervideo_dl/extractor/applepodcasts.py b/hypervideo_dl/extractor/applepodcasts.py
index 9139ff7..49bbeab 100644
--- a/hypervideo_dl/extractor/applepodcasts.py
+++ b/hypervideo_dl/extractor/applepodcasts.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/appletrailers.py b/hypervideo_dl/extractor/appletrailers.py
index 0abfb43..a5abb55 100644
--- a/hypervideo_dl/extractor/appletrailers.py
+++ b/hypervideo_dl/extractor/appletrailers.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
import json
@@ -122,7 +120,6 @@ class AppleTrailersIE(InfoExtractor):
'height': int_or_none(size_data.get('height')),
'language': version[:2],
})
- self._sort_formats(formats)
entries.append({
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
@@ -187,8 +184,6 @@ class AppleTrailersIE(InfoExtractor):
'height': int_or_none(format['height']),
})
- self._sort_formats(formats)
-
playlist.append({
'_type': 'video',
'id': video_id,
diff --git a/hypervideo_dl/extractor/archiveorg.py b/hypervideo_dl/extractor/archiveorg.py
index 2ab3c1b..90dda9f 100644
--- a/hypervideo_dl/extractor/archiveorg.py
+++ b/hypervideo_dl/extractor/archiveorg.py
@@ -1,39 +1,35 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
import json
+import re
+import urllib.parse
+
from .common import InfoExtractor
-from .youtube import YoutubeIE, YoutubeBaseInfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
- compat_HTTPError
-)
+from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
+from ..compat import compat_HTTPError, compat_urllib_parse_unquote
from ..utils import (
+ KNOWN_EXTENSIONS,
+ ExtractorError,
+ HEADRequest,
bug_reports_message,
clean_html,
dict_get,
extract_attributes,
- ExtractorError,
get_element_by_id,
- HEADRequest,
int_or_none,
join_nonempty,
- KNOWN_EXTENSIONS,
+ js_to_json,
merge_dicts,
mimetype2ext,
orderedSet,
parse_duration,
parse_qs,
- str_to_int,
str_or_none,
+ str_to_int,
traverse_obj,
try_get,
unified_strdate,
unified_timestamp,
+ url_or_none,
urlhandle_detect_ext,
- url_or_none
)
@@ -54,6 +50,11 @@ class ArchiveOrgIE(InfoExtractor):
'upload_date': '20100315',
'creator': 'SRI International',
'uploader': 'laura@archive.org',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
+ 'release_year': 1968,
+ 'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
+ 'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
},
}, {
'url': 'https://archive.org/details/Cops1922',
@@ -62,33 +63,43 @@ class ArchiveOrgIE(InfoExtractor):
'id': 'Cops1922',
'ext': 'mp4',
'title': 'Buster Keaton\'s "Cops" (1922)',
- 'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
+ 'description': 'md5:cd6f9910c35aedd5fc237dbc3957e2ca',
'uploader': 'yorkmba99@hotmail.com',
'timestamp': 1387699629,
'upload_date': '20131222',
+ 'display_id': 'Cops-v2.mp4',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
+ 'duration': 1091.96,
},
}, {
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
'only_matching': True,
}, {
'url': 'https://archive.org/details/Election_Ads',
- 'md5': '284180e857160cf866358700bab668a3',
+ 'md5': 'eec5cddebd4793c6a653b69c3b11f2e6',
'info_dict': {
'id': 'Election_Ads/Commercial-JFK1960ElectionAdCampaignJingle.mpg',
'title': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
- 'ext': 'mp4',
+ 'ext': 'mpg',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
+ 'duration': 59.77,
+ 'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
},
}, {
'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
- 'md5': '7915213ef02559b5501fe630e1a53f59',
+ 'md5': 'ea1eed8234e7d4165f38c8c769edef38',
'info_dict': {
'id': 'Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
'title': 'Commercial-Nixon1960ElectionAdToughonDefense.mpg',
- 'ext': 'mp4',
+ 'ext': 'mpg',
'timestamp': 1205588045,
'uploader': 'mikedavisstripmaster@yahoo.com',
'description': '1960 Presidential Campaign Election Commercials John F Kennedy, Richard M Nixon',
'upload_date': '20080315',
+ 'display_id': 'Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'duration': 59.51,
+ 'license': 'http://creativecommons.org/licenses/publicdomain/',
+ 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
},
}, {
'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16',
@@ -97,6 +108,12 @@ class ArchiveOrgIE(InfoExtractor):
'id': 'gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t01.flac',
'title': 'Turning',
'ext': 'flac',
+ 'track': 'Turning',
+ 'creator': 'Grateful Dead',
+ 'display_id': 'gd1977-05-08d01t01.flac',
+ 'track_number': 1,
+ 'album': '1977-05-08 - Barton Hall - Cornell University',
+ 'duration': 39.8,
},
}, {
'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t07.flac',
@@ -107,11 +124,20 @@ class ArchiveOrgIE(InfoExtractor):
'ext': 'flac',
'timestamp': 1205895624,
'uploader': 'mvernon54@yahoo.com',
- 'description': 'md5:6a31f1996db0aa0fc9da6d6e708a1bb0',
+ 'description': 'md5:6c921464414814720c6593810a5c7e3d',
'upload_date': '20080319',
'location': 'Barton Hall - Cornell University',
+ 'duration': 438.68,
+ 'track': 'Deal',
+ 'creator': 'Grateful Dead',
+ 'album': '1977-05-08 - Barton Hall - Cornell University',
+ 'release_date': '19770508',
+ 'display_id': 'gd1977-05-08d01t07.flac',
+ 'release_year': 1977,
+ 'track_number': 7,
},
}, {
+ # FIXME: give a better error message than just IndexError when all available formats are restricted
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik',
'md5': '7cb019baa9b332e82ea7c10403acd180',
'info_dict': {
@@ -119,6 +145,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Bells Of Rostov',
'ext': 'mp3',
},
+ 'skip': 'restricted'
}, {
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
@@ -131,6 +158,52 @@ class ArchiveOrgIE(InfoExtractor):
'description': 'md5:012b2d668ae753be36896f343d12a236',
'upload_date': '20190928',
},
+ 'skip': 'restricted'
+ }, {
+ # Original formats are private
+ 'url': 'https://archive.org/details/irelandthemakingofarepublic',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic',
+ 'title': 'Ireland: The Making of a Republic',
+ 'upload_date': '20160610',
+ 'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
+ 'uploader': 'dimitrios@archive.org',
+ 'creator': ['British Broadcasting Corporation', 'Time-Life Films'],
+ 'timestamp': 1465594947,
+ },
+ 'playlist': [
+ {
+ 'md5': '0b211261b26590d49df968f71b90690d',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel1_01.mov',
+ 'ext': 'mp4',
+ 'title': 'irelandthemakingofarepublicreel1_01.mov',
+ 'duration': 130.46,
+ 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg',
+ 'display_id': 'irelandthemakingofarepublicreel1_01.mov',
+ },
+ }, {
+ 'md5': '67335ee3b23a0da930841981c1e79b02',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel1_02.mov',
+ 'ext': 'mp4',
+ 'duration': 1395.13,
+ 'title': 'irelandthemakingofarepublicreel1_02.mov',
+ 'display_id': 'irelandthemakingofarepublicreel1_02.mov',
+ 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg',
+ },
+ }, {
+ 'md5': 'e470e86787893603f4a341a16c281eb5',
+ 'info_dict': {
+ 'id': 'irelandthemakingofarepublic/irelandthemakingofarepublicreel2.mov',
+ 'ext': 'mp4',
+ 'duration': 1602.67,
+ 'title': 'irelandthemakingofarepublicreel2.mov',
+ 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
+ 'display_id': 'irelandthemakingofarepublicreel2.mov',
+ },
+ }
+ ]
}]
@staticmethod
@@ -146,7 +219,7 @@ class ArchiveOrgIE(InfoExtractor):
return json.loads(extract_attributes(element)['value'])
def _real_extract(self, url):
- video_id = compat_urllib_parse_unquote_plus(self._match_id(url))
+ video_id = urllib.parse.unquote_plus(self._match_id(url))
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
# Archive.org metadata API doesn't clearly demarcate playlist entries
@@ -221,17 +294,25 @@ class ArchiveOrgIE(InfoExtractor):
'filesize': int_or_none(f.get('size'))})
extension = (f['name'].rsplit('.', 1) + [None])[1]
- if extension in KNOWN_EXTENSIONS:
+
+ # We don't want to skip private formats if the user has access to them;
+ # however, without access to an account with such privileges, we can't implement/test this.
+ # For now, to be safe, we only skip them when no user is logged in.
+ is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
+ if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
entry['formats'].append({
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
'format': f.get('format'),
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'filesize': int_or_none(f.get('size')),
- 'protocol': 'https'})
+ 'protocol': 'https',
+ 'source_preference': 0 if f.get('source') == 'original' else -1,
+ 'format_note': f.get('source')
+ })
for entry in entries.values():
- self._sort_formats(entry['formats'])
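+ # sorting by 'source' ranks 'original' files (source_preference 0) above derivatives (-1)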
+ entry['_format_sort_fields'] = ('source', )
if len(entries) == 1:
# If there's only one item, use it as the main info dict
@@ -287,7 +368,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_id': 'UCukCyHaD-bK3in_pKpfH9Eg',
'duration': 32,
'uploader_id': 'Zeurel',
- 'uploader_url': 'http://www.youtube.com/user/Zeurel'
+ 'uploader_url': 'https://www.youtube.com/user/Zeurel',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
}
}, {
# Internal link
@@ -302,7 +385,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_id': 'UCHnyfMqiRRG1u-2MsSQLbXA',
'duration': 771,
'uploader_id': '1veritasium',
- 'uploader_url': 'http://www.youtube.com/user/1veritasium'
+ 'uploader_url': 'https://www.youtube.com/user/1veritasium',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
}
}, {
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
@@ -316,7 +401,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
'duration': 398,
'description': 'md5:ff4de6a7980cb65d951c2f6966a4f2f3',
'uploader_id': 'machinima',
- 'uploader_url': 'http://www.youtube.com/user/machinima'
+ 'uploader_url': 'https://www.youtube.com/user/machinima',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'machinima'
}
}, {
# FLV video. Video file URL does not provide itag information
@@ -330,7 +417,10 @@ class YoutubeWebArchiveIE(InfoExtractor):
'duration': 19,
'description': 'md5:10436b12e07ac43ff8df65287a56efb4',
'uploader_id': 'jawed',
- 'uploader_url': 'http://www.youtube.com/user/jawed'
+ 'uploader_url': 'https://www.youtube.com/user/jawed',
+ 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'jawed',
}
}, {
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
@@ -344,7 +434,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
'duration': 204,
'description': 'md5:f7535343b6eda34a314eff8b85444680',
'uploader_id': 'itsmadeon',
- 'uploader_url': 'http://www.youtube.com/user/itsmadeon'
+ 'uploader_url': 'https://www.youtube.com/user/itsmadeon',
+ 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
}
}, {
# First capture is of dead video, second is the oldest from CDX response.
@@ -355,10 +447,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
'title': 'Fake Teen Doctor Strikes AGAIN! - Weekly Weird News',
'upload_date': '20160218',
'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
- 'duration': 1236,
+ 'duration': 1235,
'description': 'md5:21032bae736421e89c2edf36d1936947',
'uploader_id': 'MachinimaETC',
- 'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
+ 'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
+ 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'ETC News',
}
}, {
# First capture of dead video, capture date in link links to dead capture.
@@ -369,10 +464,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
'title': 'WTF: Video Games Still Launch BROKEN?! - T.U.G.S.',
'upload_date': '20160219',
'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
- 'duration': 798,
+ 'duration': 797,
'description': 'md5:a1dbf12d9a3bd7cb4c5e33b27d77ffe7',
'uploader_id': 'MachinimaETC',
- 'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
+ 'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
+ 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader': 'ETC News',
},
'expected_warnings': [
r'unable to download capture webpage \(it may not be archived\)'
@@ -392,12 +490,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
'title': 'It\'s Bootleg AirPods Time.',
'upload_date': '20211021',
'channel_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
- 'channel_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
+ 'channel_url': 'https://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
'duration': 810,
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'DankPods',
- 'uploader_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
- 'uploader_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug'
}
}, {
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
@@ -408,12 +505,135 @@ class YoutubeWebArchiveIE(InfoExtractor):
'title': 'bitch lasagna',
'upload_date': '20181005',
'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
- 'channel_url': 'http://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+ 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'duration': 135,
'description': 'md5:2dbe4051feeff2dab5f41f82bb6d11d0',
'uploader': 'PewDiePie',
'uploader_id': 'PewDiePie',
- 'uploader_url': 'http://www.youtube.com/user/PewDiePie'
+ 'uploader_url': 'https://www.youtube.com/user/PewDiePie',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ }
+ }, {
+ # ~June 2010 Capture. swfconfig
+ 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
+ 'info_dict': {
+ 'id': '8XeW5ilk-9Y',
+ 'ext': 'flv',
+ 'title': 'Story of Stuff, The Critique Part 4 of 4',
+ 'duration': 541,
+ 'description': 'md5:28157da06f2c5e94c97f7f3072509972',
+ 'uploader': 'HowTheWorldWorks',
+ 'uploader_id': 'HowTheWorldWorks',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
+ 'upload_date': '20090520',
+ }
+ }, {
+ # Jan 2011: watch-video-date/eow-date surrounded by whitespace
+ 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
+ 'info_dict': {
+ 'id': 'Q_yjX80U7Yc',
+ 'ext': 'flv',
+ 'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
+ 'uploader_id': 'claybutlermusic',
+ 'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
+ 'upload_date': '20090803',
+ 'uploader': 'claybutlermusic',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'duration': 132,
+ 'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
+ }
+ }, {
+ # ~May 2009 swfArgs. ytcfg is spread out over various vars
+ 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
+ 'info_dict': {
+ 'id': 'c5uJgG05xUY',
+ 'ext': 'webm',
+ 'title': 'Story of Stuff, The Critique Part 1 of 4',
+ 'uploader_id': 'HowTheWorldWorks',
+ 'uploader': 'HowTheWorldWorks',
+ 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
+ 'upload_date': '20090513',
+ 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'duration': 754,
+ }
+ }, {
+ # ~June 2012. Upload date is in another lang so cannot extract.
+ 'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
+ 'info_dict': {
+ 'id': 'xWTLLl-dQaA',
+ 'ext': 'mp4',
+ 'title': 'Black Nerd eHarmony Video Bio Parody (SPOOF)',
+ 'uploader_url': 'https://www.youtube.com/user/BlackNerdComedy',
+ 'description': 'md5:e25f0133aaf9e6793fb81c18021d193e',
+ 'uploader_id': 'BlackNerdComedy',
+ 'uploader': 'BlackNerdComedy',
+ 'duration': 182,
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ }
+ }, {
+ # ~July 2013
+ 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
+ 'info_dict': {
+ 'id': '9eO1aasHyTM',
+ 'ext': 'mp4',
+ 'title': 'Polar-oid',
+ 'description': 'Cameras and bears are dangerous!',
+ 'uploader_url': 'https://www.youtube.com/user/punkybird',
+ 'uploader_id': 'punkybird',
+ 'duration': 202,
+ 'channel_id': 'UC62R2cBezNBOqxSerfb1nMQ',
+ 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
+ 'upload_date': '20060428',
+ 'uploader': 'punkybird',
+ }
+ }, {
+ # April 2020: Player response in player config
+ 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
+ 'info_dict': {
+ 'id': 'Cf7vS8jc7dY',
+ 'ext': 'mp4',
+ 'title': 'A Dramatic Pool Story (by Jamie Spicer-Lewis) - Game Grumps Animated',
+ 'duration': 64,
+ 'upload_date': '20200408',
+ 'uploader_id': 'GameGrumps',
+ 'uploader': 'GameGrumps',
+ 'channel_url': 'https://www.youtube.com/channel/UC9CuvdOVfMPvKCiwdGKL3cQ',
+ 'channel_id': 'UC9CuvdOVfMPvKCiwdGKL3cQ',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
+ 'uploader_url': 'https://www.youtube.com/user/GameGrumps',
+ }
+ }, {
+ # watch7-user-header with yt-user-info
+ 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
+ 'info_dict': {
+ 'id': 'kbh4T_b4Ixw',
+ 'ext': 'mp4',
+ 'title': 'Shovel Knight OST - Strike the Earth! Plains of Passage 16 bit SNES style remake / remix',
+ 'channel_url': 'https://www.youtube.com/channel/UCnTaGvsHmMy792DWeT6HbGA',
+ 'uploader': 'Nelward music',
+ 'duration': 213,
+ 'description': 'md5:804b4a9ce37b050a5fefdbb23aeba54d',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'upload_date': '20150503',
+ 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
+ }
+ }, {
+ # April 2012
+ 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
+ 'info_dict': {
+ 'id': 'SOm7mPoPskU',
+ 'ext': 'mp4',
+ 'title': 'Boyfriend - Justin Bieber Parody',
+ 'uploader_url': 'https://www.youtube.com/user/thecomputernerd01',
+ 'uploader': 'thecomputernerd01',
+ 'thumbnail': r're:https?://.*\.(jpg|webp)',
+ 'description': 'md5:dd7fa635519c2a5b4d566beaecad7491',
+ 'duration': 200,
+ 'upload_date': '20120407',
+ 'uploader_id': 'thecomputernerd01',
}
}, {
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
@@ -445,9 +665,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
'only_matching': True
},
]
- _YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
- _YT_INITIAL_BOUNDARY_RE = r'(?:(?:var\s+meta|</script|\n)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_BOUNDARY_RE
+ _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
+ _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x:
+ (?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*|
+ {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}
+ )'''
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
_YT_ALL_THUMB_SERVERS = orderedSet(
@@ -477,11 +699,6 @@ class YoutubeWebArchiveIE(InfoExtractor):
elif not isinstance(res, list) or len(res) != 0:
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
- def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
- return self._parse_json(self._search_regex(
- (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
- regex), webpage, name, default='{}'), video_id, fatal=False)
-
def _extract_webpage_title(self, webpage):
page_title = self._html_extract_title(webpage, default='')
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
@@ -491,10 +708,32 @@ class YoutubeWebArchiveIE(InfoExtractor):
def _extract_metadata(self, video_id, webpage):
search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
- player_response = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
- initial_data = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_DATA_RE, video_id, 'initial player response') or {}
+ player_response = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
+ video_id, default={})
+ initial_data = self._search_json(
+ self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})
+
+ ytcfg = {}
+ for j in re.findall(r'yt\.setConfig\(\s*(?P<json>{\s*(?s:.+?)\s*})\s*\);', webpage): # ~June 2010
+ ytcfg.update(self._parse_json(j, video_id, fatal=False, ignore_extra=True, transform_source=js_to_json, errnote='') or {})
+
+ # XXX: this may also contain a 'ptchn' key
+ player_config = (
+ self._search_json(
+ r'(?:yt\.playerConfig|ytplayer\.config|swfConfig)\s*=',
+ webpage, 'player config', video_id, default=None)
+ or ytcfg.get('PLAYER_CONFIG') or {})
+
+ # XXX: this may also contain a 'creator' key.
+ swf_args = self._search_json(r'swfArgs\s*=', webpage, 'swf config', video_id, default={})
+ if swf_args and not traverse_obj(player_config, ('args',)):
+ player_config['args'] = swf_args
+
+ if not player_response:
+ # April 2020
+ player_response = self._parse_json(
+ traverse_obj(player_config, ('args', 'player_response')) or '{}', video_id, fatal=False)
initial_data_video = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
@@ -510,21 +749,64 @@ class YoutubeWebArchiveIE(InfoExtractor):
video_details.get('title')
or YoutubeBaseInfoExtractor._get_text(microformats, 'title')
or YoutubeBaseInfoExtractor._get_text(initial_data_video, 'title')
+ or traverse_obj(player_config, ('args', 'title'))
or self._extract_webpage_title(webpage)
or search_meta(['og:title', 'twitter:title', 'title']))
+ def id_from_url(url, type_):
+ return self._search_regex(
+ rf'(?:{type_})/([^/#&?]+)', url or '', f'{type_} id', default=None)
+
+ # XXX: would the get_elements_by_... functions be better suited here?
+ _CHANNEL_URL_HREF_RE = r'href="[^"]*(?P<url>https?://www\.youtube\.com/(?:user|channel)/[^"]+)"'
+ uploader_or_channel_url = self._search_regex(
+ [fr'<(?:link\s*itemprop=\"url\"|a\s*id=\"watch-username\").*?\b{_CHANNEL_URL_HREF_RE}>', # @fd05024
+ fr'<div\s*id=\"(?:watch-channel-stats|watch-headline-user-info)\"[^>]*>\s*<a[^>]*\b{_CHANNEL_URL_HREF_RE}'], # ~ May 2009, ~June 2012
+ webpage, 'uploader or channel url', default=None)
+
+ owner_profile_url = url_or_none(microformats.get('ownerProfileUrl')) # @a6211d2
+
+ # Uploader refers to the /user/ id ONLY
+ uploader_id = (
+ id_from_url(owner_profile_url, 'user')
+ or id_from_url(uploader_or_channel_url, 'user')
+ or ytcfg.get('VIDEO_USERNAME'))
+ uploader_url = f'https://www.youtube.com/user/{uploader_id}' if uploader_id else None
+
+ # XXX: do we want to differentiate uploader and channel?
+ uploader = (
+ self._search_regex(
+ [r'<a\s*id="watch-username"[^>]*>\s*<strong>([^<]+)</strong>', # June 2010
+ r'var\s*watchUsername\s*=\s*\'(.+?)\';', # ~May 2009
+ r'<div\s*\bid=\"watch-channel-stats"[^>]*>\s*<a[^>]*>\s*(.+?)\s*</a', # ~May 2009
+ r'<a\s*id="watch-userbanner"[^>]*title="\s*(.+?)\s*"'], # ~June 2012
+ webpage, 'uploader', default=None)
+ or self._html_search_regex(
+ [r'(?s)<div\s*class="yt-user-info".*?<a[^>]*>\s*(.*?)\s*</a', # March 2016
+ r'(?s)<a[^>]*yt-user-name[^>]*>\s*(.*?)\s*</a'], # July 2013
+ get_element_by_id('watch7-user-header', webpage), 'uploader', default=None)
+ or self._html_search_regex(
+ r'<button\s*href="/user/[^>]*>\s*<span[^>]*>\s*(.+?)\s*<', # April 2012
+ get_element_by_id('watch-headline-user-info', webpage), 'uploader', default=None)
+ or traverse_obj(player_config, ('args', 'creator'))
+ or video_details.get('author'))
+
channel_id = str_or_none(
video_details.get('channelId')
or microformats.get('externalChannelId')
or search_meta('channelId')
or self._search_regex(
r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1', # @b45a9e6
- webpage, 'channel id', default=None, group='id'))
- channel_url = f'http://www.youtube.com/channel/{channel_id}' if channel_id else None
+ webpage, 'channel id', default=None, group='id')
+ or id_from_url(owner_profile_url, 'channel')
+ or id_from_url(uploader_or_channel_url, 'channel')
+ or traverse_obj(player_config, ('args', 'ucid')))
+ channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
duration = int_or_none(
video_details.get('lengthSeconds')
or microformats.get('lengthSeconds')
+ or traverse_obj(player_config, ('args', ('length_seconds', 'l')), get_all=False)
or parse_duration(search_meta('duration')))
description = (
video_details.get('shortDescription')
@@ -532,26 +814,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
or clean_html(get_element_by_id('eow-description', webpage)) # @9e6dd23
or search_meta(['description', 'og:description', 'twitter:description']))
- uploader = video_details.get('author')
-
- # Uploader ID and URL
- uploader_mobj = re.search(
- r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">', # @fd05024
- webpage)
- if uploader_mobj is not None:
- uploader_id, uploader_url = uploader_mobj.group('uploader_id'), uploader_mobj.group('uploader_url')
- else:
- # @a6211d2
- uploader_url = url_or_none(microformats.get('ownerProfileUrl'))
- uploader_id = self._search_regex(
- r'(?:user|channel)/([^/]+)', uploader_url or '', 'uploader id', default=None)
-
upload_date = unified_strdate(
dict_get(microformats, ('uploadDate', 'publishDate'))
or search_meta(['uploadDate', 'datePublished'])
or self._search_regex(
- [r'(?s)id="eow-date.*?>(.*?)</span>',
- r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'], # @7998520
+ [r'(?s)id="eow-date.*?>\s*(.*?)\s*</span>',
+ r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']', # @7998520
+ r'class\s*=\s*"(?:watch-video-date|watch-video-added post-date)"[^>]*>\s*([^<]+?)\s*<'], # ~June 2010, ~Jan 2009 (respectively)
webpage, 'upload date', default=None))
return {
@@ -597,7 +866,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
response = self._call_cdx_api(
video_id, f'https://www.youtube.com/watch?v={video_id}',
filters=['mimetype:text/html'], collapse=['timestamp:6', 'digest'], query={'matchType': 'prefix'}) or []
- all_captures = sorted([int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None])
+ all_captures = sorted(int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None)
# Prefer the new polymer UI captures as we support extracting more metadata from them
# WBM captures seem to all switch to this layout ~July 2020
@@ -620,18 +889,22 @@ class YoutubeWebArchiveIE(InfoExtractor):
url_date = url_date or url_date_2
urlh = None
- try:
- urlh = self._request_webpage(
- HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
- video_id, note='Fetching archived video file url', expected_status=True)
- except ExtractorError as e:
- # HTTP Error 404 is expected if the video is not saved.
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
- self.raise_no_formats(
- 'The requested video is not archived, indexed, or there is an issue with web.archive.org',
- expected=True)
- else:
- raise
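+ # web.archive.org can be flaky, so retry the lookup; only a 404 (video was
+ # never archived) is treated as a definite miss rather than retried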
+ retry_manager = self.RetryManager(fatal=False)
+ for retry in retry_manager:
+ try:
+ urlh = self._request_webpage(
+ HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
+ video_id, note='Fetching archived video file url', expected_status=True)
+ except ExtractorError as e:
+ # HTTP Error 404 is expected if the video is not saved.
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ self.raise_no_formats(
+ 'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
+ else:
+ retry.error = e
+
+ if retry_manager.error:
+ self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id)
capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
diff --git a/hypervideo_dl/extractor/arcpublishing.py b/hypervideo_dl/extractor/arcpublishing.py
index 8880e5c..febd3d2 100644
--- a/hypervideo_dl/extractor/arcpublishing.py
+++ b/hypervideo_dl/extractor/arcpublishing.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -73,8 +70,8 @@ class ArcPublishingIE(InfoExtractor):
], 'video-api-cdn.%s.arcpublishing.com/api'),
]
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
entries = []
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
@@ -147,7 +144,6 @@ class ArcPublishingIE(InfoExtractor):
'url': s_url,
'quality': -10,
})
- self._sort_formats(formats)
subtitles = {}
for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
diff --git a/hypervideo_dl/extractor/ard.py b/hypervideo_dl/extractor/ard.py
index 7ea339b..0a8a874 100644
--- a/hypervideo_dl/extractor/ard.py
+++ b/hypervideo_dl/extractor/ard.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -43,8 +40,6 @@ class ARDMediathekBaseIE(InfoExtractor):
'This video is not available due to geoblocking',
countries=self._GEO_COUNTRIES, metadata_available=True)
- self._sort_formats(formats)
-
subtitles = {}
subtitle_url = media_info.get('_subtitleUrl')
if subtitle_url:
@@ -265,7 +260,6 @@ class ARDMediathekIE(ARDMediathekBaseIE):
'format_id': fid,
'url': furl,
})
- self._sort_formats(formats)
info = {
'formats': formats,
}
@@ -374,7 +368,6 @@ class ARDIE(InfoExtractor):
continue
f['url'] = format_url
formats.append(f)
- self._sort_formats(formats)
_SUB_FORMATS = (
('./dataTimedText', 'ttml'),
diff --git a/hypervideo_dl/extractor/arkena.py b/hypervideo_dl/extractor/arkena.py
index 4f4f457..de36ec8 100644
--- a/hypervideo_dl/extractor/arkena.py
+++ b/hypervideo_dl/extractor/arkena.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -22,6 +17,8 @@ class ArkenaIE(InfoExtractor):
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
)
'''
+ # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1']
_TESTS = [{
'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
'md5': '97f117754e5f3c020f5f26da4a44ebaf',
@@ -53,15 +50,6 @@ class ArkenaIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@@ -148,7 +136,6 @@ class ArkenaIE(InfoExtractor):
elif mime_type == 'application/vnd.ms-sstr+xml':
formats.extend(self._extract_ism_formats(
href, video_id, ism_id='mss', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/arnes.py b/hypervideo_dl/extractor/arnes.py
index 050c252..a493714 100644
--- a/hypervideo_dl/extractor/arnes.py
+++ b/hypervideo_dl/extractor/arnes.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
@@ -76,7 +73,6 @@ class ArnesIE(InfoExtractor):
'width': int_or_none(media.get('width')),
'height': int_or_none(media.get('height')),
})
- self._sort_formats(formats)
channel = video.get('channel') or {}
channel_id = channel.get('url')
@@ -93,7 +89,7 @@ class ArnesIE(InfoExtractor):
'timestamp': parse_iso8601(video.get('creationTime')),
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'),
+ 'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
'duration': float_or_none(video.get('duration'), 1000),
'view_count': int_or_none(video.get('views')),
'tags': video.get('hashtags'),
diff --git a/hypervideo_dl/extractor/arte.py b/hypervideo_dl/extractor/arte.py
index c2f2c1b..54e4d2d 100644
--- a/hypervideo_dl/extractor/arte.py
+++ b/hypervideo_dl/extractor/arte.py
@@ -1,193 +1,216 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
from ..utils import (
ExtractorError,
+ GeoRestrictedError,
int_or_none,
+ parse_iso8601,
parse_qs,
- qualities,
strip_or_none,
- try_get,
- unified_strdate,
+ traverse_obj,
url_or_none,
)
class ArteTVBaseIE(InfoExtractor):
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
- _API_BASE = 'https://api.arte.tv/api/player/v1'
+ _API_BASE = 'https://api.arte.tv/api/player/v2'
class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x)
- https?://
+ (?:https?://
(?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
)
- /(?P<id>\d{6}-\d{3}-[AF])
+ |arte://program)
+ /(?P<id>\d{6}-\d{3}-[AF]|LIVE)
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'info_dict': {
- 'id': '088501-000-A',
+ 'id': '100103-000-A',
+ 'title': 'USA: Dyskryminacja na porodówce',
+ 'description': 'md5:242017b7cce59ffae340a54baefcafb1',
+ 'alt_title': 'ARTE Reportage',
+ 'upload_date': '20201103',
+ 'duration': 554,
+ 'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
+ 'timestamp': 1604417980,
'ext': 'mp4',
- 'title': 'Mexico: Stealing Petrol to Survive',
- 'upload_date': '20190628',
},
+ 'params': {'skip_download': 'm3u8'}
}, {
- 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
- 'only_matching': True,
+ 'note': 'No alt_title',
+ 'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
+ 'info_dict': {
+ 'id': '110371-000-A',
+ 'ext': 'mp4',
+ 'upload_date': '20220718',
+ 'duration': 154,
+ 'timestamp': 1658162460,
+ 'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
+ 'title': 'La chaleur, supplice des arbres de rue',
+ 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
+ },
+ 'params': {'skip_download': 'm3u8'}
}, {
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
+ }, {
+ 'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE',
+ 'only_matching': True,
}]
+ _GEO_BYPASS = True
+
+ _LANG_MAP = { # ISO639 -> French abbreviations
+ 'fr': 'F',
+ 'de': 'A',
+ 'en': 'E[ANG]',
+ 'es': 'E[ESP]',
+ 'it': 'E[ITA]',
+ 'pl': 'E[POL]',
+ # XXX: probably means mixed; <https://www.arte.tv/en/videos/107710-029-A/dispatches-from-ukraine-local-journalists-report/>
+ # uses this code for audio that happens to be in Ukrainian, but the manifest uses the ISO code 'mul' (mixed)
+ 'mul': 'EU',
+ }
+
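+ # e.g. (hypothetical codes) 'VOF-STF' = original French voice with French
+ # subtitles; 'VA-STMA' = German dub with German subtitles for the hard of hearing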
+ _VERSION_CODE_RE = re.compile(r'''(?x)
+ V
+ (?P<original_voice>O?)
+ (?P<vlang>[FA]|E\[[A-Z]+\]|EU)?
+ (?P<audio_desc>AUD|)
+ (?:
+ (?P<has_sub>-ST)
+ (?P<sdh_sub>M?)
+ (?P<sub_lang>[FA]|E\[[A-Z]+\]|EU)
+ )?
+ ''')
+
+ # all obtained by exhaustive testing
+ _COUNTRIES_MAP = {
+ 'DE_FR': (
+ 'BL', 'DE', 'FR', 'GF', 'GP', 'MF', 'MQ', 'NC',
+ 'PF', 'PM', 'RE', 'WF', 'YT',
+ ),
+ # with both of the below, 'BE' sometimes works and sometimes doesn't
+ 'EUR_DE_FR': (
+ 'AT', 'BL', 'CH', 'DE', 'FR', 'GF', 'GP', 'LI',
+ 'MC', 'MF', 'MQ', 'NC', 'PF', 'PM', 'RE', 'WF',
+ 'YT',
+ ),
+ 'SAT': (
+ 'AD', 'AT', 'AX', 'BG', 'BL', 'CH', 'CY', 'CZ',
+ 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GF',
+ 'GR', 'HR', 'HU', 'IE', 'IS', 'IT', 'KN', 'LI',
+ 'LT', 'LU', 'LV', 'MC', 'MF', 'MQ', 'MT', 'NC',
+ 'NL', 'NO', 'PF', 'PL', 'PM', 'PT', 'RE', 'RO',
+ 'SE', 'SI', 'SK', 'SM', 'VA', 'WF', 'YT',
+ ),
+ }
+
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
-
- info = self._download_json(
- '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
- player_info = info['videoJsonPlayer']
-
- vsr = try_get(player_info, lambda x: x['VSR'], dict)
- if not vsr:
- error = None
- if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
- error = try_get(
- player_info, lambda x: x['custom_msg']['msg'], compat_str)
- if not error:
- error = 'Video %s is not available' % player_info.get('VID') or video_id
- raise ExtractorError(error, expected=True)
-
- upload_date_str = player_info.get('shootingDate')
- if not upload_date_str:
- upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
-
- title = (player_info.get('VTI') or player_info['VID']).strip()
- subtitle = player_info.get('VSU', '').strip()
- if subtitle:
- title += ' - %s' % subtitle
-
- qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
-
- LANGS = {
- 'fr': 'F',
- 'de': 'A',
- 'en': 'E[ANG]',
- 'es': 'E[ESP]',
- 'it': 'E[ITA]',
- 'pl': 'E[POL]',
- }
-
- langcode = LANGS.get(lang, lang)
-
- formats = []
- for format_id, format_dict in vsr.items():
- f = dict(format_dict)
- format_url = url_or_none(f.get('url'))
- streamer = f.get('streamer')
- if not format_url and not streamer:
- continue
- versionCode = f.get('versionCode')
- l = re.escape(langcode)
-
- # Language preference from most to least priority
- # Reference: section 6.8 of
- # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
- PREFERENCES = (
- # original version in requested language, without subtitles
- r'VO{0}$'.format(l),
- # original version in requested language, with partial subtitles in requested language
- r'VO{0}-ST{0}$'.format(l),
- # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
- r'VO{0}-STM{0}$'.format(l),
- # non-original (dubbed) version in requested language, without subtitles
- r'V{0}$'.format(l),
- # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
- r'V{0}-ST{0}$'.format(l),
- # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
- r'V{0}-STM{0}$'.format(l),
- # original version in requested language, with partial subtitles in different language
- r'VO{0}-ST(?!{0}).+?$'.format(l),
- # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
- r'VO{0}-STM(?!{0}).+?$'.format(l),
- # original version in different language, with partial subtitles in requested language
- r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
- # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
- r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
- # original version in different language, without subtitles
- r'VO(?:(?!{0}))?$'.format(l),
- # original version in different language, with partial subtitles in different language
- r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
- # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
- r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
- )
-
- for pref, p in enumerate(PREFERENCES):
- if re.match(p, versionCode):
- lang_pref = len(PREFERENCES) - pref
- break
- else:
- lang_pref = -1
- format_note = '%s, %s' % (f.get('versionCode'), f.get('versionLibelle'))
-
- media_type = f.get('mediaType')
- if media_type == 'hls':
- m3u8_formats = self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False)
- for m3u8_format in m3u8_formats:
- m3u8_format.update({
+ language_code = self._LANG_MAP.get(lang)
+
+ config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id)
+
+ geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
+ if geoblocking.get('restrictedArea'):
+ raise GeoRestrictedError(f'Video restricted to {geoblocking["code"]!r}',
+ countries=self._COUNTRIES_MAP.get(geoblocking['code'], ('DE', 'FR')))
+
+ if not traverse_obj(config, ('data', 'attributes', 'rights')):
+ # Eg: https://www.arte.tv/de/videos/097407-215-A/28-minuten
+ # Eg: https://www.arte.tv/es/videos/104351-002-A/serviteur-du-peuple-1-23
+ raise ExtractorError(
+ 'Video is not available in this language edition of Arte or broadcast rights expired', expected=True)
+
+ formats, subtitles = [], {}
+ secondary_formats = []
+ for stream in config['data']['attributes']['streams']:
+ # official player contains code like `e.get("versions")[0].eStat.ml5`
+ stream_version = stream['versions'][0]
+ stream_version_code = stream_version['eStat']['ml5']
+
+ lang_pref = -1
+ m = self._VERSION_CODE_RE.match(stream_version_code)
+ if m:
+ lang_pref = int(''.join('01'[x] for x in (
+ m.group('vlang') == language_code, # we prefer voice in the requested language
+ not m.group('audio_desc'), # and not the audio description version
+ bool(m.group('original_voice')), # but if the voice is not in the requested language, at least choose the original voice
+ m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
+ not m.group('has_sub'), # but we prefer no subtitles otherwise
+ not m.group('sdh_sub'), # and, among subtitled versions, we prefer non-SDH subtitles
+ )))
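+ # the digit string is parsed as a plain integer; since it always has six
+ # digits, ordinary integer ordering matches the intended binary ranking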
+
+ short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
+ if stream['protocol'].startswith('HLS'):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
+ for fmt in fmts:
+ fmt.update({
+ 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
'language_preference': lang_pref,
- 'format_note': format_note,
})
- formats.extend(m3u8_formats)
- continue
+ if any(map(short_label.startswith, ('cc', 'OGsub'))):
+ secondary_formats.extend(fmts)
+ else:
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ elif stream['protocol'] in ('HTTPS', 'RTMP'):
+ formats.append({
+ 'format_id': f'{stream["protocol"]}-{stream_version_code}',
+ 'url': stream['url'],
+ 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]',
+ 'language_preference': lang_pref,
+ # 'ext': 'mp4', # XXX: may or may not be necessary, at least for HTTPS
+ })
- format = {
- 'format_id': format_id,
- 'language_preference': lang_pref,
- 'format_note': format_note,
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height')),
- 'tbr': int_or_none(f.get('bitrate')),
- 'quality': qfunc(f.get('quality')),
- }
-
- if media_type == 'rtmp':
- format['url'] = f['streamer']
- format['play_path'] = 'mp4:' + f['url']
- format['ext'] = 'flv'
else:
- format['url'] = f['url']
+ self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}')
- formats.append(format)
+ # TODO: chapters from stream['segments']?
+ # The JS also looks for chapters in config['data']['attributes']['chapters'],
+ # but I have yet to find a video that has them
- # For this extractor, quality only represents the relative quality
- # with respect to other formats with the same resolution
- self._sort_formats(formats, ('res', 'quality'))
+ formats.extend(secondary_formats)
+ self._remove_duplicate_formats(formats)
+
+ metadata = config['data']['attributes']['metadata']
return {
- 'id': player_info.get('VID') or video_id,
- 'title': title,
- 'description': player_info.get('VDE') or player_info.get('V7T'),
- 'upload_date': unified_strdate(upload_date_str),
- 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
+ 'id': metadata['providerId'],
+ 'webpage_url': traverse_obj(metadata, ('link', 'url')),
+ 'title': traverse_obj(metadata, 'subtitle', 'title'),
+ 'alt_title': metadata.get('subtitle') and metadata.get('title'),
+ 'description': metadata.get('description'),
+ 'duration': traverse_obj(metadata, ('duration', 'seconds')),
+ 'language': metadata.get('language'),
+ 'timestamp': traverse_obj(config, ('data', 'attributes', 'rights', 'begin'), expected_type=parse_iso8601),
+ 'is_live': config['data']['attributes'].get('live', False),
'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': [
+ {'url': image['url'], 'id': image.get('caption')}
+ for image in metadata.get('images') or [] if url_or_none(image.get('url'))
+ ],
}
class ArteTVEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
+ _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1']
_TESTS = [{
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
'info_dict': {
@@ -197,17 +220,12 @@ class ArteTVEmbedIE(InfoExtractor):
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116',
},
+ 'skip': 'No video available'
}, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
- webpage)]
-
def _real_extract(self, url):
qs = parse_qs(url)
json_url = qs['json_url'][0]
@@ -220,44 +238,36 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
- 'info_dict': {
- 'id': 'RC-016954',
- 'title': 'Earn a Living',
- 'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
- },
- 'playlist_mincount': 6,
+ 'only_matching': True,
}, {
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
- 'only_matching': True,
+ 'playlist_mincount': 100,
+ 'info_dict': {
+ 'description': 'md5:84e7bf1feda248bc325ebfac818c476e',
+ 'id': 'RC-014123',
+ 'title': 'ARTE Reportage - najlepsze reportaże',
+ },
}]
def _real_extract(self, url):
- lang, playlist_id = self._match_valid_url(url).groups()
- collection = self._download_json(
- '%s/collectionData/%s/%s?source=videos'
- % (self._API_BASE, lang, playlist_id), playlist_id)
- entries = []
- for video in collection['videos']:
- if not isinstance(video, dict):
- continue
- video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
- if not video_url:
- continue
- video_id = video.get('programId')
- entries.append({
- '_type': 'url_transparent',
- 'url': video_url,
- 'id': video_id,
- 'title': video.get('title'),
- 'alt_title': video.get('subtitle'),
- 'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
- 'duration': int_or_none(video.get('durationSeconds')),
- 'view_count': int_or_none(video.get('views')),
- 'ie_key': ArteTVIE.ie_key(),
- })
- title = collection.get('title')
- description = collection.get('shortDescription') or collection.get('teaserText')
- return self.playlist_result(entries, playlist_id, title, description)
+ lang, playlist_id = self._match_valid_url(url).group('lang', 'id')
+ playlist = self._download_json(
+ f'{self._API_BASE}/playlist/{lang}/{playlist_id}', playlist_id)['data']['attributes']
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': video['config']['url'],
+ 'ie_key': ArteTVIE.ie_key(),
+ 'id': video.get('providerId'),
+ 'title': video.get('title'),
+ 'alt_title': video.get('subtitle'),
+ 'thumbnail': url_or_none(traverse_obj(video, ('mainImage', 'url'))),
+ 'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))),
+ } for video in traverse_obj(playlist, ('items', lambda _, v: v['config']['url']))]
+
+ return self.playlist_result(entries, playlist_id,
+ traverse_obj(playlist, ('metadata', 'title')),
+ traverse_obj(playlist, ('metadata', 'description')))
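
Note: each entry above is url_transparent, meaning the playlist pre-seeds metadata that is merged over whatever ArteTVIE extracts when the entry is resolved. The shape of one entry, with illustrative values only:

    entry = {
        '_type': 'url_transparent',  # defer extraction to another IE
        'url': 'https://example.com/api/player/v2/config/en/123456-000-A',
        'ie_key': 'ArteTV',          # which IE resolves the URL
        'title': 'Pre-seeded title',  # takes precedence over the IE's own value
        'duration': 1420,
    }
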
class ArteTVCategoryIE(ArteTVBaseIE):
@@ -270,14 +280,13 @@ class ArteTVCategoryIE(ArteTVBaseIE):
'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
},
'playlist_mincount': 13,
- },
- ]
+ }]
@classmethod
def suitable(cls, url):
return (
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
- and super(ArteTVCategoryIE, cls).suitable(url))
+ and super().suitable(url))
def _real_extract(self, url):
lang, playlist_id = self._match_valid_url(url).groups()
@@ -293,9 +302,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
items.append(video)
- title = (self._og_search_title(webpage, default=None)
- or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
- title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
+ title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
description=self._og_search_description(webpage, default=None))
diff --git a/hypervideo_dl/extractor/asiancrush.py b/hypervideo_dl/extractor/asiancrush.py
index 7f1940f..23f310e 100644
--- a/hypervideo_dl/extractor/asiancrush.py
+++ b/hypervideo_dl/extractor/asiancrush.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import re
diff --git a/hypervideo_dl/extractor/atresplayer.py b/hypervideo_dl/extractor/atresplayer.py
index 465af4e..a20e7f9 100644
--- a/hypervideo_dl/extractor/atresplayer.py
+++ b/hypervideo_dl/extractor/atresplayer.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -88,7 +84,6 @@ class AtresPlayerIE(InfoExtractor):
elif src_type == 'application/dash+xml':
formats, subtitles = self._extract_mpd_formats(
src, video_id, mpd_id='dash', fatal=False)
- self._sort_formats(formats)
heartbeat = episode.get('heartbeat') or {}
omniture = episode.get('omniture') or {}
diff --git a/hypervideo_dl/extractor/atscaleconf.py b/hypervideo_dl/extractor/atscaleconf.py
new file mode 100644
index 0000000..3f7b1e9
--- /dev/null
+++ b/hypervideo_dl/extractor/atscaleconf.py
@@ -0,0 +1,34 @@
+import re
+
+from .common import InfoExtractor
+
+
+class AtScaleConfEventIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atscaleconference\.com/events/(?P<id>[^/&$?]+)'
+
+ _TESTS = [{
+ 'url': 'https://atscaleconference.com/events/data-scale-spring-2022/',
+ 'playlist_mincount': 13,
+ 'info_dict': {
+ 'id': 'data-scale-spring-2022',
+ 'title': 'Data @Scale Spring 2022',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }, {
+ 'url': 'https://atscaleconference.com/events/video-scale-2021/',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'video-scale-2021',
+ 'title': 'Video @Scale 2021',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+
+ return self.playlist_from_matches(
+ re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
+ ie='Generic', playlist_id=id,
+ title=self._og_search_title(webpage), description=self._og_search_description(webpage))
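
For reference, playlist_from_matches turns a list of scraped URLs into playlist entries resolved by another extractor (here the generic one). A self-contained sketch of the same pattern, with an illustrative host and regex:

    import re

    def urls_to_playlist(webpage, playlist_id):
        # Collect every data-url attribute pointing at a video page.
        matches = re.findall(
            r'data-url\s*=\s*"(https?://example\.com/videos/[^"]+)"', webpage)
        # Each match becomes one entry that a downstream extractor resolves.
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'entries': [{'_type': 'url', 'url': m} for m in matches],
        }
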
diff --git a/hypervideo_dl/extractor/atttechchannel.py b/hypervideo_dl/extractor/atttechchannel.py
index 8f93fb3..6ff4ec0 100644
--- a/hypervideo_dl/extractor/atttechchannel.py
+++ b/hypervideo_dl/extractor/atttechchannel.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import unified_strdate
diff --git a/hypervideo_dl/extractor/atvat.py b/hypervideo_dl/extractor/atvat.py
index 481a097..d6ed9e4 100644
--- a/hypervideo_dl/extractor/atvat.py
+++ b/hypervideo_dl/extractor/atvat.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import datetime
from .common import InfoExtractor
@@ -52,7 +49,6 @@ class ATVAtIE(InfoExtractor):
'url': source_url,
'format_id': protocol,
})
- self._sort_formats(formats)
return {
'id': clip_id,
diff --git a/hypervideo_dl/extractor/audimedia.py b/hypervideo_dl/extractor/audimedia.py
index 6bd48ef..35114e5 100644
--- a/hypervideo_dl/extractor/audimedia.py
+++ b/hypervideo_dl/extractor/audimedia.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -79,7 +76,6 @@ class AudiMediaIE(InfoExtractor):
'format_id': 'http-%s' % bitrate,
})
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/audioboom.py b/hypervideo_dl/extractor/audioboom.py
index c51837b..a23fcd2 100644
--- a/hypervideo_dl/extractor/audioboom.py
+++ b/hypervideo_dl/extractor/audioboom.py
@@ -1,27 +1,33 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
-from ..utils import (
- clean_html,
- float_or_none,
-)
+from ..utils import clean_html, float_or_none, traverse_obj, unescapeHTML
class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
- 'md5': '7b00192e593ff227e6a315486979a42d',
+ 'md5': '4d68be11c9f9daf3dab0778ad1e010c3',
'info_dict': {
'id': '7398103',
'ext': 'mp3',
'title': 'Asim Chaudhry',
- 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
+ 'description': 'md5:0ed714ae0e81e5d9119cac2f618ad679',
'duration': 4000.99,
'uploader': 'Sue Perkins: An hour or so with...',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
}
+ }, { # Direct mp3-file link
+ 'url': 'https://audioboom.com/posts/8128496.mp3',
+ 'md5': 'e329edf304d450def95c7f86a9165ee1',
+ 'info_dict': {
+ 'id': '8128496',
+ 'ext': 'mp3',
+ 'title': 'TCRNo8 / DAILY 03 - In Control',
+ 'description': 'md5:44665f142db74858dfa21c5b34787948',
+ 'duration': 1689.7,
+ 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
+ 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
+ }
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
'only_matching': True,
@@ -29,45 +35,23 @@ class AudioBoomIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://audioboom.com/posts/{video_id}', video_id)
- webpage = self._download_webpage(url, video_id)
-
- clip = None
-
- clip_store = self._parse_json(
- self._html_search_regex(
- r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
- webpage, 'clip store', default='{}', group='json'),
- video_id, fatal=False)
- if clip_store:
- clips = clip_store.get('clips')
- if clips and isinstance(clips, list) and isinstance(clips[0], dict):
- clip = clips[0]
-
- def from_clip(field):
- if clip:
- return clip.get(field)
-
- audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
- 'audio', webpage, 'audio url')
- title = from_clip('title') or self._html_search_meta(
- ['og:title', 'og:audio:title', 'audio_title'], webpage)
- description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
-
- duration = float_or_none(from_clip('duration') or self._html_search_meta(
- 'weibo:audio:duration', webpage))
-
- uploader = from_clip('author') or self._html_search_meta(
- ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
- uploader_url = from_clip('author_url') or self._html_search_meta(
- 'audioboo:channel', webpage, 'uploader url')
+ clip_store = self._search_json(
+ r'data-react-class="V5DetailPagePlayer"\s*data-react-props=["\']',
+ webpage, 'clip store', video_id, fatal=False, transform_source=unescapeHTML)
+ clip = traverse_obj(clip_store, ('clips', 0), expected_type=dict) or {}
return {
'id': video_id,
- 'url': audio_url,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'uploader': uploader,
- 'uploader_url': uploader_url,
+ 'url': clip.get('clipURLPriorToLoading') or self._og_search_property('audio', webpage, 'audio url'),
+ 'title': clip.get('title') or self._html_search_meta(['og:title', 'og:audio:title', 'audio_title'], webpage),
+ 'description': (clip.get('description') or clean_html(clip.get('formattedDescription'))
+ or self._og_search_description(webpage)),
+ 'duration': float_or_none(clip.get('duration') or self._html_search_meta('weibo:audio:duration', webpage)),
+ 'uploader': clip.get('author') or self._html_search_meta(
+ ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader'),
+ 'uploader_url': clip.get('author_url') or self._html_search_regex(
+ r'<div class="avatar flex-shrink-0">\s*<a href="(?P<uploader_url>http[^"]+)"',
+ webpage, 'uploader url', fatal=False),
}
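
Note: the rewrite swaps the old regex plus _parse_json combination for _search_json with transform_source=unescapeHTML, since the player props are HTML-escaped JSON inside an attribute. A rough standalone illustration (the non-greedy regex stands in for _search_json's real brace matching):

    import html
    import json
    import re

    def clip_store_from_attr(webpage):
        # Find the attribute value, unescape HTML entities, then parse.
        m = re.search(r'data-react-props=["\'](\{.*?\})["\']', webpage)
        return json.loads(html.unescape(m.group(1))) if m else {}

    page = '<div data-react-props="{&quot;clips&quot;:[{&quot;title&quot;:&quot;Demo&quot;}]}">'
    assert clip_store_from_attr(page)['clips'][0]['title'] == 'Demo'
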
diff --git a/hypervideo_dl/extractor/audiodraft.py b/hypervideo_dl/extractor/audiodraft.py
new file mode 100644
index 0000000..71e5afd
--- /dev/null
+++ b/hypervideo_dl/extractor/audiodraft.py
@@ -0,0 +1,93 @@
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class AudiodraftBaseIE(InfoExtractor):
+ def _audiodraft_extract_from_id(self, player_entry_id):
+ data_json = self._download_json(
+ 'https://www.audiodraft.com/scripts/general/player/getPlayerInfoNew.php', player_entry_id,
+ headers={
+ 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, data=f'id={player_entry_id}'.encode('utf-8'))
+
+ return {
+ 'id': str(data_json['entry_id']),
+ 'title': data_json.get('entry_title'),
+ 'url': data_json['path'],
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ 'uploader': data_json.get('designer_name'),
+ 'uploader_id': data_json.get('designer_id'),
+ 'webpage_url': data_json.get('entry_url'),
+ 'like_count': int_or_none(data_json.get('entry_likes')),
+ 'average_rating': int_or_none(data_json.get('entry_rating')),
+ }
+
+
+class AudiodraftCustomIE(AudiodraftBaseIE):
+ IE_NAME = 'Audiodraft:custom'
+ _VALID_URL = r'https?://(?:[-\w]+)\.audiodraft\.com/entry/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://nokiatune.audiodraft.com/entry/5874',
+ 'info_dict': {
+ 'id': '9485',
+ 'ext': 'mp3',
+ 'title': 'Hula Hula Calls',
+ 'uploader': 'unclemaki',
+ 'uploader_id': '13512',
+ 'average_rating': 5,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'http://vikinggrace.audiodraft.com/entry/501',
+ 'info_dict': {
+ 'id': '22241',
+ 'ext': 'mp3',
+ 'title': 'MVG Happy',
+ 'uploader': 'frog',
+ 'uploader_id': '19142',
+ 'average_rating': 5,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'http://timferriss.audiodraft.com/entry/765',
+ 'info_dict': {
+ 'id': '19710',
+ 'ext': 'mp3',
+ 'title': 'ferris03',
+ 'uploader': 'malex',
+ 'uploader_id': '17335',
+ 'average_rating': 5,
+ 'like_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, 'play entry id')
+ return self._audiodraft_extract_from_id(player_entry_id)
+
+
+class AudiodraftGenericIE(AudiodraftBaseIE):
+ IE_NAME = 'Audiodraft:generic'
+ _VALID_URL = r'https?://www\.audiodraft\.com/contests/[^/#]+#entries&eid=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.audiodraft.com/contests/570-Score-A-Video-Surprise-Us#entries&eid=30138',
+ 'info_dict': {
+ 'id': '30138',
+ 'ext': 'mp3',
+ 'title': 'DROP in sound_V2',
+ 'uploader': 'TiagoSilva',
+ 'uploader_id': '19452',
+ 'average_rating': 4,
+ 'like_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ return self._audiodraft_extract_from_id(f'player_entry_{id}')
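
For reference, the base class above fetches player metadata with a form-encoded POST. An equivalent request using only the standard library (headers reduced to the essential one; endpoint as in the extractor):

    import json
    import urllib.parse
    import urllib.request

    def fetch_player_info(player_entry_id):
        data = urllib.parse.urlencode({'id': player_entry_id}).encode()
        req = urllib.request.Request(
            'https://www.audiodraft.com/scripts/general/player/getPlayerInfoNew.php',
            data=data, headers={'X-Requested-With': 'XMLHttpRequest'})
        # urllib sets Content-Type: application/x-www-form-urlencoded for POST data
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)
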
diff --git a/hypervideo_dl/extractor/audiomack.py b/hypervideo_dl/extractor/audiomack.py
index 19775cf..5c4160f 100644
--- a/hypervideo_dl/extractor/audiomack.py
+++ b/hypervideo_dl/extractor/audiomack.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import time
diff --git a/hypervideo_dl/extractor/audius.py b/hypervideo_dl/extractor/audius.py
index fa64995..6448b44 100644
--- a/hypervideo_dl/extractor/audius.py
+++ b/hypervideo_dl/extractor/audius.py
@@ -1,11 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
from .common import InfoExtractor
-from ..utils import ExtractorError, try_get, compat_str, str_or_none
-from ..compat import compat_urllib_parse_unquote
+from ..compat import compat_str, compat_urllib_parse_unquote
+from ..utils import ExtractorError, str_or_none, try_get
class AudiusBaseIE(InfoExtractor):
@@ -171,7 +168,7 @@ class AudiusIE(AudiusBaseIE):
}
-class AudiusTrackIE(AudiusIE):
+class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
IE_NAME = 'audius:track'
IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
@@ -246,7 +243,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
playlist_data.get('description'))
-class AudiusProfileIE(AudiusPlaylistIE):
+class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'audius:artist'
IE_DESC = 'Audius.co profile/artist pages'
_VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
diff --git a/hypervideo_dl/extractor/awaan.py b/hypervideo_dl/extractor/awaan.py
index f5e559c..6fc938d 100644
--- a/hypervideo_dl/extractor/awaan.py
+++ b/hypervideo_dl/extractor/awaan.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
from .common import InfoExtractor
@@ -44,7 +41,7 @@ class AWAANBaseIE(InfoExtractor):
'id': video_id,
'title': title,
'description': video_data.get('description_en') or video_data.get('description_ar'),
- 'thumbnail': format_field(img, template='http://admin.mangomolo.com/analytics/%s'),
+ 'thumbnail': format_field(img, None, 'http://admin.mangomolo.com/analytics/%s'),
'duration': int_or_none(video_data.get('duration')),
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
'is_live': is_live,
diff --git a/hypervideo_dl/extractor/aws.py b/hypervideo_dl/extractor/aws.py
index dccfeaf..eb831a1 100644
--- a/hypervideo_dl/extractor/aws.py
+++ b/hypervideo_dl/extractor/aws.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import datetime
import hashlib
import hmac
@@ -9,7 +6,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
-class AWSIE(InfoExtractor):
+class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
_AWS_REGION = 'us-east-1'
diff --git a/hypervideo_dl/extractor/azmedien.py b/hypervideo_dl/extractor/azmedien.py
index 0168340..d1686ee 100644
--- a/hypervideo_dl/extractor/azmedien.py
+++ b/hypervideo_dl/extractor/azmedien.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/baidu.py b/hypervideo_dl/extractor/baidu.py
index 364fd94..8786d67 100644
--- a/hypervideo_dl/extractor/baidu.py
+++ b/hypervideo_dl/extractor/baidu.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import unescapeHTML
diff --git a/hypervideo_dl/extractor/banbye.py b/hypervideo_dl/extractor/banbye.py
index 3d4d36e..c873425 100644
--- a/hypervideo_dl/extractor/banbye.py
+++ b/hypervideo_dl/extractor/banbye.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import math
from .common import InfoExtractor
@@ -83,8 +80,6 @@ class BanByeIE(BanByeBaseIE):
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
} for quality in data['quality']]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': data.get('title'),
diff --git a/hypervideo_dl/extractor/bandaichannel.py b/hypervideo_dl/extractor/bandaichannel.py
index f1bcdef..d7fcf44 100644
--- a/hypervideo_dl/extractor/bandaichannel.py
+++ b/hypervideo_dl/extractor/bandaichannel.py
@@ -1,11 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .brightcove import BrightcoveNewIE
+from .brightcove import BrightcoveNewBaseIE
from ..utils import extract_attributes
-class BandaiChannelIE(BrightcoveNewIE):
+class BandaiChannelIE(BrightcoveNewBaseIE):
IE_NAME = 'bandaichannel'
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/bandcamp.py b/hypervideo_dl/extractor/bandcamp.py
index 745055e..de81e0d 100644
--- a/hypervideo_dl/extractor/bandcamp.py
+++ b/hypervideo_dl/extractor/bandcamp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
import re
import time
@@ -8,23 +5,24 @@ import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ KNOWN_EXTENSIONS,
ExtractorError,
float_or_none,
int_or_none,
- KNOWN_EXTENSIONS,
parse_filesize,
str_or_none,
try_get,
- update_url_query,
unified_strdate,
unified_timestamp,
+ update_url_query,
url_or_none,
urljoin,
)
class BandcampIE(InfoExtractor):
- _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?P<uploader>[^/]+)\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
@@ -87,7 +85,7 @@ class BandcampIE(InfoExtractor):
attr + ' data', group=2), video_id, fatal=fatal)
def _real_extract(self, url):
- title = self._match_id(url)
+ title, uploader = self._match_valid_url(url).group('id', 'uploader')
webpage = self._download_webpage(url, title)
tralbum = self._extract_data_attr(webpage, title)
thumbnail = self._og_search_thumbnail(webpage)
@@ -186,8 +184,6 @@ class BandcampIE(InfoExtractor):
'acodec': format_id.split('-')[0],
})
- self._sort_formats(formats)
-
title = '%s - %s' % (artist, track) if artist else track
if not duration:
@@ -199,6 +195,8 @@ class BandcampIE(InfoExtractor):
'title': title,
'thumbnail': thumbnail,
'uploader': artist,
+ 'uploader_id': uploader,
+ 'uploader_url': f'https://{uploader}.bandcamp.com',
'timestamp': timestamp,
'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
'duration': duration,
@@ -211,7 +209,7 @@ class BandcampIE(InfoExtractor):
}
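
Note: the _VALID_URL change above captures the artist subdomain, so the new uploader_id and uploader_url fields can be derived from the URL alone. In isolation:

    import re

    m = re.match(r'https?://(?P<uploader>[^/]+)\.bandcamp\.com/track/(?P<id>[^/?#&]+)',
                 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song')
    title, uploader = m.group('id', 'uploader')
    assert uploader == 'youtube-dl'
    assert f'https://{uploader}.bandcamp.com' == 'https://youtube-dl.bandcamp.com'
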
-class BandcampAlbumIE(BandcampIE):
+class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'
@@ -314,7 +312,7 @@ class BandcampAlbumIE(BandcampIE):
}
-class BandcampWeeklyIE(BandcampIE):
+class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'Bandcamp:weekly'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{
@@ -363,7 +361,6 @@ class BandcampWeeklyIE(BandcampIE):
'ext': ext,
'vcodec': 'none',
})
- self._sort_formats(formats)
title = show.get('audio_title') or 'Bandcamp Weekly'
subtitle = show.get('subtitle')
@@ -439,7 +436,7 @@ class BandcampUserIE(InfoExtractor):
uploader = self._match_id(url)
webpage = self._download_webpage(url, uploader)
- discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
+ discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
return self.playlist_from_matches(
diff --git a/hypervideo_dl/extractor/bannedvideo.py b/hypervideo_dl/extractor/bannedvideo.py
index 3db1151..51e7220 100644
--- a/hypervideo_dl/extractor/bannedvideo.py
+++ b/hypervideo_dl/extractor/bannedvideo.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -137,7 +135,6 @@ query GetCommentReplies($id: String!) {
formats.extend(self._extract_m3u8_formats(
video_info.get('streamUrl'), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', live=True))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/bbc.py b/hypervideo_dl/extractor/bbc.py
index 29ad7de..9d28e70 100644
--- a/hypervideo_dl/extractor/bbc.py
+++ b/hypervideo_dl/extractor/bbc.py
@@ -1,19 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import itertools
import json
import re
+import urllib.error
+import xml.etree.ElementTree
from .common import InfoExtractor
-from ..compat import (
- compat_etree_Element,
- compat_HTTPError,
- compat_str,
- compat_urllib_error,
- compat_urlparse,
-)
+from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -53,6 +46,7 @@ class BBCCoUkIE(InfoExtractor):
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
+ _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
_LOGIN_URL = 'https://account.bbc.com/signin'
_NETRC_MACHINE = 'bbc'
@@ -318,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
continue
captions = self._download_xml(
cc_url, programme_id, 'Downloading captions', fatal=False)
- if not isinstance(captions, compat_etree_Element):
+ if not isinstance(captions, xml.etree.ElementTree.Element):
continue
subtitles['en'] = [
{
@@ -394,7 +388,7 @@ class BBCCoUkIE(InfoExtractor):
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
except ExtractorError as e:
- if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
+ if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
and e.exc_info[1].code in (403, 404)):
raise
fmts = []
@@ -581,8 +575,6 @@ class BBCCoUkIE(InfoExtractor):
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
- self._sort_formats(formats)
-
return {
'id': programme_id,
'title': title,
@@ -594,10 +586,15 @@ class BBCCoUkIE(InfoExtractor):
}
-class BBCIE(BBCCoUkIE):
+class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'bbc'
IE_DESC = 'BBC'
- _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?(?:
+ bbc\.(?:com|co\.uk)|
+ bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
+ bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
+ )/(?:[^/]+/)+(?P<id>[^/#?]+)'''
_MEDIA_SETS = [
'pc',
@@ -847,6 +844,12 @@ class BBCIE(BBCCoUkIE):
'upload_date': '20190604',
'categories': ['Psychology'],
},
+ }, { # onion routes
+ 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
+ 'only_matching': True,
}]
@classmethod
@@ -885,7 +888,6 @@ class BBCIE(BBCCoUkIE):
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
programme_id, title, description, duration, formats, subtitles = \
self._process_legacy_playlist_url(url, playlist_id)
- self._sort_formats(formats)
return {
'id': programme_id,
'title': title,
@@ -904,12 +906,8 @@ class BBCIE(BBCCoUkIE):
json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
timestamp = json_ld_info.get('timestamp')
- playlist_title = json_ld_info.get('title')
- if not playlist_title:
- playlist_title = (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'playlist title', default=None))
- if playlist_title:
- playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+ playlist_title = json_ld_info.get('title') or re.sub(
+ r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None
playlist_description = json_ld_info.get(
'description') or self._og_search_description(webpage, default=None)
@@ -953,7 +951,6 @@ class BBCIE(BBCCoUkIE):
duration = int_or_none(items[0].get('duration'))
programme_id = items[0].get('vpid')
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
entries.append({
'id': programme_id,
'title': title,
@@ -990,7 +987,6 @@ class BBCIE(BBCCoUkIE):
continue
raise
if entry:
- self._sort_formats(entry['formats'])
entries.append(entry)
if entries:
@@ -1014,7 +1010,6 @@ class BBCIE(BBCCoUkIE):
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
# digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
digital_data = self._parse_json(
self._search_regex(
@@ -1046,7 +1041,6 @@ class BBCIE(BBCCoUkIE):
if version_id:
title = smp_data['title']
formats, subtitles = self._download_media_selector(version_id)
- self._sort_formats(formats)
image_url = smp_data.get('holdingImageURL')
display_date = init_data.get('displayDate')
topic_title = init_data.get('topicTitle')
@@ -1088,7 +1082,6 @@ class BBCIE(BBCCoUkIE):
continue
title = lead_media.get('title') or self._og_search_title(webpage)
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
description = lead_media.get('summary')
uploader = lead_media.get('masterBrand')
uploader_id = lead_media.get('mid')
@@ -1117,7 +1110,6 @@ class BBCIE(BBCCoUkIE):
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
synopses = current_programme.get('synopses') or {}
network = current_programme.get('network') or {}
duration = int_or_none(
@@ -1150,7 +1142,6 @@ class BBCIE(BBCCoUkIE):
clip_title = clip.get('title')
if clip_vpid and clip_title:
formats, subtitles = self._download_media_selector(clip_vpid)
- self._sort_formats(formats)
return {
'id': clip_vpid,
'title': clip_title,
@@ -1172,7 +1163,6 @@ class BBCIE(BBCCoUkIE):
if not programme_id:
continue
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
entries.append({
'id': programme_id,
'title': playlist_title,
@@ -1204,7 +1194,6 @@ class BBCIE(BBCCoUkIE):
if not (item_id and item_title):
continue
formats, subtitles = self._download_media_selector(item_id)
- self._sort_formats(formats)
item_desc = None
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
if blocks:
@@ -1238,7 +1227,7 @@ class BBCIE(BBCCoUkIE):
(lambda x: x['data']['blocks'],
lambda x: x['data']['content']['model']['blocks'],),
list) or []):
- if block.get('type') != 'media':
+ if block.get('type') not in ['media', 'video']:
continue
parse_media(block.get('model'))
return self.playlist_result(
@@ -1305,7 +1294,6 @@ class BBCIE(BBCCoUkIE):
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
if not formats and not self.get_param('ignore_no_formats'):
continue
- self._sort_formats(formats)
video_id = media_meta.get('externalId')
if not video_id:
diff --git a/hypervideo_dl/extractor/beatport.py b/hypervideo_dl/extractor/beatport.py
index e1cf8b4..0aecbd0 100644
--- a/hypervideo_dl/extractor/beatport.py
+++ b/hypervideo_dl/extractor/beatport.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -77,7 +74,6 @@ class BeatportIE(InfoExtractor):
fmt['abr'] = 96
fmt['asr'] = 44100
formats.append(fmt)
- self._sort_formats(formats)
images = []
for name, info in track['images'].items():
diff --git a/hypervideo_dl/extractor/beeg.py b/hypervideo_dl/extractor/beeg.py
index 717fff3..52ee68e 100644
--- a/hypervideo_dl/extractor/beeg.py
+++ b/hypervideo_dl/extractor/beeg.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
@@ -78,8 +76,6 @@ class BeegIE(InfoExtractor):
f['height'] = height
formats.extend(current_formats)
- self._sort_formats(formats)
-
return {
'id': video_id,
'display_id': first_fact.get('id'),
diff --git a/hypervideo_dl/extractor/behindkink.py b/hypervideo_dl/extractor/behindkink.py
index 2c97f98..ca44981 100644
--- a/hypervideo_dl/extractor/behindkink.py
+++ b/hypervideo_dl/extractor/behindkink.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import url_basename
diff --git a/hypervideo_dl/extractor/bellmedia.py b/hypervideo_dl/extractor/bellmedia.py
index 904c17e..5ae4b91 100644
--- a/hypervideo_dl/extractor/bellmedia.py
+++ b/hypervideo_dl/extractor/bellmedia.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
@@ -28,7 +24,7 @@ class BellMediaIE(InfoExtractor):
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
- 'md5': '36d3ef559cfe8af8efe15922cd3ce950',
+ 'md5': '3e5b8e38370741d5089da79161646635',
'info_dict': {
'id': '1403070',
'ext': 'flv',
@@ -36,6 +32,14 @@ class BellMediaIE(InfoExtractor):
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
+ 'season_id': 73997,
+ 'season': '2018',
+ 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
+ 'tags': [],
+ 'categories': ['ETFs'],
+ 'season_number': 8,
+ 'duration': 272.038,
+ 'series': 'Market Call Tonight',
},
}, {
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
diff --git a/hypervideo_dl/extractor/berufetv.py b/hypervideo_dl/extractor/berufetv.py
new file mode 100644
index 0000000..8160cbd
--- /dev/null
+++ b/hypervideo_dl/extractor/berufetv.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import float_or_none, mimetype2ext, traverse_obj
+
+
+class BerufeTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?web\.arbeitsagentur\.de/berufetv/[^?#]+/film;filmId=(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://web.arbeitsagentur.de/berufetv/studienberufe/wirtschaftswissenschaften/wirtschaftswissenschaften-volkswirtschaft/film;filmId=DvKC3DUpMKvUZ_6fEnfg3u',
+ 'md5': '041b6432ec8e6838f84a5c30f31cc795',
+ 'info_dict': {
+ 'id': 'DvKC3DUpMKvUZ_6fEnfg3u',
+ 'ext': 'mp4',
+ 'title': 'Volkswirtschaftslehre',
+ 'description': 'md5:6bd87d0c63163480a6489a37526ee1c1',
+ 'categories': ['Studien&shy;beruf'],
+ 'tags': ['Studienfilm'],
+ 'duration': 602.440,
+ 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ movie_metadata = self._download_json(
+ 'https://rest.arbeitsagentur.de/infosysbub/berufetv/pc/v1/film-metadata',
+ video_id, 'Downloading JSON metadata',
+ headers={'X-API-Key': '79089773-4892-4386-86e6-e8503669f426'}, fatal=False)
+
+ meta = traverse_obj(
+ movie_metadata, ('metadaten', lambda _, i: video_id == i['miId']),
+ get_all=False, default={})
+
+ video = self._download_json(
+ f'https://d.video-cdn.net/play/player/8YRzUk6pTzmBdrsLe9Y88W/video/{video_id}',
+ video_id, 'Downloading video JSON')
+
+ formats, subtitles = [], {}
+ for key, source in video['videoSources']['html'].items():
+ if key == 'auto':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(source[0]['source'], video_id)
+ formats += fmts
+ subtitles = subs
+ else:
+ formats.append({
+ 'url': source[0]['source'],
+ 'ext': mimetype2ext(source[0]['mimeType']),
+ 'format_id': key,
+ })
+
+ for track in video.get('videoTracks') or []:
+ if track.get('type') != 'SUBTITLES':
+ continue
+ subtitles.setdefault(track['language'], []).append({
+ 'url': track['source'],
+ 'name': track.get('label'),
+ 'ext': 'vtt'
+ })
+
+ return {
+ 'id': video_id,
+ 'title': meta.get('titel') or traverse_obj(video, ('videoMetaData', 'title')),
+ 'description': meta.get('beschreibung'),
+ 'thumbnail': meta.get('thumbnail') or f'https://asset-out-cdn.video-cdn.net/private/videos/{video_id}/thumbnails/active',
+ 'duration': float_or_none(video.get('duration'), scale=1000),
+ 'categories': [meta['kategorie']] if meta.get('kategorie') else None,
+ 'tags': meta.get('themengebiete'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
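
Note: the subtitle handling above groups VTT tracks by language code. The same pattern in isolation, with made-up track data:

    def add_subtitle_track(subtitles, track):
        # One list of subtitle variants per language, as videoTracks is
        # folded into the subtitles dict above.
        if track.get('type') != 'SUBTITLES':
            return
        subtitles.setdefault(track['language'], []).append({
            'url': track['source'],
            'name': track.get('label'),
            'ext': 'vtt',
        })

    subs = {}
    add_subtitle_track(subs, {'type': 'SUBTITLES', 'language': 'de',
                              'source': 'https://example.com/de.vtt', 'label': 'Deutsch'})
    assert list(subs) == ['de']
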
diff --git a/hypervideo_dl/extractor/bet.py b/hypervideo_dl/extractor/bet.py
index 2c71442..6b867d1 100644
--- a/hypervideo_dl/extractor/bet.py
+++ b/hypervideo_dl/extractor/bet.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate
diff --git a/hypervideo_dl/extractor/bfi.py b/hypervideo_dl/extractor/bfi.py
index 60c8944..76f0516 100644
--- a/hypervideo_dl/extractor/bfi.py
+++ b/hypervideo_dl/extractor/bfi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/bfmtv.py b/hypervideo_dl/extractor/bfmtv.py
index 501f69d..d86d283 100644
--- a/hypervideo_dl/extractor/bfmtv.py
+++ b/hypervideo_dl/extractor/bfmtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -45,7 +42,7 @@ class BFMTVIE(BFMTVBaseIE):
return self._brightcove_url_result(video_block['videoid'], video_block)
-class BFMTVLiveIE(BFMTVIE):
+class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'bfmtv:live'
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/bibeltv.py b/hypervideo_dl/extractor/bibeltv.py
index 56c2bfe..fd20aad 100644
--- a/hypervideo_dl/extractor/bibeltv.py
+++ b/hypervideo_dl/extractor/bibeltv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/bigflix.py b/hypervideo_dl/extractor/bigflix.py
index 28e3e59..02d1ba0 100644
--- a/hypervideo_dl/extractor/bigflix.py
+++ b/hypervideo_dl/extractor/bigflix.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -66,8 +63,6 @@ class BigflixIE(InfoExtractor):
'url': decode_url(file_url),
})
- self._sort_formats(formats)
-
description = self._html_search_meta('description', webpage)
return {
diff --git a/hypervideo_dl/extractor/bigo.py b/hypervideo_dl/extractor/bigo.py
index ddf76ac..1cb6e58 100644
--- a/hypervideo_dl/extractor/bigo.py
+++ b/hypervideo_dl/extractor/bigo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import ExtractorError, urlencode_postdata
@@ -31,7 +28,7 @@ class BigoIE(InfoExtractor):
user_id = self._match_id(url)
info_raw = self._download_json(
- 'https://bigo.tv/studio/getInternalStudioInfo',
+ 'https://ta.bigo.tv/official_website/studio/getInternalStudioInfo',
user_id, data=urlencode_postdata({'siteId': user_id}))
if not isinstance(info_raw, dict):
@@ -44,14 +41,14 @@ class BigoIE(InfoExtractor):
if not info.get('alive'):
raise ExtractorError('This user is offline.', expected=True)
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ info.get('hls_src'), user_id, 'mp4', 'm3u8')
+
return {
'id': info.get('roomId') or user_id,
'title': info.get('roomTopic') or info.get('nick_name') or user_id,
- 'formats': [{
- 'url': info.get('hls_src'),
- 'ext': 'mp4',
- 'protocol': 'm3u8',
- }],
+ 'formats': formats,
+ 'subtitles': subs,
'thumbnail': info.get('snapshot'),
'uploader': info.get('nick_name'),
'uploader_id': user_id,
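
Note: the change above replaces a single hard-coded HLS format with _extract_m3u8_formats_and_subtitles, which expands the master playlist into one format per variant stream. A toy version of that expansion (the real helper also handles codecs, resolutions, audio groups and subtitle renditions):

    import re

    def parse_master_m3u8(manifest, base_url):
        formats = []
        # Each #EXT-X-STREAM-INF line describes the variant URI that follows it.
        for attrs, path in re.findall(r'#EXT-X-STREAM-INF:([^\n]+)\n(\S+)', manifest):
            bandwidth = re.search(r'BANDWIDTH=(\d+)', attrs)
            formats.append({
                'url': path if path.startswith('http') else base_url + path,
                'tbr': int(bandwidth.group(1)) // 1000 if bandwidth else None,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
            })
        return formats

    master = '#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1280000,RESOLUTION=640x360\nlow.m3u8\n'
    assert parse_master_m3u8(master, 'https://example.com/live/')[0]['tbr'] == 1280
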
diff --git a/hypervideo_dl/extractor/bild.py b/hypervideo_dl/extractor/bild.py
index b8dfbd4..f3dea33 100644
--- a/hypervideo_dl/extractor/bild.py
+++ b/hypervideo_dl/extractor/bild.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/bilibili.py b/hypervideo_dl/extractor/bilibili.py
index 909f7f8..bc04241 100644
--- a/hypervideo_dl/extractor/bilibili.py
+++ b/hypervideo_dl/extractor/bilibili.py
@@ -1,509 +1,561 @@
-# coding: utf-8
-
import base64
-import hashlib
-import itertools
import functools
-import re
+import itertools
import math
+import urllib.error
+import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urlparse,
- compat_urllib_parse_urlparse
-)
from ..utils import (
ExtractorError,
+ GeoRestrictedError,
+ InAdvancePagedList,
+ OnDemandPagedList,
filter_dict,
- int_or_none,
float_or_none,
+ format_field,
+ int_or_none,
+ make_archive_id,
mimetype2ext,
- parse_iso8601,
- traverse_obj,
parse_count,
- smuggle_url,
+ parse_qs,
+ qualities,
srt_subtitles_timecode,
str_or_none,
- strip_jsonp,
- unified_timestamp,
- unsmuggle_url,
- urlencode_postdata,
+ traverse_obj,
url_or_none,
- OnDemandPagedList
+ urlencode_postdata,
)
-class BiliBiliIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:(?:www|bangumi)\.)?
- bilibili\.(?:tv|com)/
- (?:
- (?:
- video/[aA][vV]|
- anime/(?P<anime_id>\d+)/play\#
- )(?P<id>\d+)|
- (s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
- )
- (?:/?\?p=(?P<page>\d+))?
- '''
+class BilibiliBaseIE(InfoExtractor):
+ def extract_formats(self, play_info):
+ format_names = {
+ r['quality']: traverse_obj(r, 'new_description', 'display_desc')
+ for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
+ }
+
+ audios = traverse_obj(play_info, ('dash', 'audio', ...))
+ flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
+ if flac_audio:
+ audios.append(flac_audio)
+ formats = [{
+ 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
+ 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
+ 'acodec': audio.get('codecs'),
+ 'vcodec': 'none',
+ 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
+ 'filesize': int_or_none(audio.get('size'))
+ } for audio in audios]
+
+ formats.extend({
+ 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
+ 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
+ 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'vcodec': video.get('codecs'),
+ 'acodec': 'none' if audios else None,
+ 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
+ 'filesize': int_or_none(video.get('size')),
+ 'quality': int_or_none(video.get('id')),
+ 'format': format_names.get(video.get('id')),
+ } for video in traverse_obj(play_info, ('dash', 'video', ...)))
+
+ missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
+ if missing_formats:
+ self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
+ f'you have to log in or become a premium member to download them. {self._login_hint()}')
+
+ return formats
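
Note: the missing-format check above is a plain set difference between the qualities the API advertises and the qualities actually extracted. With toy values:

    format_names = {16: '360P', 64: '720P', 116: '1080P60'}  # advertised
    extracted = [{'quality': 16}, {'quality': 64}]           # actually available
    missing = format_names.keys() - {f['quality'] for f in extracted}
    assert missing == {116}  # 1080P60 would require a premium login
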
+
+ def json2srt(self, json_data):
+ srt_data = ''
+ for idx, line in enumerate(json_data.get('body') or []):
+ srt_data += (f'{idx + 1}\n'
+ f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
+ f'{line["content"]}\n\n')
+ return srt_data
+
+ def _get_subtitles(self, video_id, initial_state, cid):
+ subtitles = {
+ 'danmaku': [{
+ 'ext': 'xml',
+ 'url': f'https://comment.bilibili.com/{cid}.xml',
+ }]
+ }
+
+ for s in traverse_obj(initial_state, ('videoData', 'subtitle', 'list')) or []:
+ subtitles.setdefault(s['lan'], []).append({
+ 'ext': 'srt',
+ 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
+ })
+ return subtitles
+
+ def _get_chapters(self, aid, cid):
+ chapters = aid and cid and self._download_json(
+ 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
+ note='Extracting chapters', fatal=False)
+ return traverse_obj(chapters, ('data', 'view_points', ..., {
+ 'title': 'content',
+ 'start_time': 'from',
+ 'end_time': 'to',
+ })) or None
+
+ def _get_comments(self, aid):
+ for idx in itertools.count(1):
+ replies = traverse_obj(
+ self._download_json(
+ f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
+ aid, note=f'Extracting comments from page {idx}', fatal=False),
+ ('data', 'replies'))
+ if not replies:
+ return
+ for children in map(self._get_all_children, replies):
+ yield from children
+
+ def _get_all_children(self, reply):
+ yield {
+ 'author': traverse_obj(reply, ('member', 'uname')),
+ 'author_id': traverse_obj(reply, ('member', 'mid')),
+ 'id': reply.get('rpid'),
+ 'text': traverse_obj(reply, ('content', 'message')),
+ 'timestamp': reply.get('ctime'),
+ 'parent': reply.get('parent') or 'root',
+ }
+ for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
+ yield from children
+
+
+class BiliBiliIE(BilibiliBaseIE):
+ _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
_TESTS = [{
+ 'url': 'https://www.bilibili.com/video/BV13x41117TL',
+ 'info_dict': {
+ 'id': 'BV13x41117TL',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'ext': 'mp4',
+ 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
+ 'uploader_id': '65880958',
+ 'uploader': '阿滴英文',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'duration': 554.117,
+ 'tags': list,
+ 'comment_count': int,
+ 'upload_date': '20170301',
+ 'timestamp': 1488353834,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ # old av URL version
'url': 'http://www.bilibili.com/video/av1074402/',
- 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
'info_dict': {
- 'id': '1074402_part1',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'ext': 'mp4',
- 'title': '【金坷垃】金泡沫',
- 'uploader_id': '156160',
'uploader': '菊子桑',
+ 'uploader_id': '156160',
+ 'id': 'BV11x411K7CN',
+ 'title': '【金坷垃】金泡沫',
+ 'duration': 308.36,
'upload_date': '20140420',
+ 'timestamp': 1397983878,
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
- 'timestamp': 1398012678,
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': list,
},
+ 'params': {'skip_download': True},
}, {
- # Tested in BiliBiliBangumiIE
- 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
- 'only_matching': True,
+ 'note': 'Anthology',
+ 'url': 'https://www.bilibili.com/video/BV1bK411W797',
+ 'info_dict': {
+ 'id': 'BV1bK411W797',
+ 'title': '物语中的人物是如何吐槽自己的OP的'
+ },
+ 'playlist_count': 18,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BV1bK411W797_p1',
+ 'ext': 'mp4',
+ 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
+ 'tags': 'count:11',
+ 'timestamp': 1589601697,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'uploader': '打牌还是打桩',
+ 'uploader_id': '150259984',
+ 'like_count': int,
+ 'comment_count': int,
+ 'upload_date': '20200516',
+ 'view_count': int,
+ 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
+ 'duration': 90.314,
+ }
+ }]
}, {
- # bilibili.tv
- 'url': 'http://www.bilibili.tv/video/av1074402/',
- 'only_matching': True,
+ 'note': 'Specific page of Anthology',
+ 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
+ 'info_dict': {
+ 'id': 'BV1bK411W797_p1',
+ 'ext': 'mp4',
+ 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
+ 'tags': 'count:11',
+ 'timestamp': 1589601697,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'uploader': '打牌还是打桩',
+ 'uploader_id': '150259984',
+ 'like_count': int,
+ 'comment_count': int,
+ 'upload_date': '20200516',
+ 'view_count': int,
+ 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
+ 'duration': 90.314,
+ }
}, {
- 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
- 'md5': '3f721ad1e75030cc06faf73587cfec57',
+ 'note': 'video has subtitles',
+ 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
- 'id': '100643_part1',
+ 'id': 'BV12N4y1M7rh',
'ext': 'mp4',
- 'title': 'CHAOS;CHILD',
- 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
+ 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
+ 'tags': list,
+ 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
+ 'duration': 313.557,
+ 'upload_date': '20220709',
+ 'uploader': '小夫Tech',
+ 'timestamp': 1657347907,
+ 'uploader_id': '1326814124',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'subtitles': 'count:2'
},
- 'skip': 'Geo-restricted to China',
+ 'params': {'listsubtitles': True},
}, {
- 'url': 'http://www.bilibili.com/video/av8903802/',
+ 'url': 'https://www.bilibili.com/video/av8903802/',
'info_dict': {
- 'id': '8903802_part1',
+ 'id': 'BV13x41117TL',
'ext': 'mp4',
'title': '阿滴英文|英文歌分享#6 "Closer',
'upload_date': '20170301',
- 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
- 'timestamp': 1488382634,
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'timestamp': 1488353834,
'uploader_id': '65880958',
'uploader': '阿滴英文',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'duration': 554.117,
+ 'tags': list,
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
},
'params': {
'skip_download': True,
},
}, {
- # new BV video id format
- 'url': 'https://www.bilibili.com/video/BV1JE411F741',
- 'only_matching': True,
- }, {
- # Anthology
- 'url': 'https://www.bilibili.com/video/BV1bK411W797',
+ 'note': 'video has chapter',
+ 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
'info_dict': {
- 'id': 'BV1bK411W797',
- 'title': '物语中的人物是如何吐槽自己的OP的'
+ 'id': 'BV1vL411G7N7',
+ 'ext': 'mp4',
+ 'title': '如何为你的B站视频添加进度条分段',
+ 'timestamp': 1634554558,
+ 'upload_date': '20211018',
+ 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
+ 'tags': list,
+ 'uploader': '爱喝咖啡的当麻',
+ 'duration': 669.482,
+ 'uploader_id': '1680903',
+ 'chapters': 'count:6',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
- 'playlist_count': 17,
+ 'params': {'skip_download': True},
}]
- _APP_KEY = 'iVGUTjsxvpLeuDCf'
- _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
+ play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
- def _report_error(self, result):
- if 'message' in result:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
- elif 'code' in result:
- raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
- else:
- raise ExtractorError('Can\'t extract Bangumi episode ID')
+ video_data = initial_state['videoData']
+ video_id, title = video_data['bvid'], video_data.get('title')
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
+ # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
+ page_list_json = traverse_obj(
+ self._download_json(
+ 'https://api.bilibili.com/x/player/pagelist', video_id,
+ fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
+ note='Extracting videos in anthology'),
+ 'data', expected_type=list) or []
+ is_anthology = len(page_list_json) > 1
+
+ part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
+ if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
+ return self.playlist_from_matches(
+ page_list_json, video_id, title, ie=BiliBiliIE,
+ getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id_bv') or mobj.group('id')
+ if is_anthology:
+ title += f' p{part_id or 1:02d} {traverse_obj(page_list_json, ((part_id or 1) - 1, "part")) or ""}'
- av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
- video_id = av_id
+ aid = video_data.get('aid')
+ old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
- info = {}
- anime_id = mobj.group('anime_id')
- page_id = mobj.group('page')
- webpage = self._download_webpage(url, video_id)
+ cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
- # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
- # If the video has no page argument, check to see if it's an anthology
- if page_id is None:
- if not self.get_param('noplaylist'):
- r = self._extract_anthology_entries(bv_id, video_id, webpage)
- if r is not None:
- self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
- return r
- else:
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-
- if 'anime/' not in url:
- cid = self._search_regex(
- r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
- default=None
- ) or self._search_regex(
- r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
- default=None
- ) or compat_parse_qs(self._search_regex(
- [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
- r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
- r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
- webpage, 'player parameters'))['cid'][0]
- else:
- if 'no_bangumi_tip' not in smuggled_data:
- self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run hypervideo with %s' % (
- video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
- headers = {
- 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
- 'Referer': url
- }
- headers.update(self.geo_verification_headers())
-
- js = self._download_json(
- 'http://bangumi.bilibili.com/web_api/get_source', video_id,
- data=urlencode_postdata({'episode_id': video_id}),
- headers=headers)
- if 'result' not in js:
- self._report_error(js)
- cid = js['result']['cid']
-
- headers = {
- 'Accept': 'application/json',
- 'Referer': url
+ return {
+ 'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
+ 'formats': self.extract_formats(play_info),
+ '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
+ 'title': title,
+ 'description': traverse_obj(initial_state, ('videoData', 'desc')),
+ 'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
+ 'uploader': traverse_obj(initial_state, ('upData', 'name')),
+ 'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
+ 'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
+ 'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
+ 'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
+ 'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
+ 'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
+ 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ 'chapters': self._get_chapters(aid, cid),
+ 'subtitles': self.extract_subtitles(video_id, initial_state, cid),
+ '__post_extractor': self.extract_comments(aid),
+ 'http_headers': {'Referer': url},
}
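
Note: _old_archive_ids above keeps download archives written under the old '<aid>_part<n>' ID scheme working after the switch to BV IDs. An assumed sketch of the archive-entry shape, for illustration only:

    # make_archive_id presumably lowercases the IE key and joins it with the
    # old-style video ID; one such line is matched against the archive file.
    def make_archive_id_sketch(ie_key, video_id):
        return f'{ie_key.lower()} {video_id}'

    assert make_archive_id_sketch('BiliBili', '1074402_part1') == 'bilibili 1074402_part1'
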
- headers.update(self.geo_verification_headers())
- video_info = self._parse_json(
- self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None) or '{}',
- video_id, fatal=False)
- video_info = video_info.get('data') or {}
- durl = traverse_obj(video_info, ('dash', 'video'))
- audios = traverse_obj(video_info, ('dash', 'audio')) or []
- entries = []
+class BiliBiliBangumiIE(BilibiliBaseIE):
+ _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
- RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
- for num, rendition in enumerate(RENDITIONS, start=1):
- payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
- sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
- if not video_info:
- video_info = self._download_json(
- 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
- video_id, note='Downloading video info page',
- headers=headers, fatal=num == len(RENDITIONS))
- if not video_info:
- continue
-
- if not durl and 'durl' not in video_info:
- if num < len(RENDITIONS):
- continue
- self._report_error(video_info)
-
- formats = []
- for idx, durl in enumerate(durl or video_info['durl']):
- formats.append({
- 'url': durl.get('baseUrl') or durl.get('base_url') or durl.get('url'),
- 'ext': mimetype2ext(durl.get('mimeType') or durl.get('mime_type')),
- 'fps': int_or_none(durl.get('frameRate') or durl.get('frame_rate')),
- 'width': int_or_none(durl.get('width')),
- 'height': int_or_none(durl.get('height')),
- 'vcodec': durl.get('codecs'),
- 'acodec': 'none' if audios else None,
- 'tbr': float_or_none(durl.get('bandwidth'), scale=1000),
- 'filesize': int_or_none(durl.get('size')),
- })
- for backup_url in traverse_obj(durl, 'backup_url', expected_type=list) or []:
- formats.append({
- 'url': backup_url,
- 'quality': -2 if 'hd.mp4' in backup_url else -3,
- })
-
- for audio in audios:
- formats.append({
- 'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'),
- 'ext': mimetype2ext(audio.get('mimeType') or audio.get('mime_type')),
- 'fps': int_or_none(audio.get('frameRate') or audio.get('frame_rate')),
- 'width': int_or_none(audio.get('width')),
- 'height': int_or_none(audio.get('height')),
- 'acodec': audio.get('codecs'),
- 'vcodec': 'none',
- 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
- 'filesize': int_or_none(audio.get('size'))
- })
- for backup_url in traverse_obj(audio, 'backup_url', expected_type=list) or []:
- formats.append({
- 'url': backup_url,
- # backup URLs have lower priorities
- 'quality': -3,
- })
-
- info.update({
- 'id': video_id,
- 'duration': float_or_none(durl.get('length'), 1000),
- 'formats': formats,
- 'http_headers': {
- 'Referer': url,
- },
- })
- break
-
- self._sort_formats(formats)
-
- title = self._html_search_regex((
- r'<h1[^>]+title=(["\'])(?P<content>[^"\']+)',
- r'(?s)<h1[^>]*>(?P<content>.+?)</h1>',
- self._meta_regex('title')
- ), webpage, 'title', group='content', fatal=False)
-
- # Get part title for anthologies
- if page_id is not None:
- # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video.
- part_info = traverse_obj(self._download_json(
- f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
- video_id, note='Extracting videos in anthology'), 'data', expected_type=list)
- title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title
-
- description = self._html_search_meta('description', webpage)
- timestamp = unified_timestamp(self._html_search_regex(
- r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
- default=None) or self._html_search_meta(
- 'uploadDate', webpage, 'timestamp', default=None))
- thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
-
- # TODO 'view_count' requires deobfuscating Javascript
- info.update({
- 'id': f'{video_id}_part{page_id or 1}',
- 'cid': cid,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'thumbnail': thumbnail,
- 'duration': float_or_none(video_info.get('timelength'), scale=1000),
- })
-
- uploader_mobj = re.search(
- r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
- webpage)
- if uploader_mobj:
- info.update({
- 'uploader': uploader_mobj.group('name').strip(),
- 'uploader_id': uploader_mobj.group('id'),
- })
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/bangumi/play/ss897',
+ 'info_dict': {
+ 'id': 'ss897',
+ 'ext': 'mp4',
+ 'series': '神的记事本',
+ 'season': '神的记事本',
+ 'season_id': 897,
+ 'season_number': 1,
+ 'episode': '你与旅行包',
+ 'episode_number': 2,
+ 'title': '神的记事本:第2话 你与旅行包',
+ 'duration': 1428.487,
+ 'timestamp': 1310809380,
+ 'upload_date': '20110716',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ }, {
+ 'url': 'https://www.bilibili.com/bangumi/play/ep508406',
+ 'only_matching': True,
+ }]
- if not info.get('uploader'):
- info['uploader'] = self._html_search_meta(
- 'author', webpage, 'uploader', default=None)
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
- top_level_info = {
- 'tags': traverse_obj(self._download_json(
- f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}',
- video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
- }
+ if '您所在的地区无法观看本片' in webpage:
+ raise GeoRestrictedError('This video is restricted')
+ elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
+ or '正在观看预览,大会员免费看全片' in webpage):
+ self.raise_login_required('This video is for premium members only')
- info['subtitles'] = {
- 'danmaku': [{
- 'ext': 'xml',
- 'url': f'https://comment.bilibili.com/{cid}.xml',
- }]
- }
+ play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
+ formats = self.extract_formats(play_info)
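+        # A durl-only response with no dash streams alongside the paywall banner means only a preview is served to non-members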
+ if (not formats and '成为大会员抢先看' in webpage
+ and play_info.get('durl') and not play_info.get('dash')):
+ self.raise_login_required('This video is for premium members only')
- r'''
- # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
- # See https://github.com/animelover1984/youtube-dl
+ initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
- raw_danmaku = self._download_webpage(
- f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments')
- danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
- entries[0]['subtitles'] = {
- 'danmaku': [{
- 'ext': 'ass',
- 'data': danmaku
- }]
+        season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
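+        # The 1-based season number is the position of season_id within the listed seasons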
+ season_number = season_id and next((
+ idx + 1 for idx, e in enumerate(
+ traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
+ if e.get('season_id') == season_id
+ ), None)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': traverse_obj(initial_state, 'h1Title'),
+ 'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
+ 'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
+ 'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
+ 'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
+ 'season_id': season_id,
+ 'season_number': season_number,
+ 'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
+ 'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
+ 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ 'subtitles': self.extract_subtitles(
+ video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
+ '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
+ 'http_headers': {'Referer': url, **self.geo_verification_headers()},
}
- '''
- top_level_info['__post_extractor'] = self.extract_comments(video_id)
- for entry in entries:
- entry.update(info)
+class BiliBiliBangumiMediaIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
+ 'info_dict': {
+ 'id': '24097891',
+ },
+ 'playlist_mincount': 25,
+ }]
- if len(entries) == 1:
- entries[0].update(top_level_info)
- return entries[0]
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ webpage = self._download_webpage(url, media_id)
- for idx, entry in enumerate(entries):
- entry['id'] = '%s_part%d' % (video_id, (idx + 1))
+ initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
+ episode_list = self._download_json(
+ 'https://api.bilibili.com/pgc/web/season/section', media_id,
+ query={'season_id': initial_state['mediaInfo']['season_id']},
+ note='Downloading season info')['result']['main_section']['episodes']
- return {
- 'id': str(video_id),
- 'bv_id': bv_id,
- 'title': title,
- 'description': description,
- **info, **top_level_info
- }
+ return self.playlist_result((
+ self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
+ for entry in episode_list), media_id)
- def _extract_anthology_entries(self, bv_id, video_id, webpage):
- title = self._html_search_regex(
- (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'(?s)<h1[^>]*>(?P<title>.+?)</h1>',
- r'<title>(?P<title>.+?)</title>'), webpage, 'title',
- group='title')
- json_data = self._download_json(
- f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
- video_id, note='Extracting videos in anthology')
-
- if json_data['data']:
- return self.playlist_from_matches(
- json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(),
- getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))
-
- def _get_video_id_set(self, id, is_bv):
- query = {'bvid': id} if is_bv else {'aid': id}
- response = self._download_json(
- "http://api.bilibili.cn/x/web-interface/view",
- id, query=query,
- note='Grabbing original ID via API')
-
- if response['code'] == -400:
- raise ExtractorError('Video ID does not exist', expected=True, video_id=id)
- elif response['code'] != 0:
- raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})',
- expected=True, video_id=id)
- return response['data']['aid'], response['data']['bvid']
-
- def _get_comments(self, video_id, commentPageNumber=0):
- for idx in itertools.count(1):
- replies = traverse_obj(
- self._download_json(
- f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
- video_id, note=f'Extracting comments from page {idx}', fatal=False),
- ('data', 'replies'))
- if not replies:
- return
- for children in map(self._get_all_children, replies):
- yield from children
- def _get_all_children(self, reply):
- yield {
- 'author': traverse_obj(reply, ('member', 'uname')),
- 'author_id': traverse_obj(reply, ('member', 'mid')),
- 'id': reply.get('rpid'),
- 'text': traverse_obj(reply, ('content', 'message')),
- 'timestamp': reply.get('ctime'),
- 'parent': reply.get('parent') or 'root',
- }
- for children in map(self._get_all_children, reply.get('replies') or []):
- yield from children
+class BilibiliSpaceBaseIE(InfoExtractor):
+ def _extract_playlist(self, fetch_page, get_metadata, get_entries):
+ first_page = fetch_page(0)
+ metadata = get_metadata(first_page)
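+        # Reuse the eagerly fetched first page as page 0 so it is not downloaded twice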
+ paged_list = InAdvancePagedList(
+ lambda idx: get_entries(fetch_page(idx) if idx else first_page),
+ metadata['page_count'], metadata['page_size'])
-class BiliBiliBangumiIE(InfoExtractor):
- _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
+ return metadata, paged_list
- IE_NAME = 'bangumi.bilibili.com'
- IE_DESC = 'BiliBili番剧'
+class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
+ _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
_TESTS = [{
- 'url': 'http://bangumi.bilibili.com/anime/1869',
- 'info_dict': {
- 'id': '1869',
- 'title': '混沌武士',
- 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
- },
- 'playlist_count': 26,
- }, {
- 'url': 'http://bangumi.bilibili.com/anime/1869',
+ 'url': 'https://space.bilibili.com/3985676/video',
'info_dict': {
- 'id': '1869',
- 'title': '混沌武士',
- 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
- },
- 'playlist': [{
- 'md5': '91da8621454dd58316851c27c68b0c13',
- 'info_dict': {
- 'id': '40062',
- 'ext': 'mp4',
- 'title': '混沌武士',
- 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
- 'timestamp': 1414538739,
- 'upload_date': '20141028',
- 'episode': '疾风怒涛 Tempestuous Temperaments',
- 'episode_number': 1,
- },
- }],
- 'params': {
- 'playlist_items': '1',
+ 'id': '3985676',
},
+ 'playlist_mincount': 178,
}]
- @classmethod
- def suitable(cls, url):
- return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
-
def _real_extract(self, url):
- bangumi_id = self._match_id(url)
-
- # Sometimes this API returns a JSONP response
- season_info = self._download_json(
- 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
- bangumi_id, transform_source=strip_jsonp)['result']
+ playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
+ if not is_video_url:
+ self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
+                           'To download audio, add "/audio" to the URL')
- entries = [{
- '_type': 'url_transparent',
- 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
- 'ie_key': BiliBiliIE.ie_key(),
- 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
- 'episode': episode.get('index_title'),
- 'episode_number': int_or_none(episode.get('index')),
- } for episode in season_info['episodes']]
+ def fetch_page(page_idx):
+ try:
+ response = self._download_json('https://api.bilibili.com/x/space/arc/search',
+ playlist_id, note=f'Downloading page {page_idx}',
+ query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
+ except ExtractorError as e:
+ if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
+ raise ExtractorError(
+                        'Request is blocked by the server (412); please add cookies, wait, and try again later.', expected=True)
+ raise
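+            # The server can also signal throttling in-band with code -401 instead of an HTTP error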
+ if response['code'] == -401:
+ raise ExtractorError(
+                    'Request is blocked by the server (401); please add cookies, wait, and try again later.', expected=True)
+ return response['data']
+
+ def get_metadata(page_data):
+ page_size = page_data['page']['ps']
+ entry_count = page_data['page']['count']
+ return {
+ 'page_count': math.ceil(entry_count / page_size),
+ 'page_size': page_size,
+ }
- entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
+        def get_entries(page_data):
+            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
+                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
- return self.playlist_result(
- entries, bangumi_id,
- season_info.get('bangumi_title'), season_info.get('evaluate'))
+ metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+ return self.playlist_result(paged_list, playlist_id)
-class BilibiliChannelIE(InfoExtractor):
- _VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)'
- _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
+class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
+ _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
_TESTS = [{
- 'url': 'https://space.bilibili.com/3985676/video',
- 'info_dict': {},
- 'playlist_mincount': 112,
+ 'url': 'https://space.bilibili.com/3985676/audio',
+ 'info_dict': {
+ 'id': '3985676',
+ },
+ 'playlist_mincount': 1,
}]
- def _entries(self, list_id):
- count, max_count = 0, None
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ def fetch_page(page_idx):
+ return self._download_json(
+ 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
+ note=f'Downloading page {page_idx}',
+ query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
+
+ def get_metadata(page_data):
+ return {
+ 'page_count': page_data['pageCount'],
+ 'page_size': page_data['pageSize'],
+ }
- for page_num in itertools.count(1):
- data = self._download_json(
- self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']
+        def get_entries(page_data):
+            for entry in page_data.get('data', []):
+                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
- max_count = max_count or traverse_obj(data, ('page', 'count'))
+ metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+ return self.playlist_result(paged_list, playlist_id)
- entries = traverse_obj(data, ('list', 'vlist'))
- if not entries:
- return
- for entry in entries:
- yield self.url_result(
- 'https://www.bilibili.com/video/%s' % entry['bvid'],
- BiliBiliIE.ie_key(), entry['bvid'])
- count += len(entries)
- if max_count and count >= max_count:
- return
+class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
+    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
+ _TESTS = [{
+ 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
+ 'info_dict': {
+ 'id': '2142762_57445',
+ 'title': '《底特律 变人》'
+ },
+ 'playlist_mincount': 31,
+ }]
def _real_extract(self, url):
- list_id = self._match_id(url)
- return self.playlist_result(self._entries(list_id), list_id)
+ mid, sid = self._match_valid_url(url).group('mid', 'sid')
+ playlist_id = f'{mid}_{sid}'
+
+ def fetch_page(page_idx):
+ return self._download_json(
+ 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
+ playlist_id, note=f'Downloading page {page_idx}',
+ query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
+
+ def get_metadata(page_data):
+ page_size = page_data['page']['page_size']
+ entry_count = page_data['page']['total']
+ return {
+ 'page_count': math.ceil(entry_count / page_size),
+ 'page_size': page_size,
+ 'title': traverse_obj(page_data, ('meta', 'name'))
+ }
+
+ def get_entries(page_data):
+ for entry in page_data.get('archives', []):
+ yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
+ BiliBiliIE, entry['bvid'])
+
+ metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
+ return self.playlist_result(paged_list, playlist_id, metadata['title'])
class BilibiliCategoryIE(InfoExtractor):
@@ -568,8 +620,7 @@ class BilibiliCategoryIE(InfoExtractor):
self._fetch_page, api_url, num_pages, query), size)
def _real_extract(self, url):
- u = compat_urllib_parse_urlparse(url)
- category, subcategory = u.path.split('/')[2:4]
+ category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
query = '%s: %s' % (category, subcategory)
return self.playlist_result(self._entries(category, subcategory, query), query, query)
@@ -589,14 +640,15 @@ class BiliBiliSearchIE(SearchInfoExtractor):
'keyword': query,
'page': page_num,
'context': '',
- 'order': 'pubdate',
'duration': 0,
'tids_2': '',
'__refresh__': 'true',
'search_type': 'video',
'tids': 0,
'highlight': 1,
- })['data'].get('result') or []
+ })['data'].get('result')
+ if not videos:
+ break
for video in videos:
yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
@@ -646,6 +698,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
'vcodec': 'none'
}]
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': url,
+ })
+
song = self._call_api('song/info', au_id)
title = song['title']
statistic = song.get('statistic') or {}
@@ -753,17 +810,20 @@ class BiliIntlBaseIE(InfoExtractor):
def json2srt(self, json):
data = '\n\n'.join(
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
- for i, line in enumerate(json['body']) if line.get('content'))
+ for i, line in enumerate(traverse_obj(json, (
+ 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
return data
def _get_subtitles(self, *, ep_id=None, aid=None):
sub_json = self._call_api(
- '/web/v2/subtitle', ep_id or aid, note='Downloading subtitles list',
- errnote='Unable to download subtitles list', query=filter_dict({
+ '/web/v2/subtitle', ep_id or aid, fatal=False,
+ note='Downloading subtitles list', errnote='Unable to download subtitles list',
+ query=filter_dict({
'platform': 'web',
+ 's_locale': 'en_US',
'episode_id': ep_id,
'aid': aid,
- }))
+ })) or {}
subtitles = {}
for sub in sub_json.get('subtitles') or []:
sub_url = sub.get('url')
@@ -818,7 +878,6 @@ class BiliIntlBaseIE(InfoExtractor):
'filesize': aud.get('size'),
})
- self._sort_formats(formats)
return formats
def _extract_video_info(self, video_data, *, ep_id=None, aid=None):
@@ -866,7 +925,7 @@ class BiliIntlBaseIE(InfoExtractor):
class BiliIntlIE(BiliIntlBaseIE):
- _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
+ _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
_TESTS = [{
# Bstation page
'url': 'https://www.bilibili.tv/en/play/34613/341736',
@@ -909,6 +968,10 @@ class BiliIntlIE(BiliIntlBaseIE):
# No language in URL
'url': 'https://www.bilibili.tv/video/2019955076',
'only_matching': True,
+ }, {
+ # Uppercase language in URL
+ 'url': 'https://www.bilibili.tv/EN/video/2019955076',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -916,12 +979,11 @@ class BiliIntlIE(BiliIntlBaseIE):
video_id = ep_id or aid
webpage = self._download_webpage(url, video_id)
# Bstation layout
- initial_data = self._parse_json(self._search_regex(
- r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), video_id, fatal=False) or {}
- video_data = (
- traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
- or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {})
+ initial_data = (
+ self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
+ or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
+ video_data = traverse_obj(
+ initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict)
if season_id and not video_data:
# Non-Bstation layout, read through episode list
@@ -929,11 +991,11 @@ class BiliIntlIE(BiliIntlBaseIE):
video_data = traverse_obj(season_json,
('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id),
expected_type=dict, get_all=False)
- return self._extract_video_info(video_data, ep_id=ep_id, aid=aid)
+ return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid)
class BiliIntlSeriesIE(BiliIntlBaseIE):
- _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
+ _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?play/(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.bilibili.tv/en/play/34613',
'playlist_mincount': 15,
@@ -951,6 +1013,9 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
}, {
'url': 'https://www.biliintl.com/en/play/34613',
'only_matching': True,
+ }, {
+ 'url': 'https://www.biliintl.com/EN/play/34613',
+ 'only_matching': True,
}]
def _entries(self, series_id):
@@ -966,3 +1031,87 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
+
+
+class BiliLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://live\.bilibili\.com/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://live.bilibili.com/196',
+ 'info_dict': {
+ 'id': '33989',
+ 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
+ 'ext': 'flv',
+ 'title': "太空狼人杀联动,不被爆杀就算赢",
+ 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
+ 'timestamp': 1650802769,
+ },
+ 'skip': 'not live'
+ }, {
+ 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
+ 'only_matching': True
+ }]
+
+ _FORMATS = {
+ 80: {'format_id': 'low', 'format_note': '流畅'},
+ 150: {'format_id': 'high_res', 'format_note': '高清'},
+ 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
+ 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
+ 10000: {'format_id': 'source', 'format_note': '原画'},
+ 20000: {'format_id': '4K', 'format_note': '4K'},
+ 30000: {'format_id': 'dolby', 'format_note': '杜比'},
+ }
+
+ _quality = staticmethod(qualities(list(_FORMATS)))
+
+ def _call_api(self, path, room_id, query):
+ api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
+ if api_result.get('code') != 0:
+ raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
+ return api_result.get('data') or {}
+
+ def _parse_formats(self, qn, fmt):
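+        # A format groups several codecs; only the codec whose current_qn equals the requested quality carries usable URLs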
+ for codec in fmt.get('codec') or []:
+ if codec.get('current_qn') != qn:
+ continue
+ for url_info in codec['url_info']:
+ yield {
+ 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
+ 'ext': fmt.get('format_name'),
+ 'vcodec': codec.get('codec_name'),
+ 'quality': self._quality(qn),
+ **self._FORMATS[qn],
+ }
+
+ def _real_extract(self, url):
+ room_id = self._match_id(url)
+ room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
+ if room_data.get('live_status') == 0:
+ raise ExtractorError('Streamer is not live', expected=True)
+
+ formats = []
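+        # Each quality level needs its own API call; _parse_formats then keeps only the codec marked as current for that qn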
+ for qn in self._FORMATS.keys():
+ stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
+ 'room_id': room_id,
+ 'qn': qn,
+ 'codec': '0,1',
+ 'format': '0,2',
+ 'mask': '0',
+ 'no_playurl': '0',
+ 'platform': 'web',
+ 'protocol': '0,1',
+ })
+ for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
+ formats.extend(self._parse_formats(qn, fmt))
+
+ return {
+ 'id': room_id,
+ 'title': room_data.get('title'),
+ 'description': room_data.get('description'),
+ 'thumbnail': room_data.get('user_cover'),
+ 'timestamp': stream_data.get('live_time'),
+ 'formats': formats,
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/hypervideo_dl/extractor/biobiochiletv.py b/hypervideo_dl/extractor/biobiochiletv.py
index dc86c57..180c965 100644
--- a/hypervideo_dl/extractor/biobiochiletv.py
+++ b/hypervideo_dl/extractor/biobiochiletv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/biqle.py b/hypervideo_dl/extractor/biqle.py
index 2b57bad..0277535 100644
--- a/hypervideo_dl/extractor/biqle.py
+++ b/hypervideo_dl/extractor/biqle.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .vk import VKIE
from ..compat import compat_b64decode
@@ -89,7 +86,6 @@ class BIQLEIE(InfoExtractor):
'height': int_or_none(height),
'ext': ext,
})
- self._sort_formats(formats)
thumbnails = []
for k, v in item.items():
diff --git a/hypervideo_dl/extractor/bitchute.py b/hypervideo_dl/extractor/bitchute.py
index dcae6f4..10e7b0b 100644
--- a/hypervideo_dl/extractor/bitchute.py
+++ b/hypervideo_dl/extractor/bitchute.py
@@ -1,14 +1,20 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
+import functools
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- GeoRestrictedError,
+ HEADRequest,
+ OnDemandPagedList,
+ clean_html,
+ get_element_by_class,
+ get_element_by_id,
+ get_elements_html_by_class,
+ int_or_none,
orderedSet,
+ parse_count,
+ parse_duration,
+ traverse_obj,
unified_strdate,
urlencode_postdata,
)
@@ -16,11 +22,12 @@ from ..utils import (
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
'md5': '7e427d7ed7af5a75b5855705ec750e2b',
'info_dict': {
- 'id': 'szoMrox2JEI',
+ 'id': 'UGlrF9o9b-Q',
'ext': 'mp4',
'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
@@ -29,130 +36,198 @@ class BitChuteIE(InfoExtractor):
'upload_date': '20170103',
},
}, {
+ # video not downloadable in browser, but we can recover it
+ 'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/',
+ 'md5': '05c12397d5354bf24494885b08d24ed1',
+ 'info_dict': {
+ 'id': '2s6B3nZjAk7R',
+ 'ext': 'mp4',
+ 'filesize': 71537926,
+ 'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
+ 'description': 'md5:228ee93bd840a24938f536aeac9cf749',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'BitChute',
+ 'upload_date': '20181113',
+ },
+ 'params': {'check_formats': None},
+ }, {
+ # restricted video
+ 'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
+ 'info_dict': {
+ 'id': 'WEnQU7XGcTdl',
+ 'ext': 'mp4',
+ 'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
+ },
+ 'params': {'skip_download': True},
+ 'skip': 'Georestricted in DE',
+ }, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
'only_matching': True,
}, {
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
'only_matching': True,
}]
+ _GEO_BYPASS = False
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL,
- webpage)]
+ _HEADERS = {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
+ 'Referer': 'https://www.bitchute.com/',
+ }
+
+ def _check_format(self, video_url, video_id):
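+        # Probe the original URL first (the \g<2> host re-inserts the matched seed), then known fallback seed hosts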
+ urls = orderedSet(
+ re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
+ for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
+ for url in urls:
+ try:
+ response = self._request_webpage(
+ HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
+ except ExtractorError as e:
+ self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
+ continue
+ return {
+ 'url': url,
+ 'filesize': int_or_none(response.headers.get('Content-Length'))
+ }
+
+ def _raise_if_restricted(self, webpage):
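+        # Restricted videos render a static error page whose title names the restriction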
+ page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
+ if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
+ reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
+ self.raise_geo_restricted(reason)
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(
- 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
- })
+ f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
- title = self._html_search_regex(
- (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
- webpage, 'title', default=None) or self._html_search_meta(
- 'description', webpage, 'title',
- default=None) or self._og_search_description(webpage)
+ self._raise_if_restricted(webpage)
+ publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
- format_urls = []
- for mobj in re.finditer(
- r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
- format_urls.append(mobj.group('url'))
- format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
-
- formats = [
- {'url': format_url}
- for format_url in orderedSet(format_urls)]
+ formats = []
+ for format_ in traverse_obj(entries, (0, 'formats', ...)):
+ if self.get_param('check_formats') is not False:
+ format_.update(self._check_format(format_.pop('url'), video_id) or {})
+ if 'url' not in format_:
+ continue
+ formats.append(format_)
if not formats:
- entries = self._parse_html5_media_entries(
- url, webpage, video_id)
- if not entries:
- error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
- if error == 'Video Unavailable':
- raise GeoRestrictedError(error)
- raise ExtractorError(error)
- formats = entries[0]['formats']
-
- self._check_formats(formats, video_id)
- self._sort_formats(formats)
-
- description = self._html_search_regex(
- r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
- webpage, 'description', fatal=False)
- thumbnail = self._og_search_thumbnail(
- webpage, default=None) or self._html_search_meta(
- 'twitter:image:src', webpage, 'thumbnail')
- uploader = self._html_search_regex(
- (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
- r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
- webpage, 'uploader', fatal=False)
-
- upload_date = unified_strdate(self._search_regex(
- r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
- webpage, 'upload date', fatal=False))
+ self.raise_no_formats(
+ 'Video is unavailable. Please make sure this video is playable in the browser '
+ 'before reporting this issue.', expected=True, video_id=video_id)
return {
'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'upload_date': upload_date,
+ 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': clean_html(get_element_by_class('owner', webpage)),
+ 'upload_date': unified_strdate(self._search_regex(
+ r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats,
}
class BitChuteChannelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'https://www.bitchute.com/channel/victoriaxrave/',
- 'playlist_mincount': 185,
+ _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.bitchute.com/channel/bitchute/',
'info_dict': {
-            'id': 'victoriaxrave',
+            'id': 'bitchute',
+            'title': 'BitChute',
+            'description': 'md5:5329fb3866125afa9446835594a9b138',
        },
-    }
+        'playlist': [
+            {
+                'md5': '7e427d7ed7af5a75b5855705ec750e2b',
+                'info_dict': {
+                    'id': 'UGlrF9o9b-Q',
+                    'ext': 'mp4',
+                    'filesize': None,
+                    'title': 'This is the first video on #BitChute !',
+                    'description': 'md5:a0337e7b1fe39e32336974af8173a034',
+                    'thumbnail': r're:^https?://.*\.jpg$',
+                    'uploader': 'BitChute',
+                    'upload_date': '20170103',
+                    'duration': 16,
+                    'view_count': int,
+                },
+            }
+        ],
+        'params': {
+            'skip_download': True,
+            'playlist_items': '-1',
+        },
+ }, {
+ 'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/',
+ 'playlist_mincount': 20,
+ 'info_dict': {
+ 'id': 'wV9Imujxasw9',
+ 'title': 'Bruce MacDonald and "The Light of Darkness"',
+ 'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
+ }
+ }]
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
+ PAGE_SIZE = 25
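+    # Channel and playlist pages share the same "extend" endpoint but use different HTML class names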
+ HTML_CLASS_NAMES = {
+ 'channel': {
+ 'container': 'channel-videos-container',
+ 'title': 'channel-videos-title',
+ 'description': 'channel-videos-text',
+ },
+ 'playlist': {
+ 'container': 'playlist-video',
+ 'title': 'title',
+ 'description': 'description',
+ }
- def _entries(self, channel_id):
- channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
- offset = 0
- for page_num in itertools.count(1):
- data = self._download_json(
- '%sextend/' % channel_url, channel_id,
- 'Downloading channel page %d' % page_num,
- data=urlencode_postdata({
- 'csrfmiddlewaretoken': self._TOKEN,
- 'name': '',
- 'offset': offset,
- }), headers={
- 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
- 'Referer': channel_url,
- 'X-Requested-With': 'XMLHttpRequest',
- 'Cookie': 'csrftoken=%s' % self._TOKEN,
- })
- if data.get('success') is False:
- break
- html = data.get('html')
- if not html:
- break
- video_ids = re.findall(
- r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
- html)
- if not video_ids:
- break
- offset += len(video_ids)
- for video_id in video_ids:
- yield self.url_result(
- 'https://www.bitchute.com/video/%s' % video_id,
- ie=BitChuteIE.ie_key(), video_id=video_id)
+ }
+
+ @staticmethod
+ def _make_url(playlist_id, playlist_type):
+ return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
+
+ def _fetch_page(self, playlist_id, playlist_type, page_num):
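+        # The "extend" endpoint pages by absolute offset, so convert the page number into one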
+ playlist_url = self._make_url(playlist_id, playlist_type)
+ data = self._download_json(
+ f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}',
+ data=urlencode_postdata({
+ 'csrfmiddlewaretoken': self._TOKEN,
+ 'name': '',
+ 'offset': page_num * self.PAGE_SIZE,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': playlist_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Cookie': f'csrftoken={self._TOKEN}',
+ })
+ if not data.get('success'):
+ return
+ classes = self.HTML_CLASS_NAMES[playlist_type]
+ for video_html in get_elements_html_by_class(classes['container'], data.get('html')):
+ video_id = self._search_regex(
+ r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', video_html, 'video id', default=None)
+ if not video_id:
+ continue
+ yield self.url_result(
+ f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id, url_transparent=True,
+ title=clean_html(get_element_by_class(classes['title'], video_html)),
+ description=clean_html(get_element_by_class(classes['description'], video_html)),
+ duration=parse_duration(get_element_by_class('video-duration', video_html)),
+ view_count=parse_count(clean_html(get_element_by_class('video-views', video_html))))
def _real_extract(self, url):
- channel_id = self._match_id(url)
+ playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
+ webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id)
+
+ page_func = functools.partial(self._fetch_page, playlist_id, playlist_type)
return self.playlist_result(
- self._entries(channel_id), playlist_id=channel_id)
+ OnDemandPagedList(page_func, self.PAGE_SIZE), playlist_id,
+ title=self._html_extract_title(webpage, default=None),
+ description=self._html_search_meta(
+ ('description', 'og:description', 'twitter:description'), webpage, default=None),
+ playlist_count=int_or_none(self._html_search_regex(
+ r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None)))
diff --git a/hypervideo_dl/extractor/bitwave.py b/hypervideo_dl/extractor/bitwave.py
index e6e093f..a82cd26 100644
--- a/hypervideo_dl/extractor/bitwave.py
+++ b/hypervideo_dl/extractor/bitwave.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -47,7 +45,6 @@ class BitwaveStreamIE(InfoExtractor):
formats = self._extract_m3u8_formats(
channel['data']['url'], username,
'mp4')
- self._sort_formats(formats)
return {
'id': username,
diff --git a/hypervideo_dl/extractor/blackboardcollaborate.py b/hypervideo_dl/extractor/blackboardcollaborate.py
index 8ae2941..8f41c89 100644
--- a/hypervideo_dl/extractor/blackboardcollaborate.py
+++ b/hypervideo_dl/extractor/blackboardcollaborate.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import parse_iso8601
diff --git a/hypervideo_dl/extractor/bleacherreport.py b/hypervideo_dl/extractor/bleacherreport.py
index d1bf8e8..8d8fabe 100644
--- a/hypervideo_dl/extractor/bleacherreport.py
+++ b/hypervideo_dl/extractor/bleacherreport.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .amp import AMPIE
from ..utils import (
diff --git a/hypervideo_dl/extractor/blinkx.py b/hypervideo_dl/extractor/blinkx.py
deleted file mode 100644
index d70a3b3..0000000
--- a/hypervideo_dl/extractor/blinkx.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
- remove_start,
- int_or_none,
-)
-
-
-class BlinkxIE(InfoExtractor):
- _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
- IE_NAME = 'blinkx'
-
- _TEST = {
- 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
- 'md5': '337cf7a344663ec79bf93a526a2e06c7',
- 'info_dict': {
- 'id': 'Da0Gw3xc',
- 'ext': 'mp4',
- 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
- 'uploader': 'IGN News',
- 'upload_date': '20150217',
- 'timestamp': 1424215740,
- 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
- 'duration': 47.743333,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- display_id = video_id[:8]
-
- api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
- + 'video=%s' % video_id)
- data_json = self._download_webpage(api_url, display_id)
- data = json.loads(data_json)['api']['results'][0]
- duration = None
- thumbnails = []
- formats = []
- for m in data['media']:
- if m['type'] == 'jpg':
- thumbnails.append({
- 'url': m['link'],
- 'width': int(m['w']),
- 'height': int(m['h']),
- })
- elif m['type'] == 'original':
- duration = float(m['d'])
- elif m['type'] == 'youtube':
- yt_id = m['link']
- self.to_screen('Youtube video detected: %s' % yt_id)
- return self.url_result(yt_id, 'Youtube', video_id=yt_id)
- elif m['type'] in ('flv', 'mp4'):
- vcodec = remove_start(m['vcodec'], 'ff')
- acodec = remove_start(m['acodec'], 'ff')
- vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
- abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
- tbr = vbr + abr if vbr and abr else None
- format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
- formats.append({
- 'format_id': format_id,
- 'url': m['link'],
- 'vcodec': vcodec,
- 'acodec': acodec,
- 'abr': abr,
- 'vbr': vbr,
- 'tbr': tbr,
- 'width': int_or_none(m.get('w')),
- 'height': int_or_none(m.get('h')),
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': display_id,
- 'fullid': video_id,
- 'title': data['title'],
- 'formats': formats,
- 'uploader': data.get('channel_name'),
- 'timestamp': data.get('pubdate_epoch'),
- 'description': data.get('description'),
- 'thumbnails': thumbnails,
- 'duration': duration,
- }
diff --git a/hypervideo_dl/extractor/blogger.py b/hypervideo_dl/extractor/blogger.py
index dba131c..3d6e033 100644
--- a/hypervideo_dl/extractor/blogger.py
+++ b/hypervideo_dl/extractor/blogger.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from ..utils import (
mimetype2ext,
parse_duration,
@@ -16,7 +11,7 @@ from .common import InfoExtractor
class BloggerIE(InfoExtractor):
IE_NAME = 'blogger.com'
_VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)'
- _VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']'''
+ _EMBED_REGEX = [r'''<iframe[^>]+src=["'](?P<url>(?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''']
_TESTS = [{
'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw',
'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
@@ -29,10 +24,6 @@ class BloggerIE(InfoExtractor):
}
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(BloggerIE._VALID_EMBED, webpage)
-
def _real_extract(self, url):
token_id = self._match_id(url)
webpage = self._download_webpage(url, token_id)
diff --git a/hypervideo_dl/extractor/bloomberg.py b/hypervideo_dl/extractor/bloomberg.py
index 2fbfad1..792155e 100644
--- a/hypervideo_dl/extractor/bloomberg.py
+++ b/hypervideo_dl/extractor/bloomberg.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -10,13 +7,11 @@ class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
_TESTS = [{
- 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
- # The md5 checksum changes
+ 'url': 'https://www.bloomberg.com/news/videos/2021-09-14/apple-unveils-the-new-iphone-13-stock-doesn-t-move-much-video',
'info_dict': {
- 'id': 'qurhIVlJSB6hzkVi229d8g',
+ 'id': 'V8cFcYMxTHaMcEiiYVr39A',
'ext': 'flv',
- 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
- 'description': 'md5:a8ba0302912d03d246979735c17d2761',
+ 'title': 'Apple Unveils the New IPhone 13, Stock Doesn\'t Move Much',
},
'params': {
'format': 'best[format_id^=hds]',
@@ -60,7 +55,7 @@ class BloombergIE(InfoExtractor):
title = re.sub(': Video$', '', self._og_search_title(webpage))
embed_info = self._download_json(
- 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+ 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
formats = []
for stream in embed_info['streams']:
stream_url = stream.get('url')
@@ -72,7 +67,6 @@ class BloombergIE(InfoExtractor):
else:
formats.extend(self._extract_f4m_formats(
stream_url, video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/bokecc.py b/hypervideo_dl/extractor/bokecc.py
index 6a89d36..ca326f2 100644
--- a/hypervideo_dl/extractor/bokecc.py
+++ b/hypervideo_dl/extractor/bokecc.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ExtractorError
@@ -25,8 +21,6 @@ class BokeCCBaseIE(InfoExtractor):
'quality': int(quality.attrib['value']),
} for quality in info_xml.findall('./video/quality')]
- self._sort_formats(formats)
-
return formats
diff --git a/hypervideo_dl/extractor/bongacams.py b/hypervideo_dl/extractor/bongacams.py
index 4e346e7..bf95566 100644
--- a/hypervideo_dl/extractor/bongacams.py
+++ b/hypervideo_dl/extractor/bongacams.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -11,13 +8,28 @@ from ..utils import (
class BongaCamsIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
+ _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://de.bongacams.com/azumi-8',
'only_matching': True,
}, {
'url': 'https://cn.bongacams.com/azumi-8',
'only_matching': True,
+ }, {
+ 'url': 'https://de.bongacams.net/claireashton',
+ 'info_dict': {
+ 'id': 'claireashton',
+ 'ext': 'mp4',
+ 'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'age_limit': 18,
+ 'uploader_id': 'ClaireAshton',
+ 'uploader': 'ClaireAshton',
+ 'like_count': int,
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
@@ -45,7 +57,6 @@ class BongaCamsIE(InfoExtractor):
formats = self._extract_m3u8_formats(
'%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
return {
'id': channel_id,
diff --git a/hypervideo_dl/extractor/booyah.py b/hypervideo_dl/extractor/booyah.py
new file mode 100644
index 0000000..5c55f2c
--- /dev/null
+++ b/hypervideo_dl/extractor/booyah.py
@@ -0,0 +1,86 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, str_or_none, traverse_obj
+
+
+class BooyahBaseIE(InfoExtractor):
+ _BOOYAH_SESSION_KEY = None
+
+ def _real_initialize(self):
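+        # An empty POST creates an anonymous session; the key is returned in a response header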
+ BooyahBaseIE._BOOYAH_SESSION_KEY = self._request_webpage(
+ 'https://booyah.live/api/v3/auths/sessions', None, data=b'').getheader('booyah-session-key')
+
+ def _get_comments(self, video_id):
+ comment_json = self._download_json(
+ f'https://booyah.live/api/v3/playbacks/{video_id}/comments/tops', video_id,
+ headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY}, fatal=False) or {}
+
+ return [{
+ 'id': comment.get('comment_id'),
+ 'author': comment.get('from_nickname'),
+ 'author_id': comment.get('from_uid'),
+ 'author_thumbnail': comment.get('from_thumbnail'),
+ 'text': comment.get('content'),
+ 'timestamp': comment.get('create_time'),
+ 'like_count': comment.get('like_cnt'),
+ } for comment in comment_json.get('comment_list') or ()]
+
+
+class BooyahClipsIE(BooyahBaseIE):
+    _VALID_URL = r'https?://booyah\.live/clips/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://booyah.live/clips/13887261322952306617',
+ 'info_dict': {
+ 'id': '13887261322952306617',
+ 'ext': 'mp4',
+ 'view_count': int,
+ 'duration': 30,
+ 'channel_id': 90565760,
+ 'like_count': int,
+ 'title': 'Cayendo con estilo 😎',
+ 'uploader': '♡LɪꜱGΛ​MER​',
+ 'comment_count': int,
+ 'uploader_id': '90565760',
+ 'thumbnail': 'https://resmambet-a.akamaihd.net/mambet-storage/Clip/90565760/90565760-27204374-fba0-409d-9d7b-63a48b5c0e75.jpg',
+ 'upload_date': '20220617',
+ 'timestamp': 1655490556,
+ 'modified_timestamp': 1655490556,
+ 'modified_date': '20220617',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ f'https://booyah.live/api/v3/playbacks/{video_id}', video_id,
+            headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY})
+
+ formats = []
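+        # Each endpoint provides a clean stream URL and a watermarked download URL; the watermarked copy is deprioritized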
+ for video_data in json_data['playback']['endpoint_list']:
+ formats.extend(({
+ 'url': video_data.get('stream_url'),
+ 'ext': 'mp4',
+ 'height': video_data.get('resolution'),
+ }, {
+ 'url': video_data.get('download_url'),
+ 'ext': 'mp4',
+ 'format_note': 'Watermarked',
+ 'height': video_data.get('resolution'),
+ 'preference': -10,
+ }))
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(json_data, ('playback', 'name')),
+ 'thumbnail': traverse_obj(json_data, ('playback', 'thumbnail_url')),
+ 'formats': formats,
+ 'view_count': traverse_obj(json_data, ('playback', 'views')),
+ 'like_count': traverse_obj(json_data, ('playback', 'likes')),
+ 'duration': traverse_obj(json_data, ('playback', 'duration')),
+ 'comment_count': traverse_obj(json_data, ('playback', 'comment_cnt')),
+ 'channel_id': traverse_obj(json_data, ('playback', 'channel_id')),
+ 'uploader': traverse_obj(json_data, ('user', 'nickname')),
+ 'uploader_id': str_or_none(traverse_obj(json_data, ('user', 'uid'))),
+ 'modified_timestamp': int_or_none(traverse_obj(json_data, ('playback', 'update_time_ms')), 1000),
+ 'timestamp': int_or_none(traverse_obj(json_data, ('playback', 'create_time_ms')), 1000),
+ '__post_extractor': self.extract_comments(video_id, self._get_comments(video_id)),
+ }
diff --git a/hypervideo_dl/extractor/bostonglobe.py b/hypervideo_dl/extractor/bostonglobe.py
index 57882fb..92f8ea2 100644
--- a/hypervideo_dl/extractor/bostonglobe.py
+++ b/hypervideo_dl/extractor/bostonglobe.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/box.py b/hypervideo_dl/extractor/box.py
index 8214086..8ab1496 100644
--- a/hypervideo_dl/extractor/box.py
+++ b/hypervideo_dl/extractor/box.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -82,8 +79,6 @@ class BoxIE(InfoExtractor):
'url': update_url_query(authenticated_download_url, query),
})
- self._sort_formats(formats)
-
creator = f.get('created_by') or {}
return {
diff --git a/hypervideo_dl/extractor/bpb.py b/hypervideo_dl/extractor/bpb.py
index 8f6ef3c..f28e581 100644
--- a/hypervideo_dl/extractor/bpb.py
+++ b/hypervideo_dl/extractor/bpb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -16,7 +13,6 @@ class BpbIE(InfoExtractor):
_TEST = {
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
- # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
'info_dict': {
'id': '297',
@@ -52,8 +48,6 @@ class BpbIE(InfoExtractor):
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/br.py b/hypervideo_dl/extractor/br.py
index 0155827..309452d 100644
--- a/hypervideo_dl/extractor/br.py
+++ b/hypervideo_dl/extractor/br.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -160,7 +157,6 @@ class BRIE(InfoExtractor):
'format_id': 'rtmp-%s' % asset_type,
})
formats.append(rtmp_format_info)
- self._sort_formats(formats)
return formats
def _extract_thumbnails(self, variants, base_url):
@@ -275,7 +271,6 @@ class BRMediathekIE(InfoExtractor):
'tbr': tbr,
'filesize': int_or_none(node.get('fileSize')),
})
- self._sort_formats(formats)
subtitles = {}
for edge in clip.get('captionFiles', {}).get('edges', []):
diff --git a/hypervideo_dl/extractor/bravotv.py b/hypervideo_dl/extractor/bravotv.py
index 139d51c..d489584 100644
--- a/hypervideo_dl/extractor/bravotv.py
+++ b/hypervideo_dl/extractor/bravotv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .adobepass import AdobePassIE
diff --git a/hypervideo_dl/extractor/breakcom.py b/hypervideo_dl/extractor/breakcom.py
index f38789f..00cf308 100644
--- a/hypervideo_dl/extractor/breakcom.py
+++ b/hypervideo_dl/extractor/breakcom.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
@@ -66,7 +63,6 @@ class BreakIE(InfoExtractor):
'format_id': 'http-%d' % bitrate if bitrate else 'http',
'tbr': bitrate,
})
- self._sort_formats(formats)
title = self._search_regex(
(r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
diff --git a/hypervideo_dl/extractor/breitbart.py b/hypervideo_dl/extractor/breitbart.py
index e029aa6..ea0a59c 100644
--- a/hypervideo_dl/extractor/breitbart.py
+++ b/hypervideo_dl/extractor/breitbart.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -26,11 +24,9 @@ class BreitBartIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
- 'title': (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'video title')),
+ 'title': self._generic_title('', webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'age_limit': self._rta_search(webpage),
diff --git a/hypervideo_dl/extractor/brightcove.py b/hypervideo_dl/extractor/brightcove.py
index dcd332b..2b7ddca 100644
--- a/hypervideo_dl/extractor/brightcove.py
+++ b/hypervideo_dl/extractor/brightcove.py
@@ -1,9 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import re
import struct
+import xml.etree.ElementTree
from .adobepass import AdobePassIE
from .common import InfoExtractor
@@ -12,7 +10,6 @@ from ..compat import (
compat_HTTPError,
compat_parse_qs,
compat_urlparse,
- compat_xml_parse_error,
)
from ..utils import (
clean_html,
@@ -148,6 +145,159 @@ class BrightcoveLegacyIE(InfoExtractor):
}
]
+ _WEBPAGE_TESTS = [{
+ # embedded brightcove video
+    # it also tests Brightcove videos that need the 'Referer' header
+    # set in the HTTP requests
+ 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+ 'info_dict': {
+ 'id': '2765128793001',
+ 'ext': 'mp4',
+ 'title': 'Le cours de bourse : l’analyse technique',
+ 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
+ 'uploader': 'BFM BUSINESS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # embedded with itemprop embedURL and video id spelled as `idVideo`
+ 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+ 'info_dict': {
+ 'id': '5255628253001',
+ 'ext': 'mp4',
+ 'title': 'md5:37c519b1128915607601e75a87995fc0',
+ 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+ 'uploader': 'BFM BUSINESS',
+ 'uploader_id': '876450612001',
+ 'timestamp': 1482255315,
+ 'upload_date': '20161220',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Redirects, page gone',
+ }, {
+ # https://github.com/ytdl-org/youtube-dl/issues/2253
+ 'url': 'http://bcove.me/i6nfkrc3',
+ 'md5': '0ba9446db037002366bab3b3eb30c88c',
+ 'info_dict': {
+ 'id': '3101154703001',
+ 'ext': 'mp4',
+ 'title': 'Still no power',
+ 'uploader': 'thestar.com',
+ 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+ },
+ 'skip': 'video gone',
+ }, {
+ # https://github.com/ytdl-org/youtube-dl/issues/3541
+ 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
+ 'info_dict': {
+ 'id': '3866516442001',
+ 'ext': 'mp4',
+ 'title': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'description': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'uploader': 'SBS Broadcasting',
+ },
+ 'skip': 'Restricted to Netherlands, 404 Not Found',
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ # Brightcove video in <iframe>
+ 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
+ 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
+ 'info_dict': {
+ 'id': '5360463607001',
+ 'ext': 'mp4',
+ 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
+ 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
+ 'uploader': 'United Nations',
+ 'uploader_id': '1362235914001',
+ 'timestamp': 1489593889,
+ 'upload_date': '20170315',
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # Brightcove with UUID in videoPlayer
+ 'url': 'http://www8.hp.com/cn/zh/home.html',
+ 'info_dict': {
+ 'id': '5255815316001',
+ 'ext': 'mp4',
+ 'title': 'Sprocket Video - China',
+ 'description': 'Sprocket Video - China',
+ 'uploader': 'HP-Video Gallery',
+ 'timestamp': 1482263210,
+ 'upload_date': '20161220',
+ 'uploader_id': '1107601872001',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ 'skip': 'video rotates...weekly?',
+ }, {
+ # Multiple brightcove videos
+ # https://github.com/ytdl-org/youtube-dl/issues/2283
+ 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+ 'info_dict': {
+ 'id': 'always-never',
+ 'title': 'Always / Never - The New Yorker',
+ },
+ 'playlist_count': 3,
+ 'params': {
+ 'extract_flat': False,
+ 'skip_download': True,
+ },
+ 'skip': 'Redirects, page gone',
+ }, {
+ # BrightcoveInPageEmbed embed
+ 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ 'info_dict': {
+ 'id': '4238694884001',
+ 'ext': 'flv',
+ 'title': 'Tabletop: Dread, Last Thoughts',
+ 'description': 'Tabletop: Dread, Last Thoughts',
+ 'duration': 51690,
+ },
+ 'skip': 'Redirects, page gone',
+ }, {
+ # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+        # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
+ 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+ 'info_dict': {
+ 'id': '4785848093001',
+ 'ext': 'mp4',
+ 'title': 'The Cardinal Pell Interview',
+ 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+ 'uploader': 'GlobeCast Australia - GlobeStream',
+ 'uploader_id': '2733773828001',
+ 'upload_date': '20160304',
+ 'timestamp': 1457083087,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # Brightcove embed with whitespace around attribute names
+ 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
+ 'info_dict': {
+ 'id': '3167554373001',
+ 'ext': 'mp4',
+ 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
+ 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
+ 'uploader_id': '1079349493',
+ 'upload_date': '20140207',
+ 'timestamp': 1391810548,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': '410 Gone',
+ }]
+
@classmethod
def _build_brightcove_url(cls, object_str):
"""
@@ -166,7 +316,7 @@ class BrightcoveLegacyIE(InfoExtractor):
try:
object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
- except compat_xml_parse_error:
+ except xml.etree.ElementTree.ParseError:
return
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
@@ -284,6 +434,11 @@ class BrightcoveLegacyIE(InfoExtractor):
return [src for _, src in re.findall(
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
+ def _extract_from_webpage(self, url, webpage):
+ bc_urls = self._extract_brightcove_urls(webpage)
+ for bc_url in bc_urls:
+ yield self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveLegacyIE)
+
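The legacy extractor now participates in the generic embed framework: `_extract_from_webpage` yields results whose URLs carry the embedding page as a smuggled `Referer`, which `_real_extract` recovers via `unsmuggle_url`. A minimal sketch of the round-trip, assuming the `smuggle_url`/`unsmuggle_url` helpers from `hypervideo_dl.utils` (they stash the extra dict in the URL fragment):

```python
from hypervideo_dl.utils import smuggle_url, unsmuggle_url

bc_url = 'http://link.brightcove.com/services/player/bcpid1234567890'  # hypothetical
carried = smuggle_url(bc_url, {'Referer': 'http://example.com/article'})

# ...later, at the top of _real_extract:
plain_url, data = unsmuggle_url(carried, {})
assert plain_url == bc_url
assert data == {'Referer': 'http://example.com/article'}
```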
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -339,7 +494,131 @@ class BrightcoveLegacyIE(InfoExtractor):
raise UnsupportedError(url)
-class BrightcoveNewIE(AdobePassIE):
+class BrightcoveNewBaseIE(AdobePassIE):
+ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
+ title = json_data['name'].strip()
+
+ formats, subtitles = [], {}
+ sources = json_data.get('sources') or []
+ for source in sources:
+ container = source.get('container')
+ ext = mimetype2ext(source.get('type'))
+ src = source.get('src')
+ if ext == 'm3u8' or container == 'M2TS':
+ if not src:
+ continue
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif ext == 'mpd':
+ if not src:
+ continue
+ fmts, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ else:
+ streaming_src = source.get('streaming_src')
+ stream_name, app_name = source.get('stream_name'), source.get('app_name')
+ if not src and not streaming_src and (not stream_name or not app_name):
+ continue
+ tbr = float_or_none(source.get('avg_bitrate'), 1000)
+ height = int_or_none(source.get('height'))
+ width = int_or_none(source.get('width'))
+ f = {
+ 'tbr': tbr,
+ 'filesize': int_or_none(source.get('size')),
+ 'container': container,
+ 'ext': ext or container.lower(),
+ }
+ if width == 0 and height == 0:
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': width,
+ 'height': height,
+ 'vcodec': source.get('codec'),
+ })
+
+ def build_format_id(kind):
+ format_id = kind
+ if tbr:
+ format_id += '-%dk' % int(tbr)
+ if height:
+ format_id += '-%dp' % height
+ return format_id
+
+ if src or streaming_src:
+ f.update({
+ 'url': src or streaming_src,
+ 'format_id': build_format_id('http' if src else 'http-streaming'),
+ 'source_preference': 0 if src else -1,
+ })
+ else:
+ f.update({
+ 'url': app_name,
+ 'play_path': stream_name,
+ 'format_id': build_format_id('rtmp'),
+ })
+ fmts = [f]
+
+ # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
+ if container == 'WVM' or source.get('key_systems') or ext == 'ism':
+ for f in fmts:
+ f['has_drm'] = True
+ formats.extend(fmts)
+
+ if not formats:
+ errors = json_data.get('errors')
+ if errors:
+ error = errors[0]
+ self.raise_no_formats(
+ error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
+
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ for text_track in json_data.get('text_tracks', []):
+ if text_track.get('kind') != 'captions':
+ continue
+ text_track_url = url_or_none(text_track.get('src'))
+ if not text_track_url:
+ continue
+ lang = (str_or_none(text_track.get('srclang'))
+ or str_or_none(text_track.get('label')) or 'en').lower()
+ subtitles.setdefault(lang, []).append({
+ 'url': text_track_url,
+ })
+
+ is_live = False
+ duration = float_or_none(json_data.get('duration'), 1000)
+ if duration is not None and duration <= 0:
+ is_live = True
+
+ common_res = [(160, 90), (320, 180), (480, 720), (640, 360), (768, 432), (1024, 576), (1280, 720), (1366, 768), (1920, 1080)]
+ thumb_base_url = dict_get(json_data, ('poster', 'thumbnail'))
+ thumbnails = [{
+ 'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url),
+ 'width': w,
+ 'height': h,
+ } for w, h in common_res] if thumb_base_url else None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(json_data.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'timestamp': parse_iso8601(json_data.get('published_at')),
+ 'uploader_id': json_data.get('account_id'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'tags': json_data.get('tags', []),
+ 'is_live': is_live,
+ }
+
+
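`_parse_brightcove_metadata` (hoisted into this new base class so other extractors can share it) synthesizes a thumbnail ladder by rewriting the `WxH` token of the poster URL once per `common_res` entry; note the `(480, 720)` pair looks like a transposed `(480, 270)`, carried over unchanged from the removed copy of the method further down. A standalone sketch of the substitution, with a made-up poster URL:

```python
import re

common_res = [(160, 90), (320, 180), (640, 360), (1280, 720), (1920, 1080)]
thumb_base_url = 'https://cf-images.example/960x540/match/image.jpg'  # hypothetical

thumbnails = [{
    'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url),
    'width': w,
    'height': h,
} for w, h in common_res]

assert thumbnails[0]['url'] == 'https://cf-images.example/160x90/match/image.jpg'
```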
+class BrightcoveNewIE(BrightcoveNewBaseIE):
IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
_TESTS = [{
@@ -356,6 +635,7 @@ class BrightcoveNewIE(AdobePassIE):
'uploader_id': '929656772001',
'formats': 'mincount:20',
},
+ 'skip': '404 Not Found',
}, {
# with rtmp streams
'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
@@ -403,13 +683,114 @@ class BrightcoveNewIE(AdobePassIE):
'only_matching': True,
}]
+ _WEBPAGE_TESTS = [{
+ # brightcove player url embed
+ 'url': 'https://nbc-2.com/weather/forecast/2022/11/16/forecast-warmest-day-of-the-week/',
+ 'md5': '2934d5372b354d27083ccf8575dbfee2',
+ 'info_dict': {
+ 'id': '6315650313112',
+ 'title': 'First Alert Forecast: November 15, 2022',
+ 'ext': 'mp4',
+ 'tags': ['nbc2', 'forecast'],
+ 'uploader_id': '6146886170001',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1668574571,
+ 'duration': 233.375,
+ 'upload_date': '20221116',
+ },
+ }, {
+ # embedded with video tag only
+ 'url': 'https://www.gooddishtv.com/tiktok-rapping-chef-mr-pyrex',
+ 'info_dict': {
+ 'id': 'tiktok-rapping-chef-mr-pyrex',
+ 'title': 'TikTok\'s Rapping Chef Makes Jambalaya for the Hosts',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ 'description': 'Just in time for Mardi Gras',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '6299189544001',
+ 'ext': 'mp4',
+ 'title': 'TGD_01-032_5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'tags': [],
+ 'timestamp': 1646078943,
+ 'uploader_id': '1569565978001',
+ 'upload_date': '20220228',
+ 'duration': 217.195,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '6305565995112',
+ 'ext': 'mp4',
+ 'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'tags': [],
+ 'timestamp': 1651604591,
+ 'uploader_id': '1569565978001',
+ 'upload_date': '20220503',
+ 'duration': 310.421,
+ },
+ }],
+ }, {
+ # Brightcove:new type [2].
+ 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
+ 'md5': '2b35148fcf48da41c9fb4591650784f3',
+ 'info_dict': {
+ 'id': '5348741021001',
+ 'ext': 'mp4',
+ 'upload_date': '20170306',
+ 'uploader_id': '4191638492001',
+ 'timestamp': 1488769918,
+ 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
+ },
+ 'skip': '404 Not Found',
+ }, {
+ # Alternative brightcove <video> attributes
+ 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
+ 'info_dict': {
+ 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
+ },
+ 'playlist': [{
+ 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
+ 'info_dict': {
+ 'id': '5311302538001',
+ 'ext': 'mp4',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
+ 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
+ 'timestamp': 1486321708,
+ 'upload_date': '20170205',
+ 'uploader_id': '800000640001',
+ },
+ 'only_matching': True,
+ }],
+ 'skip': '404 Not Found',
+ }, {
+ # Brightcove URL in single quotes
+ 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
+ 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
+ 'info_dict': {
+ 'id': '4255764656001',
+ 'ext': 'mp4',
+ 'title': 'SN Presents: Russell Martin, World Citizen',
+ 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
+ 'uploader': 'Rogers Sportsnet',
+ 'uploader_id': '1704050871',
+ 'upload_date': '20150525',
+ 'timestamp': 1432570283,
+ },
+ 'skip': 'Page no longer has URL, now has javascript',
+ }]
+
@staticmethod
def _extract_url(ie, webpage):
- urls = BrightcoveNewIE._extract_urls(ie, webpage)
+ urls = BrightcoveNewIE._extract_brightcove_urls(ie, webpage)
return urls[0] if urls else None
@staticmethod
- def _extract_urls(ie, webpage):
+ def _extract_brightcove_urls(ie, webpage):
# Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
@@ -469,129 +850,10 @@ class BrightcoveNewIE(AdobePassIE):
return entries
- def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
- title = json_data['name'].strip()
-
- formats, subtitles = [], {}
- sources = json_data.get('sources') or []
- for source in sources:
- container = source.get('container')
- ext = mimetype2ext(source.get('type'))
- src = source.get('src')
- if ext == 'm3u8' or container == 'M2TS':
- if not src:
- continue
- fmts, subs = self._extract_m3u8_formats_and_subtitles(
- src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
- subtitles = self._merge_subtitles(subtitles, subs)
- elif ext == 'mpd':
- if not src:
- continue
- fmts, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False)
- subtitles = self._merge_subtitles(subtitles, subs)
- else:
- streaming_src = source.get('streaming_src')
- stream_name, app_name = source.get('stream_name'), source.get('app_name')
- if not src and not streaming_src and (not stream_name or not app_name):
- continue
- tbr = float_or_none(source.get('avg_bitrate'), 1000)
- height = int_or_none(source.get('height'))
- width = int_or_none(source.get('width'))
- f = {
- 'tbr': tbr,
- 'filesize': int_or_none(source.get('size')),
- 'container': container,
- 'ext': ext or container.lower(),
- }
- if width == 0 and height == 0:
- f.update({
- 'vcodec': 'none',
- })
- else:
- f.update({
- 'width': width,
- 'height': height,
- 'vcodec': source.get('codec'),
- })
-
- def build_format_id(kind):
- format_id = kind
- if tbr:
- format_id += '-%dk' % int(tbr)
- if height:
- format_id += '-%dp' % height
- return format_id
-
- if src or streaming_src:
- f.update({
- 'url': src or streaming_src,
- 'format_id': build_format_id('http' if src else 'http-streaming'),
- 'source_preference': 0 if src else -1,
- })
- else:
- f.update({
- 'url': app_name,
- 'play_path': stream_name,
- 'format_id': build_format_id('rtmp'),
- })
- fmts = [f]
-
- # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
- if container == 'WVM' or source.get('key_systems') or ext == 'ism':
- for f in fmts:
- f['has_drm'] = True
- formats.extend(fmts)
-
- if not formats:
- errors = json_data.get('errors')
- if errors:
- error = errors[0]
- self.raise_no_formats(
- error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
-
- self._sort_formats(formats)
-
- for f in formats:
- f.setdefault('http_headers', {}).update(headers)
-
- for text_track in json_data.get('text_tracks', []):
- if text_track.get('kind') != 'captions':
- continue
- text_track_url = url_or_none(text_track.get('src'))
- if not text_track_url:
- continue
- lang = (str_or_none(text_track.get('srclang'))
- or str_or_none(text_track.get('label')) or 'en').lower()
- subtitles.setdefault(lang, []).append({
- 'url': text_track_url,
- })
-
- is_live = False
- duration = float_or_none(json_data.get('duration'), 1000)
- if duration is not None and duration <= 0:
- is_live = True
-
- common_res = [(160, 90), (320, 180), (480, 720), (640, 360), (768, 432), (1024, 576), (1280, 720), (1366, 768), (1920, 1080)]
- thumb_base_url = dict_get(json_data, ('poster', 'thumbnail'))
- thumbnails = [{
- 'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url),
- 'width': w,
- 'height': h,
- } for w, h in common_res] if thumb_base_url else None
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': clean_html(json_data.get('description')),
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'timestamp': parse_iso8601(json_data.get('published_at')),
- 'uploader_id': json_data.get('account_id'),
- 'formats': formats,
- 'subtitles': subtitles,
- 'tags': json_data.get('tags', []),
- 'is_live': is_live,
- }
+ def _extract_from_webpage(self, url, webpage):
+ bc_urls = self._extract_brightcove_urls(self, webpage)
+ for bc_url in bc_urls:
+ yield self.url_result(smuggle_url(bc_url, {'referrer': url}), BrightcoveNewIE)
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -603,9 +865,9 @@ class BrightcoveNewIE(AdobePassIE):
account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
policy_key_id = '%s_%s' % (account_id, player_id)
- policy_key = self._downloader.cache.load('brightcove', policy_key_id)
+ policy_key = self.cache.load('brightcove', policy_key_id)
policy_key_extracted = False
- store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
+ store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
def extract_policy_key():
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
@@ -635,7 +897,7 @@ class BrightcoveNewIE(AdobePassIE):
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
headers = {}
- referrer = smuggled_data.get('referrer')
+ referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key
if referrer:
headers.update({
'Referer': referrer,
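Two changes in the hunks above are easy to miss: the cache is now reached as `self.cache` instead of `self._downloader.cache`, and the referrer arrives via the smuggled `'referrer'` key. The policy key is cached per account/player pair; a simplified sketch of the pattern (the real code defers the store until the key has produced a successful API response):

```python
# names as in the hunk above; control flow simplified
policy_key_id = f'{account_id}_{player_id}'
policy_key = self.cache.load('brightcove', policy_key_id)
if not policy_key:
    policy_key = extract_policy_key()  # scrapes the player config
    self.cache.store('brightcove', policy_key_id, policy_key)
```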
diff --git a/hypervideo_dl/extractor/bundesliga.py b/hypervideo_dl/extractor/bundesliga.py
new file mode 100644
index 0000000..e76dd58
--- /dev/null
+++ b/hypervideo_dl/extractor/bundesliga.py
@@ -0,0 +1,34 @@
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class BundesligaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bundesliga\.com/[a-z]{2}/bundesliga/videos(?:/[^?]+)?\?vid=(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [
+ {
+ 'url': 'https://www.bundesliga.com/en/bundesliga/videos?vid=bhhHkKyN',
+ 'md5': '8fc3b25cd12440e3a8cdc51f1493849c',
+ 'info_dict': {
+ 'id': 'bhhHkKyN',
+ 'ext': 'mp4',
+ 'title': 'Watch: Alphonso Davies and Jeremie Frimpong head-to-head',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/bhhHkKyN/poster.jpg?width=720',
+ 'upload_date': '20220928',
+ 'duration': 146,
+ 'timestamp': 1664366511,
+ 'description': 'md5:803d4411bd134140c774021dd4b7598b'
+ }
+ },
+ {
+ 'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(f'jwplatform:{video_id}', JWPlatformIE, video_id)
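The new extractor is a thin shim: `_VALID_URL` pins the eight-character `vid` query parameter and delegates to `JWPlatformIE` through a `jwplatform:` pseudo-URL. A quick check of the capture group against the second test URL:

```python
import re

_VALID_URL = r'https?://(?:www\.)?bundesliga\.com/[a-z]{2}/bundesliga/videos(?:/[^?]+)?\?vid=(?P<id>[a-zA-Z0-9]{8})'
m = re.match(_VALID_URL,
             'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G')
assert m and m.group('id') == 'ROHjs06G'
```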
diff --git a/hypervideo_dl/extractor/businessinsider.py b/hypervideo_dl/extractor/businessinsider.py
index 73a57b1..4b3f5e6 100644
--- a/hypervideo_dl/extractor/businessinsider.py
+++ b/hypervideo_dl/extractor/businessinsider.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
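The `# coding: utf-8` and `from __future__ import unicode_literals` lines stripped from this and the following files are Python 2 leftovers: on Python 3 the source encoding already defaults to UTF-8 and string literals are already unicode, so both headers are dead weight. The same cleanup motivates the `compat_*` alias removals elsewhere in this commit (e.g. `compat_xml_parse_error` → `xml.etree.ElementTree.ParseError` in brightcove.py and `compat_chr` → `chr` in cda.py).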
diff --git a/hypervideo_dl/extractor/buzzfeed.py b/hypervideo_dl/extractor/buzzfeed.py
index ec41109..b30a3b7 100644
--- a/hypervideo_dl/extractor/buzzfeed.py
+++ b/hypervideo_dl/extractor/buzzfeed.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -84,7 +81,7 @@ class BuzzFeedIE(InfoExtractor):
continue
entries.append(self.url_result(video['url']))
- facebook_urls = FacebookIE._extract_urls(webpage)
+ facebook_urls = FacebookIE._extract_embed_urls(url, webpage)
entries.extend([
self.url_result(facebook_url)
for facebook_url in facebook_urls])
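`FacebookIE._extract_urls(webpage)` has been folded into the unified embed API: extractors now declare `_EMBED_REGEX` and/or override the classmethod `_extract_embed_urls(url, webpage)`, which additionally receives the embedding page's URL (needed, for example, for referrer smuggling). A rough outline of the shape, as an assumption about the base-class contract rather than its exact code:

```python
class SomeEmbedIE(InfoExtractor):
    # the base class scans the page with every _EMBED_REGEX and yields the
    # named 'url' group; override the classmethod for anything fancier
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://player\.example\.com/[^"\']+)']

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        yield from super()._extract_embed_urls(url, webpage)
```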
diff --git a/hypervideo_dl/extractor/byutv.py b/hypervideo_dl/extractor/byutv.py
index f4d5086..9ed6efe 100644
--- a/hypervideo_dl/extractor/byutv.py
+++ b/hypervideo_dl/extractor/byutv.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -111,7 +108,6 @@ class BYUtvIE(InfoExtractor):
'thumbnail': ep.get('imageThumbnail'),
'duration': parse_duration(ep.get('length')),
})
- self._sort_formats(formats)
return merge_dicts(info, {
'id': video_id,
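The `self._sort_formats(formats)` deletions that recur through the rest of this commit are not behaviour changes: format sorting now happens once in the core downloader after extraction, so the per-extractor calls became redundant and were removed wholesale.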
diff --git a/hypervideo_dl/extractor/c56.py b/hypervideo_dl/extractor/c56.py
index a853c53..e4b1c9a 100644
--- a/hypervideo_dl/extractor/c56.py
+++ b/hypervideo_dl/extractor/c56.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import js_to_json
@@ -53,7 +49,6 @@ class C56IE(InfoExtractor):
'url': f['url']
} for f in info['rfiles']
]
- self._sort_formats(formats)
return {
'id': info['vid'],
diff --git a/hypervideo_dl/extractor/cableav.py b/hypervideo_dl/extractor/cableav.py
index 77efdf4..2e374e5 100644
--- a/hypervideo_dl/extractor/cableav.py
+++ b/hypervideo_dl/extractor/cableav.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
@@ -23,7 +22,6 @@ class CableAVIE(InfoExtractor):
video_url = self._og_search_video_url(webpage, secure=False)
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/callin.py b/hypervideo_dl/extractor/callin.py
index 1f3b7cf..e966876 100644
--- a/hypervideo_dl/extractor/callin.py
+++ b/hypervideo_dl/extractor/callin.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
from ..utils import (
traverse_obj,
@@ -52,12 +51,9 @@ class CallinIE(InfoExtractor):
episode = next_data['props']['pageProps']['episode']
id = episode['id']
- title = (episode.get('title')
- or self._og_search_title(webpage, fatal=False)
- or self._html_extract_title(webpage))
+ title = episode.get('title') or self._generic_title('', webpage)
url = episode['m3u8']
formats = self._extract_m3u8_formats(url, display_id, ext='ts')
- self._sort_formats(formats)
show = traverse_obj(episode, ('show', 'title'))
show_id = traverse_obj(episode, ('show', 'id'))
diff --git a/hypervideo_dl/extractor/caltrans.py b/hypervideo_dl/extractor/caltrans.py
index 9ac740f..f4a4a83 100644
--- a/hypervideo_dl/extractor/caltrans.py
+++ b/hypervideo_dl/extractor/caltrans.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -30,7 +27,6 @@ class CaltransIE(InfoExtractor):
video_stream = self._search_regex(r'videoStreamURL\s*=\s*"([^"]+)"', global_vars, 'Video Stream URL', fatal=False)
formats = self._extract_m3u8_formats(video_stream, video_id, 'ts', live=True)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/cam4.py b/hypervideo_dl/extractor/cam4.py
index 2a3931f..2650cc1 100644
--- a/hypervideo_dl/extractor/cam4.py
+++ b/hypervideo_dl/extractor/cam4.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -23,7 +20,6 @@ class CAM4IE(InfoExtractor):
m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL')
formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
return {
'id': channel_id,
diff --git a/hypervideo_dl/extractor/camdemy.py b/hypervideo_dl/extractor/camdemy.py
index 8f0c6c5..c7079e4 100644
--- a/hypervideo_dl/extractor/camdemy.py
+++ b/hypervideo_dl/extractor/camdemy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/cammodels.py b/hypervideo_dl/extractor/cammodels.py
index 3dc1937..0509057 100644
--- a/hypervideo_dl/extractor/cammodels.py
+++ b/hypervideo_dl/extractor/cammodels.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -87,7 +84,6 @@ class CamModelsIE(InfoExtractor):
else:
continue
formats.append(f)
- self._sort_formats(formats)
return {
'id': user_id,
diff --git a/hypervideo_dl/extractor/camsoda.py b/hypervideo_dl/extractor/camsoda.py
new file mode 100644
index 0000000..021cd91
--- /dev/null
+++ b/hypervideo_dl/extractor/camsoda.py
@@ -0,0 +1,57 @@
+import random
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, traverse_obj
+
+
+class CamsodaIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.camsoda\.com/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.camsoda.com/lizzhopf',
+ 'info_dict': {
+ 'id': 'lizzhopf',
+ 'ext': 'mp4',
+ 'title': 'lizzhopf (lizzhopf) Nude on Cam. Free Live Sex Chat Room - CamSoda',
+ 'description': str,
+ 'is_live': True,
+ 'age_limit': 18,
+ },
+ 'skip': 'Room is offline',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())
+
+ data = self._download_json(
+ f'https://camsoda.com/api/v1/video/vtoken/{video_id}', video_id,
+ query={'username': f'guest_{random.randrange(10000, 99999)}'},
+ headers=self.geo_verification_headers())
+ if not data:
+ raise ExtractorError('Unable to find configuration for stream.')
+ elif data.get('private_servers'):
+ raise ExtractorError('Model is in private show.', expected=True)
+ elif not data.get('stream_name'):
+ raise ExtractorError('Model is offline.', expected=True)
+
+ stream_name = traverse_obj(data, 'stream_name', expected_type=str)
+ token = traverse_obj(data, 'token', expected_type=str)
+
+ formats = []
+ for server in traverse_obj(data, ('edge_servers', ...)):
+ formats = self._extract_m3u8_formats(
+ f'https://{server}/{stream_name}_v1/index.m3u8?token={token}',
+ video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
+ if formats:
+ break
+ if not formats:
+ self.raise_no_formats('No active streams found', expected=True)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_extract_title(webpage),
+ 'description': self._html_search_meta('description', webpage, default=None),
+ 'is_live': True,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
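The vtoken endpoint returns a stream name, a token and a list of edge servers; the extractor probes one HLS playlist per server (`fatal=False`) and keeps the first that yields formats. The URL it assembles, pulled out as a small sketch using the field names from the JSON handled above:

```python
def candidate_m3u8_urls(data):
    # one candidate playlist per edge server, in API order
    for server in data.get('edge_servers') or []:
        yield f'https://{server}/{data["stream_name"]}_v1/index.m3u8?token={data["token"]}'
```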
diff --git a/hypervideo_dl/extractor/camtasia.py b/hypervideo_dl/extractor/camtasia.py
new file mode 100644
index 0000000..70ab6c6
--- /dev/null
+++ b/hypervideo_dl/extractor/camtasia.py
@@ -0,0 +1,71 @@
+import os
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class CamtasiaEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _WEBPAGE_TESTS = [
+ {
+ 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+ 'playlist': [{
+ 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+ 'ext': 'flv',
+ 'duration': 2235.90,
+ }
+ }, {
+ 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+ 'ext': 'flv',
+ 'duration': 2235.93,
+ }
+ }],
+ 'info_dict': {
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ },
+ 'skip': 'webpage dead'
+ },
+
+ ]
+
+ def _extract_from_webpage(self, url, webpage):
+ camtasia_cfg = self._search_regex(
+ r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+ webpage, 'camtasia configuration file', default=None)
+ if camtasia_cfg is None:
+ return None
+
+ title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+ camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
+ camtasia_cfg = self._download_xml(
+ camtasia_url, self._generic_id(url),
+ note='Downloading camtasia configuration',
+ errnote='Failed to download camtasia configuration')
+ fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+
+ entries = []
+ for n in fileset_node.getchildren():
+ url_n = n.find('./uri')
+ if url_n is None:
+ continue
+
+ entries.append({
+ 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+ 'title': f'{title} - {n.tag}',
+ 'url': urllib.parse.urljoin(url, url_n.text),
+ 'duration': float_or_none(n.find('./duration').text),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ }
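One portability caveat in the new file: `Element.getchildren()` was removed from `xml.etree.ElementTree` in Python 3.9, so the loop above fails on current interpreters. Iterating the element directly is the drop-in equivalent:

```python
for n in fileset_node:  # same children, works on Python 3.9+
    url_n = n.find('./uri')
```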
diff --git a/hypervideo_dl/extractor/camtube.py b/hypervideo_dl/extractor/camtube.py
deleted file mode 100644
index b3be3bd..0000000
--- a/hypervideo_dl/extractor/camtube.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_timestamp,
-)
-
-
-class CamTubeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
- 'info_dict': {
- 'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
- 'display_id': 'minafay-030618-1136-chaturbate-female',
- 'ext': 'mp4',
- 'title': 'minafay-030618-1136-chaturbate-female',
- 'duration': 1274,
- 'timestamp': 1528018608,
- 'upload_date': '20180603',
- 'age_limit': 18
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- _API_BASE = 'https://api.camtube.co'
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- token = self._download_json(
- '%s/rpc/session/new' % self._API_BASE, display_id,
- 'Downloading session token')['token']
-
- self._set_cookie('api.camtube.co', 'session', token)
-
- video = self._download_json(
- '%s/recordings/%s' % (self._API_BASE, display_id), display_id,
- headers={'Referer': url})
-
- video_id = video['uuid']
- timestamp = unified_timestamp(video.get('createdAt'))
- duration = int_or_none(video.get('duration'))
- view_count = int_or_none(video.get('viewCount'))
- like_count = int_or_none(video.get('likeCount'))
- creator = video.get('stageName')
-
- formats = [{
- 'url': '%s/recordings/%s/manifest.m3u8'
- % (self._API_BASE, video_id),
- 'format_id': 'hls',
- 'ext': 'mp4',
- 'protocol': 'm3u8_native',
- }]
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': display_id,
- 'timestamp': timestamp,
- 'duration': duration,
- 'view_count': view_count,
- 'like_count': like_count,
- 'creator': creator,
- 'formats': formats,
- 'age_limit': 18
- }
diff --git a/hypervideo_dl/extractor/camwithher.py b/hypervideo_dl/extractor/camwithher.py
index bbc5205..a0b3749 100644
--- a/hypervideo_dl/extractor/camwithher.py
+++ b/hypervideo_dl/extractor/camwithher.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/canalalpha.py b/hypervideo_dl/extractor/canalalpha.py
index 0365cb2..df5ca58 100644
--- a/hypervideo_dl/extractor/canalalpha.py
+++ b/hypervideo_dl/extractor/canalalpha.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -85,7 +82,6 @@ class CanalAlphaIE(InfoExtractor):
dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
formats.extend(dash_frmts)
subtitles = self._merge_subtitles(subtitles, dash_subs)
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title').strip(),
diff --git a/hypervideo_dl/extractor/canalc2.py b/hypervideo_dl/extractor/canalc2.py
index 407cc80..597cb2a 100644
--- a/hypervideo_dl/extractor/canalc2.py
+++ b/hypervideo_dl/extractor/canalc2.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -61,8 +58,6 @@ class Canalc2IE(InfoExtractor):
else:
info = self._parse_html5_media_entries(url, webpage, url)[0]
- self._sort_formats(info['formats'])
-
info.update({
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/canalplus.py b/hypervideo_dl/extractor/canalplus.py
index 211ea26..b7e2f9d 100644
--- a/hypervideo_dl/extractor/canalplus.py
+++ b/hypervideo_dl/extractor/canalplus.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
# ExtractorError,
@@ -90,7 +86,6 @@ class CanalplusIE(InfoExtractor):
'format_id': format_id,
'quality': preference(format_id),
})
- self._sort_formats(formats)
thumbnails = [{
'id': image_id,
diff --git a/hypervideo_dl/extractor/canvas.py b/hypervideo_dl/extractor/canvas.py
index 8b99037..ae6e03a 100644
--- a/hypervideo_dl/extractor/canvas.py
+++ b/hypervideo_dl/extractor/canvas.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
import json
@@ -119,7 +118,6 @@ class CanvasIE(InfoExtractor):
'format_id': format_type,
'url': format_url,
})
- self._sort_formats(formats)
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
diff --git a/hypervideo_dl/extractor/carambatv.py b/hypervideo_dl/extractor/carambatv.py
index 7e5cc90..d6044a3 100644
--- a/hypervideo_dl/extractor/carambatv.py
+++ b/hypervideo_dl/extractor/carambatv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -46,7 +43,6 @@ class CarambaTVIE(InfoExtractor):
'height': int_or_none(f.get('height')),
'format_id': format_field(f, 'height', '%sp'),
} for f in video['qualities'] if f.get('fn')]
- self._sort_formats(formats)
thumbnail = video.get('splash')
duration = float_or_none(try_get(
diff --git a/hypervideo_dl/extractor/cartoonnetwork.py b/hypervideo_dl/extractor/cartoonnetwork.py
index 48b3361..4dd7ac4 100644
--- a/hypervideo_dl/extractor/cartoonnetwork.py
+++ b/hypervideo_dl/extractor/cartoonnetwork.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .turner import TurnerBaseIE
from ..utils import int_or_none
diff --git a/hypervideo_dl/extractor/cbc.py b/hypervideo_dl/extractor/cbc.py
index 4892419..a9f6cd2 100644
--- a/hypervideo_dl/extractor/cbc.py
+++ b/hypervideo_dl/extractor/cbc.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import json
import base64
@@ -307,13 +304,13 @@ class CBCGemIE(InfoExtractor):
def _get_claims_token(self, email, password):
if not self.claims_token_valid():
self._claims_token = self._new_claims_token(email, password)
- self._downloader.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
+ self.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
return self._claims_token
def _real_initialize(self):
if self.claims_token_valid():
return
- self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token')
+ self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
def _find_secret_formats(self, formats, video_id):
""" Find a valid video url and convert it to the secret variant """
@@ -383,8 +380,6 @@ class CBCGemIE(InfoExtractor):
if 'descriptive' in format['format_id'].lower():
format['preference'] = -2
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_info['title'],
diff --git a/hypervideo_dl/extractor/cbs.py b/hypervideo_dl/extractor/cbs.py
index 2af36ea..9aacd50 100644
--- a/hypervideo_dl/extractor/cbs.py
+++ b/hypervideo_dl/extractor/cbs.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .theplatform import ThePlatformFeedIE
from ..utils import (
ExtractorError,
@@ -12,7 +10,7 @@ from ..utils import (
)
-class CBSBaseIE(ThePlatformFeedIE):
+class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
subtitles = {}
for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
@@ -54,7 +52,6 @@ class CBSBaseIE(ThePlatformFeedIE):
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if last_e and not formats:
self.raise_no_formats(last_e, True, content_id)
- self._sort_formats(formats)
extra_info.update({
'id': content_id,
diff --git a/hypervideo_dl/extractor/cbsinteractive.py b/hypervideo_dl/extractor/cbsinteractive.py
index 9d4f754..b09e982 100644
--- a/hypervideo_dl/extractor/cbsinteractive.py
+++ b/hypervideo_dl/extractor/cbsinteractive.py
@@ -1,12 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .cbs import CBSIE
from ..utils import int_or_none
-class CBSInteractiveIE(CBSIE):
+class CBSInteractiveIE(CBSIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
_TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
diff --git a/hypervideo_dl/extractor/cbslocal.py b/hypervideo_dl/extractor/cbslocal.py
index 3b7e1a8..3d50b04 100644
--- a/hypervideo_dl/extractor/cbslocal.py
+++ b/hypervideo_dl/extractor/cbslocal.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse
@@ -10,7 +7,7 @@ from ..utils import (
)
-class CBSLocalIE(AnvatoIE):
+class CBSLocalIE(AnvatoIE): # XXX: Do not subclass from concrete IE
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
@@ -50,7 +47,7 @@ class CBSLocalIE(AnvatoIE):
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
-class CBSLocalArticleIE(AnvatoIE):
+class CBSLocalArticleIE(AnvatoIE): # XXX: Do not subclass from concrete IE
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/cbsnews.py b/hypervideo_dl/extractor/cbsnews.py
index 1285ed6..16edf3a 100644
--- a/hypervideo_dl/extractor/cbsnews.py
+++ b/hypervideo_dl/extractor/cbsnews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import zlib
@@ -15,7 +12,7 @@ from ..utils import (
)
-class CBSNewsEmbedIE(CBSIE):
+class CBSNewsEmbedIE(CBSIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'cbsnews:embed'
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
_TESTS = [{
@@ -30,7 +27,7 @@ class CBSNewsEmbedIE(CBSIE):
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
-class CBSNewsIE(CBSIE):
+class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'cbsnews'
IE_DESC = 'CBS News'
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
@@ -135,7 +132,6 @@ class CBSNewsLiveVideoIE(InfoExtractor):
})
formats = self._extract_akamai_formats(video_info['url'], display_id)
- self._sort_formats(formats)
return {
'id': display_id,
diff --git a/hypervideo_dl/extractor/cbssports.py b/hypervideo_dl/extractor/cbssports.py
index b8a6e59..b5d85af 100644
--- a/hypervideo_dl/extractor/cbssports.py
+++ b/hypervideo_dl/extractor/cbssports.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
# from .cbs import CBSBaseIE
from .common import InfoExtractor
from ..utils import (
@@ -43,7 +40,6 @@ class CBSSportsEmbedIE(InfoExtractor):
formats = self._extract_m3u8_formats(
metadata['files'][0]['url'], video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
image = video.get('image')
thumbnails = None
diff --git a/hypervideo_dl/extractor/ccc.py b/hypervideo_dl/extractor/ccc.py
index 36e6dff..22e3a22 100644
--- a/hypervideo_dl/extractor/ccc.py
+++ b/hypervideo_dl/extractor/ccc.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -67,7 +64,6 @@ class CCCIE(InfoExtractor):
'language': language,
'vcodec': vcodec,
})
- self._sort_formats(formats)
return {
'id': event_id,
@@ -78,6 +74,7 @@ class CCCIE(InfoExtractor):
'thumbnail': event_data.get('thumb_url'),
'timestamp': parse_iso8601(event_data.get('date')),
'duration': int_or_none(event_data.get('length')),
+ 'view_count': int_or_none(event_data.get('view_count')),
'tags': event_data.get('tags'),
'formats': formats,
}
diff --git a/hypervideo_dl/extractor/ccma.py b/hypervideo_dl/extractor/ccma.py
index 9dbaabf..88ff82f 100644
--- a/hypervideo_dl/extractor/ccma.py
+++ b/hypervideo_dl/extractor/ccma.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -84,7 +81,6 @@ class CCMAIE(InfoExtractor):
'url': media_url,
'vcodec': 'none' if media_type == 'audio' else None,
})
- self._sort_formats(formats)
informacio = media['informacio']
title = informacio['titol']
diff --git a/hypervideo_dl/extractor/cctv.py b/hypervideo_dl/extractor/cctv.py
index 0ed5f32..466bdfb 100644
--- a/hypervideo_dl/extractor/cctv.py
+++ b/hypervideo_dl/extractor/cctv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -173,8 +170,6 @@ class CCTVIE(InfoExtractor):
hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
-
uploader = data.get('editer_name')
description = self._html_search_meta(
'description', webpage, default=None)
diff --git a/hypervideo_dl/extractor/cda.py b/hypervideo_dl/extractor/cda.py
index 72c4705..d1212e6 100644
--- a/hypervideo_dl/extractor/cda.py
+++ b/hypervideo_dl/extractor/cda.py
@@ -1,16 +1,13 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import base64
import codecs
-import re
+import datetime
+import hashlib
+import hmac
import json
+import re
from .common import InfoExtractor
-from ..compat import (
- compat_chr,
- compat_ord,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_ord, compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
float_or_none,
@@ -19,14 +16,27 @@ from ..utils import (
multipart_encode,
parse_duration,
random_birthday,
- urljoin,
+ traverse_obj,
+ try_call,
try_get,
+ urljoin,
)
class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+ _NETRC_MACHINE = 'cdapl'
+
_BASE_URL = 'http://www.cda.pl/'
+ _BASE_API_URL = 'https://api.cda.pl'
+ _API_HEADERS = {
+ 'Accept': 'application/vnd.cda.public+json',
+ 'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)',
+ }
+ # hardcoded in the app
+ _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
+ _BEARER_CACHE = 'cda-bearer'
+
_TESTS = [{
'url': 'http://www.cda.pl/video/5749950c',
'md5': '6f844bf51b15f31fae165365707ae970',
@@ -90,8 +100,71 @@ class CDAIE(InfoExtractor):
'Content-Type': content_type,
}, **kwargs)
+ def _perform_login(self, username, password):
+ cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
+ if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
+ self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
+ return
+
+ password_hash = base64.urlsafe_b64encode(hmac.new(
+ b's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
+ ''.join(f'{bytes((bt & 255, )).hex():0>2}'
+ for bt in hashlib.md5(password.encode()).digest()).encode(),
+ hashlib.sha256).digest()).decode().replace('=', '')
+
+ token_res = self._download_json(
+ f'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data=b'',
+ headers={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH},
+ query={
+ 'grant_type': 'password',
+ 'login': username,
+ 'password': password_hash,
+ })
+ self.cache.store(self._BEARER_CACHE, username, {
+ 'token': token_res['access_token'],
+ 'valid_until': token_res['expires_in'] + datetime.datetime.now().timestamp(),
+ })
+ self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'
+
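The byte-by-byte join in `_perform_login` is an elaborate spelling of `hashlib.md5(password.encode()).hexdigest()`: each `bytes((bt & 255,)).hex()` already yields two hex digits, so the join reproduces the hexdigest. The whole derivation therefore reduces to MD5 hexdigest → HMAC-SHA256 under the hardcoded app key → URL-safe base64 with padding stripped. An equivalent standalone function (the key argument stands in for the long literal above):

```python
import base64
import hashlib
import hmac

def cda_password_hash(password: str, app_key: bytes) -> str:
    digest_hex = hashlib.md5(password.encode()).hexdigest()  # 32 hex chars
    mac = hmac.new(app_key, digest_hex.encode(), hashlib.sha256).digest()
    return base64.urlsafe_b64encode(mac).decode().rstrip('=')  # unpadded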
def _real_extract(self, url):
video_id = self._match_id(url)
+
+ if 'Authorization' in self._API_HEADERS:
+ return self._api_extract(video_id)
+ else:
+ return self._web_extract(video_id, url)
+
+ def _api_extract(self, video_id):
+ meta = self._download_json(
+ f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
+
+ if meta.get('premium') and not meta.get('premium_free'):
+ self.report_drm(video_id)
+
+ uploader = traverse_obj(meta, 'author', 'login')
+
+ formats = [{
+ 'url': quality['file'],
+ 'format': quality.get('title'),
+ 'resolution': quality.get('name'),
+ 'height': try_call(lambda: int(quality['name'][:-1])),
+ 'filesize': quality.get('length'),
+ } for quality in meta['qualities'] if quality.get('file')]
+
+ return {
+ 'id': video_id,
+ 'title': meta.get('title'),
+ 'description': meta.get('description'),
+ 'uploader': None if uploader == 'anonim' else uploader,
+ 'average_rating': float_or_none(meta.get('rating')),
+ 'thumbnail': meta.get('thumb'),
+ 'formats': formats,
+ 'duration': meta.get('duration'),
+ 'age_limit': 18 if meta.get('for_adults') else 0,
+ 'view_count': meta.get('views'),
+ }
+
+ def _web_extract(self, video_id, url):
self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage(
self._BASE_URL + '/video/' + video_id, video_id)
@@ -147,7 +220,7 @@ class CDAIE(InfoExtractor):
b = []
for c in a:
f = compat_ord(c)
- b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f <= 126 else compat_chr(f))
+ b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
a = ''.join(b)
a = a.replace('.cda.mp4', '')
for p in ('.2cda.pl', '.3cda.pl'):
@@ -229,6 +302,4 @@ class CDAIE(InfoExtractor):
extract_format(webpage, resolution)
- self._sort_formats(formats)
-
return merge_dicts(info_dict, info)
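On the web path, the file-URL de-obfuscation in the hunk above is a fixed Caesar-style shift over the 94 printable ASCII characters (33–126), with everything else passing through unchanged. As a standalone sketch:

```python
def shift_printable(s: str, offset: int = 14) -> str:
    # mirrors chr(33 + (f + 14) % 94) from the hunk above
    return ''.join(
        chr(33 + (ord(c) + offset) % 94) if 33 <= ord(c) <= 126 else c
        for c in s)
```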
diff --git a/hypervideo_dl/extractor/cellebrite.py b/hypervideo_dl/extractor/cellebrite.py
new file mode 100644
index 0000000..9896a31
--- /dev/null
+++ b/hypervideo_dl/extractor/cellebrite.py
@@ -0,0 +1,63 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class CellebriteIE(InfoExtractor):
+ _VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
+ 'info_dict': {
+ 'id': '16025876',
+ 'ext': 'mp4',
+ 'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
+ 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
+ 'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
+ 'duration': 455,
+ 'tags': [],
+ }
+ }, {
+ 'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
+ 'info_dict': {
+ 'id': '29018255',
+ 'ext': 'mp4',
+ 'duration': 134,
+ 'tags': [],
+ 'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
+ 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
+ 'title': 'Android Extractions Explained',
+ }
+ }]
+
+ def _get_formats_and_subtitles(self, json_data, display_id):
+ formats = [{'url': url} for url in traverse_obj(json_data, ('mp4', ..., 'url')) or []]
+ subtitles = {}
+
+ for url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
+ fmt, sub = self._extract_m3u8_formats_and_subtitles(
+ url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'})
+ formats.extend(fmt)
+ self._merge_subtitles(sub, target=subtitles)
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ player_uuid = self._search_regex(
+ r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
+ json_data = self._download_json(
+ f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
+
+ formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
+ return {
+ 'id': str(json_data['videoId']),
+ 'title': json_data.get('name') or self._og_search_title(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': json_data.get('description') or self._og_search_description(webpage),
+ 'duration': json_data.get('seconds'),
+ 'tags': json_data.get('tags'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'http_headers': {'Referer': 'https://play.vidyard.com/'},
+ }
diff --git a/hypervideo_dl/extractor/ceskatelevize.py b/hypervideo_dl/extractor/ceskatelevize.py
index ddf66b2..be2b0bb 100644
--- a/hypervideo_dl/extractor/ceskatelevize.py
+++ b/hypervideo_dl/extractor/ceskatelevize.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -12,6 +9,7 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
+ str_or_none,
traverse_obj,
urlencode_postdata,
USER_AGENTS,
@@ -19,13 +17,13 @@ from ..utils import (
class CeskaTelevizeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
'info_dict': {
'id': '61924494877028507',
'ext': 'mp4',
- 'title': 'Hyde Park Civilizace: Bonus 01 - En',
+ 'title': 'Bonus 01 - En - Hyde Park Civilizace',
'description': 'English Subtittles',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 81.3,
@@ -36,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor):
},
}, {
# live stream
- 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+ 'url': 'http://www.ceskatelevize.cz/zive/ct1/',
'info_dict': {
- 'id': 402,
+ 'id': '102',
'ext': 'mp4',
- 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'title': r'ČT1 - živé vysílání online',
+ 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
'is_live': True,
},
'params': {
# m3u8 download
'skip_download': True,
},
- 'skip': 'Georestricted to Czech Republic',
+ }, {
+ # another live stream, via the older /ivysilani/zive/ URL
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+ 'only_matching': True,
+ 'info_dict': {
+ 'id': 402,
+ 'ext': 'mp4',
+ 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ },
+ # 'skip': 'Georestricted to Czech Republic',
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
@@ -56,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor):
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
- 'title': 'Queer: Bogotart',
- 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko',
+ 'title': 'Bogotart - Queer',
+ 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
},
'playlist': [{
'info_dict': {
'id': '61924494877311053',
'ext': 'mp4',
- 'title': 'Queer: Bogotart (Varování 18+)',
+ 'title': 'Bogotart - Queer (Varování 18+)',
'duration': 11.9,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
- 'title': 'Queer: Bogotart (Queer)',
+ 'title': 'Bogotart - Queer (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
@@ -87,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- parsed_url = compat_urllib_parse_urlparse(url)
- webpage = self._download_webpage(url, playlist_id)
- site_name = self._og_search_property('site_name', webpage, fatal=False, default=None)
+ webpage, urlh = self._download_webpage_handle(url, playlist_id)
+ parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
+ site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title:
- playlist_title = playlist_title.replace(f' — {site_name}', '', 1)
+ playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
playlist_description = self._og_search_description(webpage, default=None)
if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ')
- if parsed_url.path.startswith('/porady/'):
+ type_ = 'IDEC'
+ if re.search(r'(^/porady|/zive)/', parsed_url.path):
next_data = self._search_nextjs_data(webpage, playlist_id)
- idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+ if '/zive/' in parsed_url.path:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
+ else:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+ if not idec:
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
+ if idec:
+ type_ = 'bonus'
if not idec:
raise ExtractorError('Failed to find IDEC id')
- iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
- webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
- query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
+ iframe_hash = self._download_webpage(
+ 'https://www.ceskatelevize.cz/v-api/iframe-hash/',
+ playlist_id, note='Getting IFRAME hash')
+ query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
+ webpage = self._download_webpage(
+ 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
+ playlist_id, note='Downloading player', query=query)
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
- raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+ self.raise_geo_restricted(NOT_AVAILABLE_STRING)
+ if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
+ raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
type_ = None
episode_id = None
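The IDEC lookup leans on `traverse_obj`'s branching: a nested tuple in the path means "try each of these keys at this level", and `get_all=False` returns the first hit rather than a list of all of them. A toy illustration, assuming the helper from `hypervideo_dl.utils`:

```python
from hypervideo_dl.utils import traverse_obj

data = {'props': {'pageProps': {'data': {'mediaMeta': {'idec': '217 562 22150/0004'}}}}}
idec = traverse_obj(
    data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'),
    get_all=False)
assert idec == '217 562 22150/0004'  # 'show' missing, 'mediaMeta' matched
```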
@@ -177,7 +200,6 @@ class CeskaTelevizeIE(InfoExtractor):
is_live = item.get('type') == 'LIVE'
formats = []
for format_id, stream_url in item.get('streamUrls', {}).items():
- stream_url = stream_url.replace('https://', 'http://')
if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', 'm3u8_native',
@@ -199,7 +221,7 @@ class CeskaTelevizeIE(InfoExtractor):
entries[num]['formats'].extend(formats)
continue
- item_id = item.get('id') or item['assetId']
+ item_id = str_or_none(item.get('id') or item['assetId'])
title = item['title']
duration = float_or_none(item.get('duration'))
@@ -227,9 +249,8 @@ class CeskaTelevizeIE(InfoExtractor):
'is_live': is_live,
})
- for e in entries:
- self._sort_formats(e['formats'])
-
+ if len(entries) == 1:
+ return entries[0]
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
def _get_subtitles(self, episode_id, subs):
diff --git a/hypervideo_dl/extractor/cgtn.py b/hypervideo_dl/extractor/cgtn.py
index 89f1738..aaafa02 100644
--- a/hypervideo_dl/extractor/cgtn.py
+++ b/hypervideo_dl/extractor/cgtn.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
try_get,
diff --git a/hypervideo_dl/extractor/channel9.py b/hypervideo_dl/extractor/channel9.py
index 90024db..a884740 100644
--- a/hypervideo_dl/extractor/channel9.py
+++ b/hypervideo_dl/extractor/channel9.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -16,6 +14,7 @@ class Channel9IE(InfoExtractor):
IE_DESC = 'Channel 9'
IE_NAME = 'channel9'
_VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b']
_TESTS = [{
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
@@ -80,12 +79,6 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
- webpage)
-
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id
@@ -192,7 +185,6 @@ class Channel9IE(InfoExtractor):
if not formats and not slides and not zip_file:
self.raise_no_formats(
'None of recording, slides or zip are available for %s' % content_path)
- self._sort_formats(formats)
subtitles = {}
for caption in content_data.get('Captions', []):
diff --git a/hypervideo_dl/extractor/charlierose.py b/hypervideo_dl/extractor/charlierose.py
index 42c9af2..8fe6797 100644
--- a/hypervideo_dl/extractor/charlierose.py
+++ b/hypervideo_dl/extractor/charlierose.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import remove_end
@@ -40,8 +38,6 @@ class CharlieRoseIE(InfoExtractor):
info_dict = self._parse_html5_media_entries(
self._PLAYER_BASE % video_id, webpage, video_id,
m3u8_entry_protocol='m3u8_native')[0]
-
- self._sort_formats(info_dict['formats'])
self._remove_duplicate_formats(info_dict['formats'])
info_dict.update({
diff --git a/hypervideo_dl/extractor/chaturbate.py b/hypervideo_dl/extractor/chaturbate.py
index 8da51f9..99dfcfd 100644
--- a/hypervideo_dl/extractor/chaturbate.py
+++ b/hypervideo_dl/extractor/chaturbate.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -97,7 +95,6 @@ class ChaturbateIE(InfoExtractor):
# ffmpeg skips segments for fast m3u8
preference=-10 if m3u8_id == 'fast' else None,
m3u8_id=m3u8_id, fatal=False, live=True))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/chilloutzone.py b/hypervideo_dl/extractor/chilloutzone.py
index fd5202b..1a2f77c 100644
--- a/hypervideo_dl/extractor/chilloutzone.py
+++ b/hypervideo_dl/extractor/chilloutzone.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/chingari.py b/hypervideo_dl/extractor/chingari.py
index e6841fb..48091dd 100644
--- a/hypervideo_dl/extractor/chingari.py
+++ b/hypervideo_dl/extractor/chingari.py
@@ -1,14 +1,11 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import json
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
str_to_int,
url_or_none,
@@ -35,7 +32,6 @@ class ChingariBaseIE(InfoExtractor):
'url': base_url + '/apipublic' + media_data['path'],
'quality': 10,
})
- self._sort_formats(formats)
timestamp = str_to_int(post_data.get('created_at'))
if timestamp:
timestamp = int_or_none(timestamp, 1000)
@@ -48,8 +44,10 @@ class ChingariBaseIE(InfoExtractor):
return {
'id': id,
- 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
- 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
+ 'extractor_key': ChingariIE.ie_key(),
+ 'extractor': 'Chingari',
+ 'title': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
+ 'description': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
'duration': media_data.get('duration'),
'thumbnail': url_or_none(thumbnail),
'like_count': post_data.get('likeCount'),
@@ -105,11 +103,11 @@ class ChingariUserIE(ChingariBaseIE):
_VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
_TESTS = [{
'url': 'https://chingari.io/dada1023',
- 'playlist_mincount': 3,
'info_dict': {
'id': 'dada1023',
},
- 'entries': [{
+ 'params': {'playlistend': 3},
+ 'playlist': [{
'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
'info_dict': {
'id': '614781f3ade60b3a0bfff42a',
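
Dropping `compat_urllib_parse_unquote_plus` for the stdlib is safe on Python 3: `urllib.parse.unquote_plus` both decodes percent-escapes and turns `+` into spaces, which is what the Chingari captions need (sample caption invented):

import urllib.parse

caption = 'Dance+video+%F0%9F%92%83'
print(urllib.parse.unquote_plus(caption))  # Dance video 💃
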
diff --git a/hypervideo_dl/extractor/chirbit.py b/hypervideo_dl/extractor/chirbit.py
index 8d75cdf..452711d 100644
--- a/hypervideo_dl/extractor/chirbit.py
+++ b/hypervideo_dl/extractor/chirbit.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/cinchcast.py b/hypervideo_dl/extractor/cinchcast.py
index b861d54..7a7ea8b 100644
--- a/hypervideo_dl/extractor/cinchcast.py
+++ b/hypervideo_dl/extractor/cinchcast.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
unified_strdate,
@@ -10,6 +7,8 @@ from ..utils import (
class CinchcastIE(InfoExtractor):
_VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1']
+
_TESTS = [{
'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
'info_dict': {
@@ -48,7 +47,6 @@ class CinchcastIE(InfoExtractor):
'format_id': 'backup',
'url': backup_url,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/cinemax.py b/hypervideo_dl/extractor/cinemax.py
index 2c3ff8d..54cab22 100644
--- a/hypervideo_dl/extractor/cinemax.py
+++ b/hypervideo_dl/extractor/cinemax.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .hbo import HBOBaseIE
diff --git a/hypervideo_dl/extractor/cinetecamilano.py b/hypervideo_dl/extractor/cinetecamilano.py
new file mode 100644
index 0000000..5e770eb
--- /dev/null
+++ b/hypervideo_dl/extractor/cinetecamilano.py
@@ -0,0 +1,61 @@
+import json
+import urllib.error
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ parse_iso8601,
+ strip_or_none,
+ traverse_obj,
+ try_get,
+ urljoin,
+)
+
+
+class CinetecaMilanoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cinetecamilano\.it/film/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.cinetecamilano.it/film/1942',
+ 'info_dict': {
+ 'id': '1942',
+ 'ext': 'mp4',
+ 'title': 'Il draghetto Gris\u00f9 (4 episodi)',
+ 'release_date': '20220129',
+ 'thumbnail': r're:.+\.png',
+ 'description': 'md5:5328cbe080b93224712b6f17fcaf2c01',
+ 'modified_date': '20200520',
+ 'duration': 3139,
+ 'release_timestamp': 1643446208,
+ 'modified_timestamp': int
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ try:
+ film_json = self._download_json(
+ f'https://www.cinetecamilano.it/api/catalogo/{video_id}/?',
+ video_id, headers={
+ 'Referer': url,
+ 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
+ })
+ except ExtractorError as e:
+ if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
+ or isinstance(e.cause, json.JSONDecodeError)):
+ self.raise_login_required(method='cookies')
+ raise
+ if not film_json.get('success') or not film_json.get('archive'):
+ raise ExtractorError('Video information not found')
+ archive = film_json['archive']
+
+ return {
+ 'id': video_id,
+ 'title': archive.get('title'),
+ 'description': strip_or_none(archive.get('description')),
+ 'duration': float_or_none(archive.get('duration'), invscale=60),
+ 'release_timestamp': parse_iso8601(archive.get('updated_at'), delimiter=' '),
+ 'modified_timestamp': parse_iso8601(archive.get('created_at'), delimiter=' '),
+ 'thumbnail': urljoin(url, try_get(archive, lambda x: x['thumb']['src'].replace('/public/', '/storage/'))),
+ 'formats': self._extract_m3u8_formats(
+ urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4')
+ }
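
The new extractor turns a site cookie into an `Authorization` header: `try_get` reaches into the cookie jar for `cnt-token` and falls back to an empty string when the user is not logged in. The same shape with only the stdlib (cookie value invented):

from http.cookies import SimpleCookie

cookies = SimpleCookie('cnt-token=abc123')
try:
    auth = f'Bearer {cookies["cnt-token"].value}'
except KeyError:
    auth = ''  # matches `try_get(...) or ''` when the cookie is absent
print(auth)  # Bearer abc123
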
diff --git a/hypervideo_dl/extractor/ciscolive.py b/hypervideo_dl/extractor/ciscolive.py
index 349c5eb..0668578 100644
--- a/hypervideo_dl/extractor/ciscolive.py
+++ b/hypervideo_dl/extractor/ciscolive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/ciscowebex.py b/hypervideo_dl/extractor/ciscowebex.py
index 882dae9..44595d8 100644
--- a/hypervideo_dl/extractor/ciscowebex.py
+++ b/hypervideo_dl/extractor/ciscowebex.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -75,7 +72,6 @@ class CiscoWebexIE(InfoExtractor):
'vcodec': 'none',
'acodec': 'mp3',
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/cjsw.py b/hypervideo_dl/extractor/cjsw.py
index 1dea0d7..c37a3b8 100644
--- a/hypervideo_dl/extractor/cjsw.py
+++ b/hypervideo_dl/extractor/cjsw.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
diff --git a/hypervideo_dl/extractor/cliphunter.py b/hypervideo_dl/extractor/cliphunter.py
index f2ca7a3..2b907dc 100644
--- a/hypervideo_dl/extractor/cliphunter.py
+++ b/hypervideo_dl/extractor/cliphunter.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -64,7 +62,6 @@ class CliphunterIE(InfoExtractor):
'height': int_or_none(height),
'tbr': int_or_none(f.get('br')),
})
- self._sort_formats(formats)
thumbnail = self._search_regex(
r"var\s+mov_thumb\s*=\s*'([^']+)';",
diff --git a/hypervideo_dl/extractor/clippit.py b/hypervideo_dl/extractor/clippit.py
index a1a7a77..006a713 100644
--- a/hypervideo_dl/extractor/clippit.py
+++ b/hypervideo_dl/extractor/clippit.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
diff --git a/hypervideo_dl/extractor/cliprs.py b/hypervideo_dl/extractor/cliprs.py
index d55b26d..567f77b 100644
--- a/hypervideo_dl/extractor/cliprs.py
+++ b/hypervideo_dl/extractor/cliprs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .onet import OnetBaseIE
diff --git a/hypervideo_dl/extractor/clipsyndicate.py b/hypervideo_dl/extractor/clipsyndicate.py
index 6cdb42f..6064443 100644
--- a/hypervideo_dl/extractor/clipsyndicate.py
+++ b/hypervideo_dl/extractor/clipsyndicate.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
diff --git a/hypervideo_dl/extractor/closertotruth.py b/hypervideo_dl/extractor/closertotruth.py
index 517e121..e78e26a 100644
--- a/hypervideo_dl/extractor/closertotruth.py
+++ b/hypervideo_dl/extractor/closertotruth.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/cloudflarestream.py b/hypervideo_dl/extractor/cloudflarestream.py
index 2fdcfbb..748e8e9 100644
--- a/hypervideo_dl/extractor/cloudflarestream.py
+++ b/hypervideo_dl/extractor/cloudflarestream.py
@@ -1,8 +1,4 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
-import re
from .common import InfoExtractor
@@ -19,6 +15,7 @@ class CloudflareStreamIE(InfoExtractor):
)
(?P<id>%s)
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
+ _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
_TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': {
@@ -40,21 +37,13 @@ class CloudflareStreamIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
base_url = 'https://%s/%s/' % (domain, video_id)
if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode(
- video_id.split('.')[1]), video_id)['sub']
+ video_id.split('.')[1] + '==='), video_id)['sub']
manifest_base_url = base_url + 'manifest/video.'
formats = self._extract_m3u8_formats(
@@ -62,7 +51,6 @@ class CloudflareStreamIE(InfoExtractor):
'm3u8_native', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats(
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
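
The `+ '==='` fix matters for signed Cloudflare Stream ids, which are JWT-like (`header.payload.signature`) and arrive with their base64url padding stripped. CPython's lenient decoder ignores surplus `=`, so appending three is always enough. A stdlib check (token built by hand):

import base64
import json

payload = base64.urlsafe_b64encode(json.dumps({'sub': 'abc123'}).encode()).rstrip(b'=')
print(json.loads(base64.urlsafe_b64decode(payload + b'==='))['sub'])  # abc123
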
diff --git a/hypervideo_dl/extractor/cloudy.py b/hypervideo_dl/extractor/cloudy.py
index 85ca20e..848643e 100644
--- a/hypervideo_dl/extractor/cloudy.py
+++ b/hypervideo_dl/extractor/cloudy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
str_to_int,
diff --git a/hypervideo_dl/extractor/clubic.py b/hypervideo_dl/extractor/clubic.py
index 98f9cb5..403e44a 100644
--- a/hypervideo_dl/extractor/clubic.py
+++ b/hypervideo_dl/extractor/clubic.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -45,7 +42,6 @@ class ClubicIE(InfoExtractor):
'url': src['src'],
'quality': quality_order(src['streamQuality']),
} for src in sources]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/clyp.py b/hypervideo_dl/extractor/clyp.py
index e6b2ac4..0aaf73d 100644
--- a/hypervideo_dl/extractor/clyp.py
+++ b/hypervideo_dl/extractor/clyp.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
@@ -62,7 +60,6 @@ class ClypIE(InfoExtractor):
'format_id': format_id,
'vcodec': 'none',
})
- self._sort_formats(formats)
title = metadata['Title']
description = metadata.get('Description')
diff --git a/hypervideo_dl/extractor/cmt.py b/hypervideo_dl/extractor/cmt.py
index a4ddb91..8aed770 100644
--- a/hypervideo_dl/extractor/cmt.py
+++ b/hypervideo_dl/extractor/cmt.py
@@ -1,11 +1,9 @@
-from __future__ import unicode_literals
-
from .mtv import MTVIE
# TODO Remove - Reason: Outdated Site
-class CMTIE(MTVIE):
+class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'cmt.com'
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
diff --git a/hypervideo_dl/extractor/cnbc.py b/hypervideo_dl/extractor/cnbc.py
index da3730c..68fd025 100644
--- a/hypervideo_dl/extractor/cnbc.py
+++ b/hypervideo_dl/extractor/cnbc.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import smuggle_url
diff --git a/hypervideo_dl/extractor/cnn.py b/hypervideo_dl/extractor/cnn.py
index af11d95..61b62fa 100644
--- a/hypervideo_dl/extractor/cnn.py
+++ b/hypervideo_dl/extractor/cnn.py
@@ -1,9 +1,6 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from .turner import TurnerBaseIE
-from ..utils import url_basename
+from ..utils import merge_dicts, try_call, url_basename
class CNNIE(TurnerBaseIE):
@@ -144,3 +141,58 @@ class CNNArticleIE(InfoExtractor):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
+
+
+class CNNIndonesiaIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.cnnindonesia\.com/[\w-]+/(?P<upload_date>\d{8})\d+-\d+-(?P<id>\d+)/(?P<display_id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.cnnindonesia.com/ekonomi/20220909212635-89-845885/alasan-harga-bbm-di-indonesia-masih-disubsidi',
+ 'info_dict': {
+ 'id': '845885',
+ 'ext': 'mp4',
+ 'description': 'md5:e7954bfa6f1749bc9ef0c079a719c347',
+ 'upload_date': '20220909',
+ 'title': 'Alasan Harga BBM di Indonesia Masih Disubsidi',
+ 'timestamp': 1662859088,
+ 'duration': 120.0,
+ 'thumbnail': r're:https://akcdn\.detik\.net\.id/visual/2022/09/09/thumbnail-ekopedia-alasan-harga-bbm-disubsidi_169\.jpeg',
+ 'tags': ['ekopedia', 'subsidi bbm', 'subsidi', 'bbm', 'bbm subsidi', 'harga pertalite naik'],
+ 'age_limit': 0,
+ 'release_timestamp': 1662859088,
+ 'release_date': '20220911',
+ 'uploader': 'Asfahan Yahsyi',
+ }
+ }, {
+ 'url': 'https://www.cnnindonesia.com/internasional/20220911104341-139-846189/video-momen-charles-disambut-meriah-usai-dilantik-jadi-raja-inggris',
+ 'info_dict': {
+ 'id': '846189',
+ 'ext': 'mp4',
+ 'upload_date': '20220911',
+ 'duration': 76.0,
+ 'timestamp': 1662869995,
+ 'description': 'md5:ece7b003b3ee7d81c6a5cfede7d5397d',
+ 'thumbnail': r're:https://akcdn\.detik\.net\.id/visual/2022/09/11/thumbnail-video-1_169\.jpeg',
+ 'title': 'VIDEO: Momen Charles Disambut Meriah usai Dilantik jadi Raja Inggris',
+ 'tags': ['raja charles', 'raja charles iii', 'ratu elizabeth', 'ratu elizabeth meninggal dunia', 'raja inggris', 'inggris'],
+ 'age_limit': 0,
+ 'release_date': '20220911',
+ 'uploader': 'REUTERS',
+ 'release_timestamp': 1662869995,
+ }
+ }]
+
+ def _real_extract(self, url):
+ upload_date, video_id, display_id = self._match_valid_url(url).group('upload_date', 'id', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ json_ld_list = list(self._yield_json_ld(webpage, display_id))
+ json_ld_data = self._json_ld(json_ld_list, display_id)
+ embed_url = next(
+ json_ld.get('embedUrl') for json_ld in json_ld_list if json_ld.get('@type') == 'VideoObject')
+
+ return merge_dicts(json_ld_data, {
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'upload_date': upload_date,
+ 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', '))
+ })
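
CNNIndonesiaIE delegates the heavy lifting: it keeps the first `VideoObject`'s `embedUrl` from the page's JSON-LD and returns a `url_transparent` result, so the embed's own extractor supplies the formats while the JSON-LD metadata is merged on top. The selection step in isolation (sample objects invented):

json_ld_list = [
    {'@type': 'NewsArticle', 'headline': '...'},
    {'@type': 'VideoObject', 'embedUrl': 'https://cdn.example/embed/846189'},
]
embed_url = next(
    item.get('embedUrl') for item in json_ld_list if item.get('@type') == 'VideoObject')
print(embed_url)  # https://cdn.example/embed/846189
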
diff --git a/hypervideo_dl/extractor/comedycentral.py b/hypervideo_dl/extractor/comedycentral.py
index 5a12ab5..05fc9f2 100644
--- a/hypervideo_dl/extractor/comedycentral.py
+++ b/hypervideo_dl/extractor/comedycentral.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .mtv import MTVServicesInfoExtractor
diff --git a/hypervideo_dl/extractor/common.py b/hypervideo_dl/extractor/common.py
index 0035191..4b56307 100644
--- a/hypervideo_dl/extractor/common.py
+++ b/hypervideo_dl/extractor/common.py
@@ -1,67 +1,61 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import collections
+import getpass
import hashlib
+import http.client
+import http.cookiejar
+import http.cookies
+import inspect
import itertools
import json
+import math
import netrc
import os
import random
import re
import sys
import time
-import math
-
-from ..compat import (
- compat_cookiejar_Cookie,
- compat_cookies_SimpleCookie,
- compat_etree_Element,
- compat_etree_fromstring,
- compat_expanduser,
- compat_getpass,
- compat_http_client,
- compat_os_name,
- compat_str,
- compat_urllib_error,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
- compat_urllib_request,
- compat_urlparse,
- compat_xml_parse_error,
-)
-from ..downloader import FileDownloader
-from ..downloader.f4m import (
- get_base_url,
- remove_encrypted_media,
-)
+import types
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree
+
+from ..compat import functools # isort: split
+from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
+from ..cookies import LenientSimpleCookie
+from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
+ IDENTITY,
+ JSON_LD_RE,
+ NO_DEFAULT,
+ ExtractorError,
+ FormatSorter,
+ GeoRestrictedError,
+ GeoUtils,
+ LenientJSONDecoder,
+ RegexNotFoundError,
+ RetryManager,
+ UnsupportedError,
age_restricted,
base_url,
bug_reports_message,
+ classproperty,
clean_html,
- compiled_regex_type,
+ deprecation_warning,
determine_ext,
- determine_protocol,
dict_get,
encode_data_uri,
error_to_compat_str,
extract_attributes,
- ExtractorError,
filter_dict,
fix_xml_ampersands,
float_or_none,
format_field,
- GeoRestrictedError,
- GeoUtils,
int_or_none,
join_nonempty,
js_to_json,
- JSON_LD_RE,
mimetype2ext,
network_exceptions,
- NO_DEFAULT,
orderedSet,
parse_bitrate,
parse_codecs,
@@ -69,16 +63,17 @@ from ..utils import (
parse_iso8601,
parse_m3u8_attributes,
parse_resolution,
- RegexNotFoundError,
sanitize_filename,
+ sanitize_url,
sanitized_Request,
+ smuggle_url,
str_or_none,
str_to_int,
strip_or_none,
traverse_obj,
+ try_call,
try_get,
unescapeHTML,
- UnsupportedError,
unified_strdate,
unified_timestamp,
update_Request,
@@ -93,7 +88,7 @@ from ..utils import (
)
-class InfoExtractor(object):
+class InfoExtractor:
"""Information Extractor class.
Information extractors are the classes that, given a URL, extract
@@ -111,7 +106,9 @@ class InfoExtractor(object):
For a video, the dictionaries must include the following fields:
id: Video identifier.
- title: Video title, unescaped.
+ title: Video title, unescaped. Set to an empty string if video has
+ no title as opposed to "None" which signifies that the
+ extractor failed to obtain a title
Additionally, it must contain either a formats entry or a url one:
@@ -153,13 +150,17 @@ class InfoExtractor(object):
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
+ * aspect_ratio Aspect ratio of the video, if known
+ Automatically calculated from width and height
* resolution Textual description of width and height
+ Automatically calculated from width and height
* dynamic_range The dynamic range of the video. One of:
"SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
* tbr Average bitrate of audio and video in KBit/s
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz
+ * audio_channels Number of audio channels
* vbr Average video bitrate in KBit/s
* fps Frame rate
* vcodec Name of the video codec in use
@@ -216,8 +217,10 @@ class InfoExtractor(object):
* no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean.
* has_drm The format has DRM and cannot be downloaded. Boolean
- * downloader_options A dictionary of downloader options as
- described in FileDownloader (For internal use only)
+ * downloader_options A dictionary of downloader options
+ (For internal use only)
+ * http_chunk_size Chunk size for HTTP downloads
+ * ffmpeg_args Extra arguments for ffmpeg downloader
RTMP formats can also have the additional fields: page_url,
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
rtmp_protocol, rtmp_real_time
@@ -285,6 +288,7 @@ class InfoExtractor(object):
captions instead of normal subtitles
duration: Length of the video in seconds, as an integer or float.
view_count: How many users have watched the video on the platform.
+ concurrent_view_count: How many users are currently watching the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
repost_count: Number of reposts of the video
@@ -320,7 +324,8 @@ class InfoExtractor(object):
live stream that goes on instead of a fixed-length video.
was_live: True, False, or None (=unknown). Whether this video was
originally a live stream.
- live_status: 'is_live', 'is_upcoming', 'was_live', 'not_live' or None (=unknown)
+ live_status: None (=unknown), 'is_live', 'is_upcoming', 'was_live', 'not_live',
+ or 'post_live' (was live, but VOD is not yet processed)
If absent, automatically set from is_live, was_live
start_time: Time in seconds where the reproduction should start, as
specified in the URL.
@@ -333,11 +338,13 @@ class InfoExtractor(object):
playable_in_embed: Whether this video is allowed to play in embedded
players on other sites. Can be True (=always allowed),
False (=never allowed), None (=unknown), or a string
- specifying the criteria for embedability (Eg: 'whitelist')
+ specifying the criteria for embedability; e.g. 'whitelist'
availability: Under what condition the video is available. One of
'private', 'premium_only', 'subscriber_only', 'needs_auth',
'unlisted' or 'public'. Use 'InfoExtractor._availability'
to set it
+ _old_archive_ids: A list of old archive ids needed for backward compatibility
+ _format_sort_fields: A list of fields to use for sorting formats
__post_extractor: A function to be called just before the metadata is
written to either disk, logger or console. The function
must return a dict which will be added to the info_dict.
@@ -387,6 +394,15 @@ class InfoExtractor(object):
release_year: Year (YYYY) when the album was released.
composer: Composer of the piece
+ The following fields should only be set for clips that should be cut from the original video:
+
+ section_start: Start time of the section in seconds
+ section_end: End time of the section in seconds
+
+ The following fields should only be set for storyboards:
+ rows: Number of rows in each storyboard fragment, as an integer
+ columns: Number of columns in each storyboard fragment, as an integer
+
Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information.
@@ -396,7 +412,7 @@ class InfoExtractor(object):
There must be a key "entries", which is a list, an iterable, or a PagedList
object, each element of which is a valid dictionary by this specification.
- Additionally, playlists can have "id", "title", and any other relevent
+ Additionally, playlists can have "id", "title", and any other relevant
attributes with the same semantics as videos (see above).
It can also have the following optional fields:
@@ -429,14 +445,26 @@ class InfoExtractor(object):
title, description etc.
- Subclasses of this should define a _VALID_URL regexp and, re-define the
- _real_extract() and (optionally) _real_initialize() methods.
- Probably, they should also be added to the list of extractors.
+ Subclasses of this should also be added to the list of extractors and
+ should define a _VALID_URL regexp and, re-define the _real_extract() and
+ (optionally) _real_initialize() methods.
Subclasses may also override suitable() if necessary, but ensure the function
signature is preserved and that this function imports everything it needs
(except other extractors), so that lazy_extractors works correctly.
+ Subclasses can define a list of _EMBED_REGEX, which will be searched for in
+ the HTML of Generic webpages. It may also override _extract_embed_urls
+ or _extract_from_webpage as necessary. While these are normally classmethods,
+ _extract_from_webpage is allowed to be an instance method.
+
+ _extract_from_webpage may raise self.StopExtraction() to stop further
+ processing of the webpage and obtain exclusive rights to it. This is useful
+ when the extractor cannot reliably be matched using just the URL,
+ e.g. invidious/peertube instances
+
+ Embed-only extractors can be defined by setting _VALID_URL = False.
+
To support username + password (or netrc) login, the extractor must define a
_NETRC_MACHINE and re-define _perform_login(username, password) and
(optionally) _initialize_pre_login() methods. The _perform_login method will
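
Taken together, these docstring additions describe extractors reachable only through embeds: `_VALID_URL = False` makes `suitable()` always miss, while `_EMBED_REGEX` lets the generic extractor hand matches over. A minimal sketch under those rules (site and pattern invented):

from hypervideo_dl.extractor.common import InfoExtractor

class ExamplePlayerIE(InfoExtractor):
    _VALID_URL = False  # never selected by URL, only via embeds
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://player\.example\.com/[^"\']+)']
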
@@ -460,6 +488,9 @@ class InfoExtractor(object):
will be used by geo restriction bypass mechanism similarly
to _GEO_COUNTRIES.
+ The _ENABLED attribute should be set to False for IEs that
+ are disabled by default and must be explicitly enabled.
+
The _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""
@@ -471,16 +502,23 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None
_WORKING = True
+ _ENABLED = True
_NETRC_MACHINE = None
IE_DESC = None
+ SEARCH_KEY = None
+ _VALID_URL = None
+ _EMBED_REGEX = []
- _LOGIN_HINTS = {
- 'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials',
- 'cookies': (
- 'Use --cookies-from-browser or --cookies for the authentication. '
- 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
- 'password': 'Use --username and --password, or --netrc to provide account credentials',
- }
+ def _login_hint(self, method=NO_DEFAULT, netrc=None):
+ password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
+ return {
+ None: '',
+ 'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
+ 'password': f'Use {password_hint}',
+ 'cookies': (
+ 'Use --cookies-from-browser or --cookies for the authentication. '
+ 'See https://github.com/hypervideo/hypervideo/wiki/FAQ#how-do-i-pass-cookies-to-hypervideo for how to manually pass cookies'),
+ }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader (a YoutubeDL instance).
@@ -493,12 +531,12 @@ class InfoExtractor(object):
@classmethod
def _match_valid_url(cls, url):
+ if cls._VALID_URL is False:
+ return None
# This does not use has/getattr intentionally - we want to know whether
# we have cached the regexp for *this* class, whereas getattr would also
# match the superclass
if '_VALID_URL_RE' not in cls.__dict__:
- if '_VALID_URL' not in cls.__dict__:
- cls._VALID_URL = cls._make_valid_url()
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
return cls._VALID_URL_RE.match(url)
@@ -543,7 +581,7 @@ class InfoExtractor(object):
if username:
self._perform_login(username, password)
elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE):
- self.report_warning(f'Login with password is not supported for this website. {self._LOGIN_HINTS["cookies"]}')
+ self.report_warning(f'Login with password is not supported for this website. {self._login_hint("cookies")}')
self._real_initialize()
self._ready = True
@@ -609,8 +647,7 @@ class InfoExtractor(object):
if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- self._downloader.write_debug(
- '[debug] Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip)
+ self.write_debug(f'Using fake IP {self._x_forwarded_for_ip} as X-Forwarded-For')
return
# Path 2: bypassing based on country code
@@ -629,7 +666,7 @@ class InfoExtractor(object):
if country:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
self._downloader.write_debug(
- 'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper()))
+ f'Using fake IP {self._x_forwarded_for_ip} ({country.upper()}) as X-Forwarded-For')
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
@@ -643,10 +680,10 @@ class InfoExtractor(object):
return None
if self._x_forwarded_for_ip:
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
- subtitles = ie_result.get('subtitles')
- if (subtitles and 'live_chat' in subtitles
- and 'no-live-chat' in self.get_param('compat_opts', [])):
- del subtitles['live_chat']
+ subtitles = ie_result.get('subtitles') or {}
+ if 'no-live-chat' in self.get_param('compat_opts'):
+ for lang in ('live_chat', 'comments', 'danmaku'):
+ subtitles.pop(lang, None)
return ie_result
except GeoRestrictedError as e:
if self.__maybe_fake_ip_and_retry(e.countries):
@@ -655,17 +692,11 @@ class InfoExtractor(object):
except UnsupportedError:
raise
except ExtractorError as e:
- kwargs = {
- 'video_id': e.video_id or self.get_temp_id(url),
- 'ie': self.IE_NAME,
- 'tb': e.traceback or sys.exc_info()[2],
- 'expected': e.expected,
- 'cause': e.cause
- }
- if hasattr(e, 'countries'):
- kwargs['countries'] = e.countries
- raise type(e)(e.orig_msg, **kwargs)
- except compat_http_client.IncompleteRead as e:
+            e.video_id = e.video_id or self.get_temp_id(url)
+            e.ie = e.ie or self.IE_NAME
+ e.traceback = e.traceback or sys.exc_info()[2]
+ raise
+ except http.client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@@ -689,8 +720,16 @@ class InfoExtractor(object):
"""Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader
+ @property
+ def cache(self):
+ return self._downloader.cache
+
+ @property
+ def cookiejar(self):
+ return self._downloader.cookiejar
+
def _initialize_pre_login(self):
- """ Intialization before login. Redefine in subclasses."""
+ """ Initialization before login. Redefine in subclasses."""
pass
def _perform_login(self, username, password):
@@ -710,13 +749,13 @@ class InfoExtractor(object):
"""A string for getting the InfoExtractor with get_info_extractor"""
return cls.__name__[:-2]
- @property
- def IE_NAME(self):
- return compat_str(type(self).__name__[:-2])
+ @classproperty
+ def IE_NAME(cls):
+ return cls.__name__[:-2]
@staticmethod
def __can_accept_status_code(err, expected_status):
- assert isinstance(err, compat_urllib_error.HTTPError)
+ assert isinstance(err, urllib.error.HTTPError)
if expected_status is None:
return False
elif callable(expected_status):
@@ -724,7 +763,14 @@ class InfoExtractor(object):
else:
return err.code in variadic(expected_status)
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
+ def _create_request(self, url_or_request, data=None, headers=None, query=None):
+ if isinstance(url_or_request, urllib.request.Request):
+ return update_Request(url_or_request, data=data, headers=headers, query=query)
+ if query:
+ url_or_request = update_url_query(url_or_request, query)
+ return sanitized_Request(url_or_request, data, headers or {})
+
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
"""
Return the response handle.
@@ -742,9 +788,9 @@ class InfoExtractor(object):
self.report_download_webpage(video_id)
elif note is not False:
if video_id is None:
- self.to_screen('%s' % (note,))
+ self.to_screen(str(note))
else:
- self.to_screen('%s: %s' % (video_id, note))
+ self.to_screen(f'{video_id}: {note}')
# Some sites check X-Forwarded-For HTTP header in order to figure out
# the origin of the client behind proxy. This allows bypassing geo
@@ -752,21 +798,13 @@ class InfoExtractor(object):
# geo unrestricted country. We will do so once we encounter any
# geo restriction error.
if self._x_forwarded_for_ip:
- if 'X-Forwarded-For' not in headers:
- headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+ headers = (headers or {}).copy()
+ headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
- if isinstance(url_or_request, compat_urllib_request.Request):
- url_or_request = update_Request(
- url_or_request, data=data, headers=headers, query=query)
- else:
- if query:
- url_or_request = update_url_query(url_or_request, query)
- if data is not None or headers:
- url_or_request = sanitized_Request(url_or_request, data, headers)
try:
- return self._downloader.urlopen(url_or_request)
+ return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
except network_exceptions as err:
- if isinstance(err, compat_urllib_error.HTTPError):
+ if isinstance(err, urllib.error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
# being closed before it can be read. Works around the
@@ -780,21 +818,49 @@ class InfoExtractor(object):
if errnote is None:
errnote = 'Unable to download webpage'
- errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
+ errmsg = f'{errnote}: {error_to_compat_str(err)}'
if fatal:
raise ExtractorError(errmsg, cause=err)
else:
self.report_warning(errmsg)
return False
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
+ encoding=None, data=None, headers={}, query={}, expected_status=None):
"""
Return a tuple (page content as string, URL handle).
- See _download_webpage docstring for arguments specification.
+ Arguments:
+ url_or_request -- plain text URL as a string or
+ a urllib.request.Request object
+ video_id -- Video/playlist/item identifier (string)
+
+ Keyword arguments:
+ note -- note printed before downloading (string)
+ errnote -- note printed in case of an error (string)
+ fatal -- flag denoting whether error should be considered fatal,
+            i.e. whether it should cause ExtractorError to be raised,
+ otherwise a warning will be reported and extraction continued
+ encoding -- encoding for a page content decoding, guessed automatically
+ when not explicitly specified
+ data -- POST data (bytes)
+ headers -- HTTP headers (dict)
+ query -- URL query (dict)
+        expected_status -- allows accepting failed HTTP requests (non 2xx
+ status code) by explicitly specifying a set of accepted status
+ codes. Can be any of the following entities:
+ - an integer type specifying an exact failed status code to
+ accept
+ - a list or a tuple of integer types specifying a list of
+ failed status codes to accept
+ - a callable accepting an actual failed status code and
+ returning True if it should be accepted
+ Note that this argument does not affect success status codes (2xx)
+ which are always accepted.
"""
+
# Strip hashes from the URL (#1038)
- if isinstance(url_or_request, (compat_str, str)):
+ if isinstance(url_or_request, str):
url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
@@ -849,247 +915,178 @@ class InfoExtractor(object):
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
+ def _request_dump_filename(self, url, video_id):
+ basen = f'{video_id}_{url}'
+ trim_length = self.get_param('trim_file_name') or 240
+ if len(basen) > trim_length:
+ h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:trim_length - len(h)] + h
+ filename = sanitize_filename(f'{basen}.dump', restricted=True)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if compat_os_name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = fR'\\?\{absfilepath}'
+ return filename
+
+ def __decode_webpage(self, webpage_bytes, encoding, headers):
+ if not encoding:
+ encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
+ try:
+ return webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ return webpage_bytes.decode('utf-8', 'replace')
+
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
- content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
- if not encoding:
- encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
- if self.get_param('write_pages', False):
- basen = '%s_%s' % (video_id, urlh.geturl())
- trim_length = self.get_param('trim_file_name') or 240
- if len(basen) > trim_length:
- h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
- basen = basen[:trim_length - len(h)] + h
- raw_filename = basen + '.dump'
- filename = sanitize_filename(raw_filename, restricted=True)
- self.to_screen('Saving request to ' + filename)
- # Working around MAX_PATH limitation on Windows (see
- # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
- if compat_os_name == 'nt':
- absfilepath = os.path.abspath(filename)
- if len(absfilepath) > 259:
- filename = '\\\\?\\' + absfilepath
+ if self.get_param('write_pages'):
+ filename = self._request_dump_filename(urlh.geturl(), video_id)
+ self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
- try:
- content = webpage_bytes.decode(encoding, 'replace')
- except LookupError:
- content = webpage_bytes.decode('utf-8', 'replace')
-
+ content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers)
self.__check_blocked(content)
return content
+ def __print_error(self, errnote, fatal, video_id, err):
+ if fatal:
+ raise ExtractorError(f'{video_id}: {errnote}', cause=err)
+ elif errnote:
+ self.report_warning(f'{video_id}: {errnote}: {err}')
+
+ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True, errnote=None):
+ if transform_source:
+ xml_string = transform_source(xml_string)
+ try:
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
+ except xml.etree.ElementTree.ParseError as ve:
+ self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve)
+
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, errnote=None, **parser_kwargs):
+ try:
+ return json.loads(
+ json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
+ except ValueError as ve:
+ self.__print_error('Failed to parse JSON' if errnote is None else errnote, fatal, video_id, ve)
+
+ def _parse_socket_response_as_json(self, data, *args, **kwargs):
+ return self._parse_json(data[data.find('{'):data.rfind('}') + 1], *args, **kwargs)
+
+ def __create_download_methods(name, parser, note, errnote, return_value):
+
+ def parse(ie, content, *args, errnote=errnote, **kwargs):
+ if parser is None:
+ return content
+ if errnote is False:
+ kwargs['errnote'] = errnote
+ # parser is fetched by name so subclasses can override it
+ return getattr(ie, parser)(content, *args, **kwargs)
+
+ def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query, expected_status=expected_status)
+ if res is False:
+ return res
+ content, urlh = res
+ return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
+
+ def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ if self.get_param('load_pages'):
+ url_or_request = self._create_request(url_or_request, data, headers, query)
+ filename = self._request_dump_filename(url_or_request.full_url, video_id)
+ self.to_screen(f'Loading request from {filename}')
+ try:
+ with open(filename, 'rb') as dumpf:
+ webpage_bytes = dumpf.read()
+ except OSError as e:
+ self.report_warning(f'Unable to load request from disk: {e}')
+ else:
+ content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
+ return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote)
+ kwargs = {
+ 'note': note,
+ 'errnote': errnote,
+ 'transform_source': transform_source,
+ 'fatal': fatal,
+ 'encoding': encoding,
+ 'data': data,
+ 'headers': headers,
+ 'query': query,
+ 'expected_status': expected_status,
+ }
+ if parser is None:
+ kwargs.pop('transform_source')
+ # The method is fetched by name so subclasses can override _download_..._handle
+ res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs)
+ return res if res is False else res[0]
+
+ def impersonate(func, name, return_value):
+ func.__name__, func.__qualname__ = name, f'InfoExtractor.{name}'
+ func.__doc__ = f'''
+ @param transform_source Apply this transformation before parsing
+ @returns {return_value}
+
+ See _download_webpage_handle docstring for other arguments specification
+ '''
+
+ impersonate(download_handle, f'_download_{name}_handle', f'({return_value}, URL handle)')
+ impersonate(download_content, f'_download_{name}', f'{return_value}')
+ return download_handle, download_content
+
+ _download_xml_handle, _download_xml = __create_download_methods(
+ 'xml', '_parse_xml', 'Downloading XML', 'Unable to download XML', 'xml as an xml.etree.ElementTree.Element')
+ _download_json_handle, _download_json = __create_download_methods(
+ 'json', '_parse_json', 'Downloading JSON metadata', 'Unable to download JSON metadata', 'JSON object as a dict')
+ _download_socket_json_handle, _download_socket_json = __create_download_methods(
+ 'socket_json', '_parse_socket_response_as_json', 'Polling socket', 'Unable to poll socket', 'JSON object as a dict')
+ __download_webpage = __create_download_methods('webpage', None, None, None, 'data of the page as a string')[1]
+
def _download_webpage(
self, url_or_request, video_id, note=None, errnote=None,
- fatal=True, tries=1, timeout=5, encoding=None, data=None,
- headers={}, query={}, expected_status=None):
+ fatal=True, tries=1, timeout=NO_DEFAULT, *args, **kwargs):
"""
Return the data of the page as a string.
- Arguments:
- url_or_request -- plain text URL as a string or
- a compat_urllib_request.Requestobject
- video_id -- Video/playlist/item identifier (string)
-
Keyword arguments:
- note -- note printed before downloading (string)
- errnote -- note printed in case of an error (string)
- fatal -- flag denoting whether error should be considered fatal,
- i.e. whether it should cause ExtractionError to be raised,
- otherwise a warning will be reported and extraction continued
tries -- number of tries
timeout -- sleep interval between tries
- encoding -- encoding for a page content decoding, guessed automatically
- when not explicitly specified
- data -- POST data (bytes)
- headers -- HTTP headers (dict)
- query -- URL query (dict)
- expected_status -- allows to accept failed HTTP requests (non 2xx
- status code) by explicitly specifying a set of accepted status
- codes. Can be any of the following entities:
- - an integer type specifying an exact failed status code to
- accept
- - a list or a tuple of integer types specifying a list of
- failed status codes to accept
- - a callable accepting an actual failed status code and
- returning True if it should be accepted
- Note that this argument does not affect success status codes (2xx)
- which are always accepted.
+
+ See _download_webpage_handle docstring for other arguments specification.
"""
- success = False
+ R''' # NB: These are unused; should they be deprecated?
+ if tries != 1:
+ self._downloader.deprecation_warning('tries argument is deprecated in InfoExtractor._download_webpage')
+ if timeout is NO_DEFAULT:
+ timeout = 5
+ else:
+ self._downloader.deprecation_warning('timeout argument is deprecated in InfoExtractor._download_webpage')
+ '''
+
try_count = 0
- while success is False:
+ while True:
try:
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- success = True
- except compat_http_client.IncompleteRead as e:
+ return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
+ except http.client.IncompleteRead as e:
try_count += 1
if try_count >= tries:
raise e
self._sleep(timeout, video_id)
- if res is False:
- return res
- else:
- content, _ = res
- return content
-
- def _download_xml_handle(
- self, url_or_request, video_id, note='Downloading XML',
- errnote='Unable to download XML', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (xml as an compat_etree_Element, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- xml_string, urlh = res
- return self._parse_xml(
- xml_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_xml(
- self, url_or_request, video_id,
- note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None,
- data=None, headers={}, query={}, expected_status=None):
- """
- Return the xml as an compat_etree_Element.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_xml_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- xml_string = transform_source(xml_string)
- try:
- return compat_etree_fromstring(xml_string.encode('utf-8'))
- except compat_xml_parse_error as ve:
- errmsg = '%s: Failed to parse XML ' % video_id
- if fatal:
- raise ExtractorError(errmsg, cause=ve)
- else:
- self.report_warning(errmsg + str(ve))
-
- def _download_json_handle(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- json_string, urlh = res
- return self._parse_json(
- json_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_json(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return the JSON object as a dict.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- json_string = transform_source(json_string)
- try:
- return json.loads(json_string, strict=False)
- except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % video_id
- if fatal:
- raise ExtractorError(errmsg, cause=ve)
- else:
- self.report_warning(errmsg + str(ve))
-
- def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True):
- return self._parse_json(
- data[data.find('{'):data.rfind('}') + 1],
- video_id, transform_source, fatal)
-
- def _download_socket_json_handle(
- self, url_or_request, video_id, note='Polling socket',
- errnote='Unable to poll socket', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- webpage, urlh = res
- return self._parse_socket_response_as_json(
- webpage, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_socket_json(
- self, url_or_request, video_id, note='Polling socket',
- errnote='Unable to poll socket', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return the JSON object as a dict.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_socket_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
- idstr = format_field(video_id, template='%s: ')
+ idstr = format_field(video_id, None, '%s: ')
msg = f'[{self.IE_NAME}] {idstr}{msg}'
if only_once:
if f'WARNING: {msg}' in self._printed_messages:
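
`__create_download_methods` collapses the four hand-written `_download_*` / `_download_*_handle` pairs into one factory; both the parser and the handle method are looked up by name so subclasses can still override them. The pattern, condensed and stand-alone (names shortened for illustration):

def make_methods(parser_name, note):
    def handle(self, url_or_request, video_id):
        res = self._download_webpage_handle(url_or_request, video_id, note=note)
        if res is False:  # non-fatal download failure
            return res
        content, urlh = res
        # parser is fetched by name so subclasses can override it
        return getattr(self, parser_name)(content, video_id), urlh

    def content_only(self, url_or_request, video_id):
        res = handle(self, url_or_request, video_id)
        return res if res is False else res[0]

    return handle, content_only
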
@@ -1099,17 +1096,19 @@ class InfoExtractor(object):
def to_screen(self, msg, *args, **kwargs):
"""Print msg to screen, prefixing it with '[ie_name]'"""
- self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+ self._downloader.to_screen(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
def write_debug(self, msg, *args, **kwargs):
- self._downloader.write_debug('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+ self._downloader.write_debug(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
def get_param(self, name, default=None, *args, **kwargs):
if self._downloader:
return self._downloader.params.get(name, default, *args, **kwargs)
return default
- def report_drm(self, video_id, partial=False):
+ def report_drm(self, video_id, partial=NO_DEFAULT):
+ if partial is not NO_DEFAULT:
+ self._downloader.deprecation_warning('InfoExtractor.report_drm no longer accepts the argument partial')
self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
def report_extraction(self, id_or_name):
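
`report_drm` keeps accepting `partial` only to warn about it; `NO_DEFAULT` is the usual sentinel for telling "argument not passed" apart from an explicit falsy value. In isolation:

NO_DEFAULT = object()

def report_drm(video_id, partial=NO_DEFAULT):
    if partial is not NO_DEFAULT:
        print('deprecation: report_drm no longer accepts the argument partial')
    print(f'{video_id}: This video is DRM protected')

report_drm('abc')         # no warning
report_drm('abc', False)  # still warns: False was explicitly passed
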
@@ -1135,11 +1134,7 @@ class InfoExtractor(object):
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
return
- if method is NO_DEFAULT:
- method = 'any' if self.supports_login() else 'cookies'
- if method is not None:
- assert method in self._LOGIN_HINTS, 'Invalid login method'
- msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
+ msg += format_field(self._login_hint(method), None, '. %s')
raise ExtractorError(msg, expected=True)
def raise_geo_restricted(
@@ -1176,10 +1171,12 @@ class InfoExtractor(object):
'url': url,
}
- def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None, video_kwargs=None, **kwargs):
- urls = (self.url_result(self._proto_relative_url(m), ie, **(video_kwargs or {}))
- for m in orderedSet(map(getter, matches) if getter else matches))
- return self.playlist_result(urls, playlist_id, playlist_title, **kwargs)
+ @classmethod
+ def playlist_from_matches(cls, matches, playlist_id=None, playlist_title=None,
+ getter=IDENTITY, ie=None, video_kwargs=None, **kwargs):
+ return cls.playlist_result(
+ (cls.url_result(m, ie, **(video_kwargs or {})) for m in orderedSet(map(getter, matches), lazy=True)),
+ playlist_id, playlist_title, **kwargs)
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None, *, multi_video=False, **kwargs):
@@ -1203,7 +1200,9 @@ class InfoExtractor(object):
In case of failure return a default value or raise a WARNING or a
RegexNotFoundError, depending on fatal, specifying the field name.
"""
- if isinstance(pattern, (str, compat_str, compiled_regex_type)):
+ if string is None:
+ mobj = None
+ elif isinstance(pattern, (str, re.Pattern)):
mobj = re.search(pattern, string, flags)
else:
for p in pattern:
@@ -1229,6 +1228,33 @@ class InfoExtractor(object):
self.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
+ def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
+ contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
+ """Searches string for the JSON object specified by start_pattern"""
+ # NB: end_pattern is only used to reduce the size of the initial match
+ if default is NO_DEFAULT:
+ default, has_default = {}, False
+ else:
+ fatal, has_default = False, True
+
+ json_string = self._search_regex(
+ rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
+ string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
+ if not json_string:
+ return default
+
+ _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
+ try:
+ return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
+ except ExtractorError as e:
+ if fatal:
+ raise ExtractorError(
+ f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id)
+ elif not has_default:
+ self.report_warning(
+ f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id)
+ return default
+
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
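
What the new `_search_json` assembles from its pieces is one regex of the form `(?:start)\s*(?P<json>{...})\s*(?:end)`, whose `json` group is then parsed leniently. The same mechanics with the stdlib (page snippet invented; the real method also tolerates trailing garbage via `ignore_extra`):

import json
import re

webpage = '<script>var config = {"id": "846189", "hls": "m.m3u8"};</script>'
mobj = re.search(r'(?:var\s+config\s*=)\s*(?P<json>{(?s:.+)})\s*(?:;)', webpage)
print(json.loads(mobj.group('json'))['hls'])  # m.m3u8
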
@@ -1256,7 +1282,7 @@ class InfoExtractor(object):
else:
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
- except (IOError, netrc.NetrcParseError) as err:
+ except (OSError, netrc.NetrcParseError) as err:
self.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
@@ -1293,7 +1319,7 @@ class InfoExtractor(object):
if tfa is not None:
return tfa
- return compat_getpass('Type %s and press [Return]: ' % note)
+ return getpass.getpass('Type %s and press [Return]: ' % note)
# Helper functions for extracting OpenGraph info
@staticmethod
@@ -1344,7 +1370,7 @@ class InfoExtractor(object):
return self._og_search_property('url', html, **kargs)
def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
- return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
+ return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
name = variadic(name)
@@ -1357,12 +1383,20 @@ class InfoExtractor(object):
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
- def _rta_search(self, html):
+ @staticmethod
+ def _rta_search(html):
# See http://www.rtalabel.org/index.php?content=howtofaq#single
if re.search(r'(?ix)<meta\s+name="rating"\s+'
r' content="RTA-5042-1996-1400-1577-RTA"',
html):
return 18
+
+ # And then there are the jokers who advertise that they use RTA, but actually don't.
+ AGE_LIMIT_MARKERS = [
+ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
+ ]
+ if any(re.search(marker, html) for marker in AGE_LIMIT_MARKERS):
+ return 18
return 0
def _media_rating_search(self, html):
@@ -1401,27 +1435,25 @@ class InfoExtractor(object):
return self._html_search_meta('twitter:player', html,
'twitter card player')
- def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
- json_ld_list = list(re.finditer(JSON_LD_RE, html))
- default = kwargs.get('default', NO_DEFAULT)
- # JSON-LD may be malformed and thus `fatal` should be respected.
- # At the same time `default` may be passed that assumes `fatal=False`
- # for _search_regex. Let's simulate the same behavior here as well.
- fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
- json_ld = []
- for mobj in json_ld_list:
- json_ld_item = self._parse_json(
- mobj.group('json_ld'), video_id, fatal=fatal)
- if not json_ld_item:
- continue
- if isinstance(json_ld_item, dict):
- json_ld.append(json_ld_item)
- elif isinstance(json_ld_item, (list, tuple)):
- json_ld.extend(json_ld_item)
- if json_ld:
- json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
- if json_ld:
- return json_ld
+ def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Yield all json ld objects in the html"""

+ if default is not NO_DEFAULT:
+ fatal = False
+ for mobj in re.finditer(JSON_LD_RE, html):
+ json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
+ for json_ld in variadic(json_ld_item):
+ if isinstance(json_ld, dict):
+ yield json_ld
+
+ def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT):
+ """Search for a video in any json ld in the html"""
+ if default is not NO_DEFAULT:
+ fatal = False
+ info = self._json_ld(
+ list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)),
+ video_id, fatal=fatal, expected_type=expected_type)
+ if info:
+ return info
if default is not NO_DEFAULT:
return default
elif fatal:
@@ -1431,15 +1463,11 @@ class InfoExtractor(object):
return {}
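+
+    # --- Editor's illustrative sketch; not part of the upstream patch. ---
+    # _search_json_ld scans every JSON-LD block in the page and maps
+    # schema.org objects onto info_dict fields. A tolerant call looks like:
+    def _example_json_ld(self, webpage, video_id):
+        return self._search_json_ld(
+            webpage, video_id, expected_type='VideoObject', default={})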
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
- if isinstance(json_ld, compat_str):
+ if isinstance(json_ld, str):
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
if not json_ld:
return {}
info = {}
- if not isinstance(json_ld, (list, tuple, dict)):
- return info
- if isinstance(json_ld, dict):
- json_ld = [json_ld]
INTERACTION_TYPE_MAP = {
'CommentAction': 'comment',
@@ -1452,6 +1480,10 @@ class InfoExtractor(object):
'ViewAction': 'view',
}
+ def is_type(e, *expected_types):
+ type = variadic(traverse_obj(e, '@type'))
+ return any(x in type for x in expected_types)
+
def extract_interaction_type(e):
interaction_type = e.get('interactionType')
if isinstance(interaction_type, dict):
@@ -1465,9 +1497,7 @@ class InfoExtractor(object):
if not isinstance(interaction_statistic, list):
return
for is_e in interaction_statistic:
- if not isinstance(is_e, dict):
- continue
- if is_e.get('@type') != 'InteractionCounter':
+ if not is_type(is_e, 'InteractionCounter'):
continue
interaction_type = extract_interaction_type(is_e)
if not interaction_type:
@@ -1504,44 +1534,53 @@ class InfoExtractor(object):
info['chapters'] = chapters
def extract_video_object(e):
- assert e['@type'] == 'VideoObject'
author = e.get('author')
info.update({
'url': url_or_none(e.get('contentUrl')),
+ 'ext': mimetype2ext(e.get('encodingFormat')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnails': [{'url': url_or_none(url)}
- for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
+ 'thumbnails': [{'url': unescapeHTML(url)}
+ for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
+ if url_or_none(url)],
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
# author can be an instance of 'Organization' or 'Person' types.
            # both types can have a 'name' property (inherited from 'Thing' type). [1]
            # however, some websites use the 'Text' type instead.
# 1. https://schema.org/VideoObject
- 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
- 'filesize': float_or_none(e.get('contentSize')),
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
+ 'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
+ 'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
'height': int_or_none(e.get('height')),
'view_count': int_or_none(e.get('interactionCount')),
+ 'tags': try_call(lambda: e.get('keywords').split(',')),
})
+ if is_type(e, 'AudioObject'):
+ info.update({
+ 'vcodec': 'none',
+ 'abr': int_or_none(e.get('bitrate')),
+ })
extract_interaction_statistic(e)
extract_chapter_information(e)
def traverse_json_ld(json_ld, at_top_level=True):
- for e in json_ld:
+ for e in variadic(json_ld):
+ if not isinstance(e, dict):
+ continue
if at_top_level and '@context' not in e:
continue
if at_top_level and set(e.keys()) == {'@context', '@graph'}:
- traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False)
- break
- item_type = e.get('@type')
- if expected_type is not None and expected_type != item_type:
+ traverse_json_ld(e['@graph'], at_top_level=False)
+ continue
+ if expected_type is not None and not is_type(e, expected_type):
continue
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
if rating is not None:
info['average_rating'] = rating
- if item_type in ('TVEpisode', 'Episode'):
+ if is_type(e, 'TVEpisode', 'Episode'):
episode_name = unescapeHTML(e.get('name'))
info.update({
'episode': episode_name,
@@ -1551,44 +1590,46 @@ class InfoExtractor(object):
if not info.get('title') and episode_name:
info['title'] = episode_name
part_of_season = e.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
+ if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'):
info.update({
'season': unescapeHTML(part_of_season.get('name')),
'season_number': int_or_none(part_of_season.get('seasonNumber')),
})
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
+ if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
- elif item_type == 'Movie':
+ elif is_type(e, 'Movie'):
info.update({
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('dateCreated')),
})
- elif item_type in ('Article', 'NewsArticle'):
+ elif is_type(e, 'Article', 'NewsArticle'):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')),
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
- if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
+ if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
extract_video_object(e['video'][0])
- elif item_type == 'VideoObject':
+ elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
+ extract_video_object(e['subjectOf'][0])
+ elif is_type(e, 'VideoObject', 'AudioObject'):
extract_video_object(e)
if expected_type is None:
continue
else:
break
video = e.get('video')
- if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ if is_type(video, 'VideoObject'):
extract_video_object(video)
if expected_type is None:
continue
else:
break
- traverse_json_ld(json_ld)
+ traverse_json_ld(json_ld)
return filter_dict(info)
def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
@@ -1598,15 +1639,16 @@ class InfoExtractor(object):
webpage, 'next.js data', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
- def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
- ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
- # not all website do this, but it can be changed
- # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
+ """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)
+ FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex(
- (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
- r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
- webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+ (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
+ webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+ default=NO_DEFAULT if fatal else (None, None, None))
+ if js is None:
+ return {}
args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
@@ -1614,7 +1656,8 @@ class InfoExtractor(object):
if val in ('undefined', 'void 0'):
args[key] = 'null'
- return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+ ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+ return traverse_obj(ret, traverse) or {}
@staticmethod
def _hidden_inputs(html):
@@ -1638,296 +1681,27 @@ class InfoExtractor(object):
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)
- class FormatSort:
- regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
-
- default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
- 'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr',
- 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
- ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
- 'height', 'width', 'proto', 'vext', 'abr', 'aext',
- 'fps', 'fs_approx', 'source', 'id')
-
- settings = {
- 'vcodec': {'type': 'ordered', 'regex': True,
- 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
- 'acodec': {'type': 'ordered', 'regex': True,
- 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
- 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
- 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
- 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
- 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
- 'vext': {'type': 'ordered', 'field': 'video_ext',
- 'order': ('mp4', 'webm', 'flv', '', 'none'),
- 'order_free': ('webm', 'mp4', 'flv', '', 'none')},
- 'aext': {'type': 'ordered', 'field': 'audio_ext',
- 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
- 'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
- 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
- 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
- 'field': ('vcodec', 'acodec'),
- 'function': lambda it: int(any(v != 'none' for v in it))},
- 'ie_pref': {'priority': True, 'type': 'extractor'},
- 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
- 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
- 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
- 'quality': {'convert': 'float', 'default': -1},
- 'filesize': {'convert': 'bytes'},
- 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
- 'id': {'convert': 'string', 'field': 'format_id'},
- 'height': {'convert': 'float_none'},
- 'width': {'convert': 'float_none'},
- 'fps': {'convert': 'float_none'},
- 'tbr': {'convert': 'float_none'},
- 'vbr': {'convert': 'float_none'},
- 'abr': {'convert': 'float_none'},
- 'asr': {'convert': 'float_none'},
- 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
-
- 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
- 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
- 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
- 'ext': {'type': 'combined', 'field': ('vext', 'aext')},
- 'res': {'type': 'multiple', 'field': ('height', 'width'),
- 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
-
- # For compatibility with youtube-dl
- 'format_id': {'type': 'alias', 'field': 'id'},
- 'preference': {'type': 'alias', 'field': 'ie_pref'},
- 'language_preference': {'type': 'alias', 'field': 'lang'},
- 'source_preference': {'type': 'alias', 'field': 'source'},
- 'protocol': {'type': 'alias', 'field': 'proto'},
- 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
-
- # Deprecated
- 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
- 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
- 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
- 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
- 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
- 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
- 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
- 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
- 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
- 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
- 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
- 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
- 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
- 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
- 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
- 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
- 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
- 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
- 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
- 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
- }
+ @classproperty(cache=True)
+ def FormatSort(cls):
+ class FormatSort(FormatSorter):
+ def __init__(ie, *args, **kwargs):
+ super().__init__(ie._downloader, *args, **kwargs)
- def __init__(self, ie, field_preference):
- self._order = []
- self.ydl = ie._downloader
- self.evaluate_params(self.ydl.params, field_preference)
- if ie.get_param('verbose'):
- self.print_verbose_info(self.ydl.write_debug)
-
- def _get_field_setting(self, field, key):
- if field not in self.settings:
- if key in ('forced', 'priority'):
- return False
- self.ydl.deprecation_warning(
- f'Using arbitrary fields ({field}) for format sorting is deprecated '
- 'and may be removed in a future version')
- self.settings[field] = {}
- propObj = self.settings[field]
- if key not in propObj:
- type = propObj.get('type')
- if key == 'field':
- default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
- elif key == 'convert':
- default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
- else:
- default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
- propObj[key] = default
- return propObj[key]
-
- def _resolve_field_value(self, field, value, convertNone=False):
- if value is None:
- if not convertNone:
- return None
- else:
- value = value.lower()
- conversion = self._get_field_setting(field, 'convert')
- if conversion == 'ignore':
- return None
- if conversion == 'string':
- return value
- elif conversion == 'float_none':
- return float_or_none(value)
- elif conversion == 'bytes':
- return FileDownloader.parse_bytes(value)
- elif conversion == 'order':
- order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
- use_regex = self._get_field_setting(field, 'regex')
- list_length = len(order_list)
- empty_pos = order_list.index('') if '' in order_list else list_length + 1
- if use_regex and value is not None:
- for i, regex in enumerate(order_list):
- if regex and re.match(regex, value):
- return list_length - i
- return list_length - empty_pos # not in list
- else: # not regex or value = None
- return list_length - (order_list.index(value) if value in order_list else empty_pos)
- else:
- if value.isnumeric():
- return float(value)
- else:
- self.settings[field]['convert'] = 'string'
- return value
-
- def evaluate_params(self, params, sort_extractor):
- self._use_free_order = params.get('prefer_free_formats', False)
- self._sort_user = params.get('format_sort', [])
- self._sort_extractor = sort_extractor
-
- def add_item(field, reverse, closest, limit_text):
- field = field.lower()
- if field in self._order:
- return
- self._order.append(field)
- limit = self._resolve_field_value(field, limit_text)
- data = {
- 'reverse': reverse,
- 'closest': False if limit is None else closest,
- 'limit_text': limit_text,
- 'limit': limit}
- if field in self.settings:
- self.settings[field].update(data)
- else:
- self.settings[field] = data
-
- sort_list = (
- tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
- + (tuple() if params.get('format_sort_force', False)
- else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
- + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
-
- for item in sort_list:
- match = re.match(self.regex, item)
- if match is None:
- raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
- field = match.group('field')
- if field is None:
- continue
- if self._get_field_setting(field, 'type') == 'alias':
- alias, field = field, self._get_field_setting(field, 'field')
- if self._get_field_setting(alias, 'deprecated'):
- self.ydl.deprecation_warning(
- f'Format sorting alias {alias} is deprecated '
- f'and may be removed in a future version. Please use {field} instead')
- reverse = match.group('reverse') is not None
- closest = match.group('separator') == '~'
- limit_text = match.group('limit')
-
- has_limit = limit_text is not None
- has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
- has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
-
- fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
- limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
- limit_count = len(limits)
- for (i, f) in enumerate(fields):
- add_item(f, reverse, closest,
- limits[i] if i < limit_count
- else limits[0] if has_limit and not has_multiple_limits
- else None)
-
- def print_verbose_info(self, write_debug):
- if self._sort_user:
- write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
- if self._sort_extractor:
- write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
- write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
- '+' if self._get_field_setting(field, 'reverse') else '', field,
- '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
- self._get_field_setting(field, 'limit_text'),
- self._get_field_setting(field, 'limit'))
- if self._get_field_setting(field, 'limit_text') is not None else '')
- for field in self._order if self._get_field_setting(field, 'visible')]))
-
- def _calculate_field_preference_from_value(self, format, field, type, value):
- reverse = self._get_field_setting(field, 'reverse')
- closest = self._get_field_setting(field, 'closest')
- limit = self._get_field_setting(field, 'limit')
-
- if type == 'extractor':
- maximum = self._get_field_setting(field, 'max')
- if value is None or (maximum is not None and value >= maximum):
- value = -1
- elif type == 'boolean':
- in_list = self._get_field_setting(field, 'in_list')
- not_in_list = self._get_field_setting(field, 'not_in_list')
- value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
- elif type == 'ordered':
- value = self._resolve_field_value(field, value, True)
-
- # try to convert to number
- val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
- is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
- if is_num:
- value = val_num
-
- return ((-10, 0) if value is None
- else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
- else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
- else (0, value, 0) if not reverse and (limit is None or value <= limit)
- else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
- else (-1, value, 0))
-
- def _calculate_field_preference(self, format, field):
- type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
- get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
- if type == 'multiple':
- type = 'field' # Only 'field' is allowed in multiple for now
- actual_fields = self._get_field_setting(field, 'field')
-
- value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
- else:
- value = get_value(field)
- return self._calculate_field_preference_from_value(format, field, type, value)
-
- def calculate_preference(self, format):
- # Determine missing protocol
- if not format.get('protocol'):
- format['protocol'] = determine_protocol(format)
-
- # Determine missing ext
- if not format.get('ext') and 'url' in format:
- format['ext'] = determine_ext(format['url'])
- if format.get('vcodec') == 'none':
- format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
- format['video_ext'] = 'none'
- else:
- format['video_ext'] = format['ext']
- format['audio_ext'] = 'none'
- # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
- # format['preference'] = -1000
-
- # Determine missing bitrates
- if format.get('tbr') is None:
- if format.get('vbr') is not None and format.get('abr') is not None:
- format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
- else:
- if format.get('vcodec') != 'none' and format.get('vbr') is None:
- format['vbr'] = format.get('tbr') - format.get('abr', 0)
- if format.get('acodec') != 'none' and format.get('abr') is None:
- format['abr'] = format.get('tbr') - format.get('vbr', 0)
-
- return tuple(self._calculate_field_preference(format, field) for field in self._order)
+ deprecation_warning(
+ 'hypervideo_dl.InfoExtractor.FormatSort is deprecated and may be removed in the future. '
+ 'Use hypervideo_dl.utils.FormatSorter instead')
+ return FormatSort
def _sort_formats(self, formats, field_preference=[]):
- if not formats:
+ if not field_preference:
+ self._downloader.deprecation_warning(
+ 'hypervideo_dl.InfoExtractor._sort_formats is deprecated and is no longer required')
return
- format_sort = self.FormatSort(self, field_preference)
- formats.sort(key=lambda f: format_sort.calculate_preference(f))
+ self._downloader.deprecation_warning(
+ 'hypervideo_dl.InfoExtractor._sort_formats is deprecated and no longer works as expected. '
+ 'Return _format_sort_fields in the info_dict instead')
+ if formats:
+ formats[0]['__sort_fields'] = field_preference
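+
+    # --- Editor's illustrative sketch; not part of the upstream patch. ---
+    # The replacement for _sort_formats(): declare the preference in the
+    # returned info_dict instead. Field names follow utils.FormatSorter:
+    def _example_real_extract(self, url):
+        video_id = self._match_id(url)
+        formats = []  # built up by the extractor as usual
+        return {
+            'id': video_id,
+            'formats': formats,
+            # prefer higher resolution, then extractor-given source order
+            '_format_sort_fields': ('res', 'source'),
+        }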
def _check_formats(self, formats, video_id):
if formats:
@@ -1969,14 +1743,9 @@ class InfoExtractor(object):
else 'https:')
def _proto_relative_url(self, url, scheme=None):
- if url is None:
- return url
- if url.startswith('//'):
- if scheme is None:
- scheme = self.http_scheme()
- return scheme + url
- else:
- return url
+ scheme = scheme or self.http_scheme()
+ assert scheme.endswith(':')
+ return sanitize_url(url, scheme=scheme[:-1])
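+
+    # Editor's note (illustrative, not part of the patch): with the default
+    # (https) scheme, self._proto_relative_url('//cdn.example.com/v.mp4')
+    # now yields 'https://cdn.example.com/v.mp4', while fully-qualified URLs
+    # pass through sanitize_url() essentially unchanged.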
def _sleep(self, timeout, video_id, msg_template=None):
if msg_template is None:
@@ -1988,17 +1757,19 @@ class InfoExtractor(object):
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None, data=None, headers={}, query={}):
- manifest = self._download_xml(
+ res = self._download_xml_handle(
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest',
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
transform_source=transform_source,
fatal=fatal, data=data, headers=headers, query=query)
-
- if manifest is False:
+ if res is False:
return []
+ manifest, urlh = res
+ manifest_url = urlh.geturl()
+
return self._parse_f4m_formats(
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
@@ -2006,7 +1777,7 @@ class InfoExtractor(object):
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None):
- if not isinstance(manifest, compat_etree_Element) and not fatal:
+ if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal:
return []
# currently hypervideo cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
@@ -2166,7 +1937,7 @@ class InfoExtractor(object):
]), m3u8_doc)
def format_url(url):
- return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
+ return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2342,7 +2113,7 @@ class InfoExtractor(object):
audio_group_id = last_stream_inf.get('AUDIO')
# As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
# references a rendition group MUST have a CODECS attribute.
- # However, this is not always respected, for example, [2]
+ # However, this is not always respected. E.g. [2]
# contains EXT-X-STREAM-INF tag which references AUDIO
# rendition group but does not have CODECS and despite
# referencing an audio group it represents a complete
@@ -2406,12 +2177,14 @@ class InfoExtractor(object):
return '/'.join(out)
def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
- smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
-
- if smil is False:
+ res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
+ if res is False:
assert not fatal
return [], {}
+ smil, urlh = res
+ smil_url = urlh.geturl()
+
namespace = self._parse_smil_namespace(smil)
fmts = self._parse_smil_formats(
@@ -2428,13 +2201,17 @@ class InfoExtractor(object):
return fmts
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
- smil = self._download_smil(smil_url, video_id, fatal=fatal)
- if smil is False:
+ res = self._download_smil(smil_url, video_id, fatal=fatal)
+ if res is False:
return {}
+
+ smil, urlh = res
+ smil_url = urlh.geturl()
+
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
- return self._download_xml(
+ return self._download_xml_handle(
smil_url, video_id, 'Downloading SMIL file',
'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
@@ -2533,7 +2310,7 @@ class InfoExtractor(object):
})
continue
- src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
@@ -2556,7 +2333,7 @@ class InfoExtractor(object):
'plugin': 'flowplayer-3.2.0.1',
}
f4m_url += '&' if '?' in f4m_url else '?'
- f4m_url += compat_urllib_parse_urlencode(f4m_params)
+ f4m_url += urllib.parse.urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
@@ -2613,11 +2390,15 @@ class InfoExtractor(object):
return subtitles
def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
- xspf = self._download_xml(
+ res = self._download_xml_handle(
            xspf_url, playlist_id, 'Downloading xspf playlist',

'Unable to download xspf manifest', fatal=fatal)
- if xspf is False:
+ if res is False:
return []
+
+ xspf, urlh = res
+ xspf_url = urlh.geturl()
+
return self._parse_xspf(
xspf, playlist_id, xspf_url=xspf_url,
xspf_base_url=base_url(xspf_url))
@@ -2651,7 +2432,6 @@ class InfoExtractor(object):
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
})
- self._sort_formats(formats)
entries.append({
'id': playlist_id,
@@ -2682,7 +2462,10 @@ class InfoExtractor(object):
mpd_doc, urlh = res
if mpd_doc is None:
return [], {}
- mpd_base_url = base_url(urlh.geturl())
+
+ # We could have been redirected to a new url when we retrieved our mpd file.
+ mpd_url = urlh.geturl()
+ mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles(
mpd_doc, mpd_id, mpd_base_url, mpd_url)
@@ -2790,15 +2573,20 @@ class InfoExtractor(object):
mime_type = representation_attrib['mimeType']
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
- codecs = parse_codecs(representation_attrib.get('codecs', ''))
+ codec_str = representation_attrib.get('codecs', '')
+ # Some kind of binary subtitle found in some youtube livestreams
+ if mime_type == 'application/x-rawcc':
+ codecs = {'scodec': codec_str}
+ else:
+ codecs = parse_codecs(codec_str)
if content_type not in ('video', 'audio', 'text'):
if mime_type == 'image/jpeg':
content_type = mime_type
- elif codecs['vcodec'] != 'none':
+ elif codecs.get('vcodec', 'none') != 'none':
content_type = 'video'
- elif codecs['acodec'] != 'none':
+ elif codecs.get('acodec', 'none') != 'none':
content_type = 'audio'
- elif codecs.get('tcodec', 'none') != 'none':
+ elif codecs.get('scodec', 'none') != 'none':
content_type = 'text'
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
content_type = 'text'
@@ -2809,12 +2597,12 @@ class InfoExtractor(object):
base_url = ''
for element in (representation, adaptation_set, period, mpd_doc):
base_url_e = element.find(_add_ns('BaseURL'))
- if base_url_e is not None:
+ if try_call(lambda: base_url_e.text) is not None:
base_url = base_url_e.text + base_url
if re.match(r'^https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
- base_url = compat_urlparse.urljoin(mpd_base_url, base_url)
+ base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
@@ -2869,6 +2657,8 @@ class InfoExtractor(object):
def prepare_template(template_name, identifiers):
tmpl = representation_ms_info[template_name]
+ if representation_id is not None:
+ tmpl = tmpl.replace('$RepresentationID$', representation_id)
            # First off, % characters outside $...$ templates
# must be escaped by doubling for proper processing
# by % operator string formatting used further (see
@@ -2883,8 +2673,6 @@ class InfoExtractor(object):
t += c
# Next, $...$ templates are translated to their
# %(...) counterparts to be used with % operator
- if representation_id is not None:
- t = t.replace('$RepresentationID$', representation_id)
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
t.replace('$$', '$')
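+            # Editor's note (illustrative, not part of the patch): after the
+            # steps above, a template such as
+            #   'seg_$RepresentationID$_$Number%05d$.m4s'
+            # (with a hypothetical representation id 'video1') becomes
+            #   'seg_video1_%(Number)05d.m4s'
+            # ready for expansion via `tmpl % {'Number': n}`.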
@@ -2960,8 +2748,8 @@ class InfoExtractor(object):
segment_number += 1
segment_time += segment_d
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
- # No media template
- # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+ # No media template,
+ # e.g. https://www.youtube.com/watch?v=iXZV5uAYMJI
# or any YouTube dashsegments video
fragments = []
segment_index = 0
@@ -2978,7 +2766,7 @@ class InfoExtractor(object):
representation_ms_info['fragments'] = fragments
elif 'segment_urls' in representation_ms_info:
# Segment URLs with no SegmentTimeline
- # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+ # E.g. https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
# https://github.com/ytdl-org/youtube-dl/pull/14844
fragments = []
segment_duration = float_or_none(
@@ -3070,9 +2858,10 @@ class InfoExtractor(object):
stream_name = stream.get('Name')
stream_language = stream.get('Language', 'und')
for track in stream.findall('QualityLevel'):
- fourcc = track.get('FourCC') or ('AACL' if track.get('AudioTag') == '255' else None)
+ KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
+ fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
# TODO: add support for WVC1 and WMAP
- if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
+ if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'):
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
@@ -3084,7 +2873,7 @@ class InfoExtractor(object):
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
- track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+ track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern)
fragments = []
fragment_ctx = {
@@ -3103,7 +2892,7 @@ class InfoExtractor(object):
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
for _ in range(fragment_repeat):
fragments.append({
- 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
'duration': fragment_ctx['duration'] / stream_timescale,
})
fragment_ctx['time'] += fragment_ctx['duration']
@@ -3171,7 +2960,8 @@ class InfoExtractor(object):
return f
return {}
- def _media_formats(src, cur_media_type, type_info={}):
+ def _media_formats(src, cur_media_type, type_info=None):
+ type_info = type_info or {}
full_url = absolute_url(src)
ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
@@ -3189,12 +2979,13 @@ class InfoExtractor(object):
formats = [{
'url': full_url,
'vcodec': 'none' if cur_media_type == 'audio' else None,
+ 'ext': ext,
}]
return is_plain_url, formats
entries = []
# amp-video and amp-audio are very similar to their HTML5 counterparts
- # so we wll include them right here (see
+ # so we will include them right here (see
# https://www.ampproject.org/docs/reference/components/amp-video)
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
@@ -3204,8 +2995,8 @@ class InfoExtractor(object):
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
- # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
- # http://www.porntrex.com/maps/videositemap.xml).
+ # https://github.com/ytdl-org/youtube-dl/issues/11979,
+ # e.g. http://www.porntrex.com/maps/videositemap.xml).
r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
for media_tag, _, media_type, media_content in media_tags:
media_info = {
@@ -3213,9 +3004,10 @@ class InfoExtractor(object):
'subtitles': {},
}
media_attributes = extract_attributes(media_tag)
- src = strip_or_none(media_attributes.get('src'))
+ src = strip_or_none(dict_get(media_attributes, ('src', 'data-video-src', 'data-src', 'data-source')))
if src:
- _, formats = _media_formats(src, media_type)
+ f = parse_content_type(media_attributes.get('type'))
+ _, formats = _media_formats(src, media_type, f)
media_info['formats'].extend(formats)
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content:
@@ -3223,7 +3015,7 @@ class InfoExtractor(object):
s_attr = extract_attributes(source_tag)
# data-video-src and data-src are non standard but seen
# several times in the wild
- src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
+ src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src', 'data-source')))
if not src:
continue
f = parse_content_type(s_attr.get('type'))
@@ -3332,7 +3124,7 @@ class InfoExtractor(object):
http_f = f.copy()
del http_f['manifest_url']
http_url = re.sub(
- REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
+ REPL_REGEX, protocol + fr'://{http_host}/\g<1>{qualities[i]}\3', f['url'])
http_f.update({
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
'url': http_url,
@@ -3344,7 +3136,7 @@ class InfoExtractor(object):
return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
- query = compat_urlparse.urlparse(url).query
+ query = urllib.parse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
mobj = re.search(
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
@@ -3353,7 +3145,7 @@ class InfoExtractor(object):
formats = []
def manifest_url(manifest):
- m_url = '%s/%s' % (http_base_url, manifest)
+ m_url = f'{http_base_url}/{manifest}'
if query:
m_url += '?%s' % query
return m_url
@@ -3390,7 +3182,7 @@ class InfoExtractor(object):
for protocol in ('rtmp', 'rtsp'):
if protocol not in skip_protocols:
formats.append({
- 'url': '%s:%s' % (protocol, url_base),
+ 'url': f'{protocol}:{url_base}',
'format_id': protocol,
'protocol': protocol,
})
@@ -3450,7 +3242,7 @@ class InfoExtractor(object):
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, compat_str):
+ if not track_kind or not isinstance(track_kind, str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
@@ -3477,7 +3269,6 @@ class InfoExtractor(object):
'url': formats[0]['url'],
})
else:
- self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
if len(entries) == 1:
@@ -3523,13 +3314,14 @@ class InfoExtractor(object):
# Often no height is provided but there is a label in
# format like "1080p", "720p SD", or 1080.
height = int_or_none(self._search_regex(
- r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''),
'height', default=None))
a_format = {
'url': source_url,
'width': int_or_none(source.get('width')),
'height': height,
- 'tbr': int_or_none(source.get('bitrate')),
+ 'tbr': int_or_none(source.get('bitrate'), scale=1000),
+ 'filesize': int_or_none(source.get('filesize')),
'ext': ext,
}
if source_url.startswith('rtmp'):
@@ -3556,7 +3348,7 @@ class InfoExtractor(object):
def _int(self, v, name, fatal=False, **kwargs):
res = int_or_none(v, **kwargs)
if res is None:
- msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ msg = f'Failed to extract {name}: Could not parse value {v!r}'
if fatal:
raise ExtractorError(msg)
else:
@@ -3566,7 +3358,7 @@ class InfoExtractor(object):
def _float(self, v, name, fatal=False, **kwargs):
res = float_or_none(v, **kwargs)
if res is None:
- msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ msg = f'Failed to extract {name}: Could not parse value {v!r}'
if fatal:
raise ExtractorError(msg)
else:
@@ -3575,17 +3367,15 @@ class InfoExtractor(object):
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs):
- cookie = compat_cookiejar_Cookie(
+ cookie = http.cookiejar.Cookie(
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
- self._downloader.cookiejar.set_cookie(cookie)
+ self.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):
- """ Return a compat_cookies_SimpleCookie with the cookies for the url """
- req = sanitized_Request(url)
- self._downloader.cookiejar.add_cookie_header(req)
- return compat_cookies_SimpleCookie(req.get_header('Cookie'))
+ """ Return a http.cookies.SimpleCookie with the cookies for the url """
+ return LenientSimpleCookie(self._downloader._calc_cookies(url))
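+
+    # --- Editor's illustrative sketch; not part of the upstream patch. ---
+    # Round-trip example (domain, name and value are hypothetical):
+    def _example_cookies(self):
+        self._set_cookie('.example.com', 'session', 'abc123')
+        return self._get_cookies('https://www.example.com/watch')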
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
@@ -3604,9 +3394,7 @@ class InfoExtractor(object):
for header, cookies in url_handle.headers.items():
if header.lower() != 'set-cookie':
continue
- if sys.version_info[0] >= 3:
- cookies = cookies.encode('iso-8859-1')
- cookies = cookies.decode('utf-8')
+ cookies = cookies.encode('iso-8859-1').decode('utf-8')
cookie_value = re.search(
r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
if cookie_value:
@@ -3614,34 +3402,82 @@ class InfoExtractor(object):
self._set_cookie(domain, cookie, value)
break
- def get_testcases(self, include_onlymatching=False):
- t = getattr(self, '_TEST', None)
+ @classmethod
+ def get_testcases(cls, include_onlymatching=False):
+ # Do not look in super classes
+ t = vars(cls).get('_TEST')
if t:
- assert not hasattr(self, '_TESTS'), \
- '%s has _TEST and _TESTS' % type(self).__name__
+ assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
tests = [t]
else:
- tests = getattr(self, '_TESTS', [])
+ tests = vars(cls).get('_TESTS', [])
for t in tests:
if not include_onlymatching and t.get('only_matching', False):
continue
- t['name'] = type(self).__name__[:-len('IE')]
+ t['name'] = cls.ie_key()
yield t
- def is_suitable(self, age_limit):
- """ Test whether the extractor is generally suitable for the given
- age limit (i.e. pornographic sites are not, all others usually are) """
-
- any_restricted = False
- for tc in self.get_testcases(include_onlymatching=False):
- if tc.get('playlist', []):
- tc = tc['playlist'][0]
- is_restricted = age_restricted(
- tc.get('info_dict', {}).get('age_limit'), age_limit)
- if not is_restricted:
- return True
- any_restricted = any_restricted or is_restricted
- return not any_restricted
+ @classmethod
+ def get_webpage_testcases(cls):
+ tests = vars(cls).get('_WEBPAGE_TESTS', [])
+ for t in tests:
+ t['name'] = cls.ie_key()
+ return tests
+
+ @classproperty(cache=True)
+ def age_limit(cls):
+ """Get age limit from the testcases"""
+ return max(traverse_obj(
+ (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
+ (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
+
+ @classproperty(cache=True)
+ def _RETURN_TYPE(cls):
+ """What the extractor returns: "video", "playlist", "any", or None (Unknown)"""
+ tests = tuple(cls.get_testcases(include_onlymatching=False))
+ if not tests:
+ return None
+ elif not any(k.startswith('playlist') for test in tests for k in test):
+ return 'video'
+ elif all(any(k.startswith('playlist') for k in test) for test in tests):
+ return 'playlist'
+ return 'any'
+
+ @classmethod
+ def is_single_video(cls, url):
+ """Returns whether the URL is of a single video, None if unknown"""
+ assert cls.suitable(url), 'The URL must be suitable for the extractor'
+ return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
+
+ @classmethod
+ def is_suitable(cls, age_limit):
+ """Test whether the extractor is generally suitable for the given age limit"""
+ return not age_restricted(cls.age_limit, age_limit)
+
+ @classmethod
+ def description(cls, *, markdown=True, search_examples=None):
+ """Description of the extractor"""
+ desc = ''
+ if cls._NETRC_MACHINE:
+ if markdown:
+ desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]'
+ else:
+ desc += f' [{cls._NETRC_MACHINE}]'
+ if cls.IE_DESC is False:
+ desc += ' [HIDDEN]'
+ elif cls.IE_DESC:
+ desc += f' {cls.IE_DESC}'
+ if cls.SEARCH_KEY:
+ desc += f'; "{cls.SEARCH_KEY}:" prefix'
+ if search_examples:
+ _COUNTS = ('', '5', '10', 'all')
+ desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
+ if not cls.working():
+ desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
+
+ # Escape emojis. Ref: https://github.com/github/markup/issues/1153
+ name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
+ return f'{name}:{desc}' if desc else name
def extract_subtitles(self, *args, **kwargs):
if (self.get_param('writesubtitles', False)
@@ -3652,6 +3488,9 @@ class InfoExtractor(object):
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
+ class CommentsDisabled(Exception):
+ """Raise in _get_comments if comments are disabled for the video"""
+
def extract_comments(self, *args, **kwargs):
if not self.get_param('getcomments'):
return None
@@ -3667,6 +3506,8 @@ class InfoExtractor(object):
interrupted = False
except KeyboardInterrupt:
self.to_screen('Interrupted by user')
+ except self.CommentsDisabled:
+ return {'comments': None, 'comment_count': None}
except Exception as e:
if self.get_param('ignoreerrors') is not True:
raise
@@ -3686,7 +3527,7 @@ class InfoExtractor(object):
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
""" Merge subtitle items for one language. Items with duplicated URLs/data
will be dropped. """
- list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1)
+ list1_data = {(item.get('url'), item.get('data')) for item in subtitle_list1}
ret = list(subtitle_list1)
ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data)
return ret
@@ -3710,11 +3551,15 @@ class InfoExtractor(object):
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
+ @functools.cached_property
+ def _cookies_passed(self):
+ """Whether cookies have been passed to YoutubeDL"""
+ return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None
+
def mark_watched(self, *args, **kwargs):
if not self.get_param('mark_watched', False):
return
- if (self.supports_login() and self._get_login_info()[0] is not None
- or self.get_param('cookiefile') or self.get_param('cookiesfrombrowser')):
+ if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed:
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
@@ -3727,11 +3572,15 @@ class InfoExtractor(object):
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers
- def _generic_id(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ @staticmethod
+ def _generic_id(url):
+ return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
- def _generic_title(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ def _generic_title(self, url='', webpage='', *, default=None):
+ return (self._og_search_title(webpage, default=None)
+ or self._html_extract_title(webpage, default=None)
+ or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
+ or default)
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
@@ -3754,8 +3603,8 @@ class InfoExtractor(object):
@param default The default value to return when the key is not present (default: [])
@param casesense When false, the values are converted to lower case
'''
- val = traverse_obj(
- self._downloader.params, ('extractor_args', (ie_key or self.ie_key()).lower(), key))
+ ie_key = ie_key if isinstance(ie_key, str) else (ie_key or self).ie_key()
+ val = traverse_obj(self._downloader.params, ('extractor_args', ie_key.lower(), key))
if val is None:
return [] if default is NO_DEFAULT else default
return list(val) if casesense else [x.lower() for x in val]
@@ -3776,6 +3625,72 @@ class InfoExtractor(object):
self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
return True
+ def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
+ RetryManager.report_retry(
+ err, _count or int(fatal), _retries,
+ info=self.to_screen, warn=self.report_warning, error=None if fatal else self.report_warning,
+ sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
+
+ def RetryManager(self, **kwargs):
+ return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)
+
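+    # --- Editor's illustrative sketch; not part of the upstream patch. ---
+    # The usual retry idiom built on the helpers above (URL is hypothetical):
+    def _example_retrying_fetch(self, url, video_id):
+        for retry in self.RetryManager():
+            try:
+                return self._download_json(url, video_id)
+            except ExtractorError as e:
+                retry.error = e  # schedules another attempt, up to the limit
+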
+ def _extract_generic_embeds(self, url, *args, info_dict={}, note='Extracting generic embeds', **kwargs):
+ display_id = traverse_obj(info_dict, 'display_id', 'id')
+ self.to_screen(f'{format_field(display_id, None, "%s: ")}{note}')
+ return self._downloader.get_info_extractor('Generic')._extract_embeds(
+ smuggle_url(url, {'block_ies': [self.ie_key()]}), *args, **kwargs)
+
+ @classmethod
+ def extract_from_webpage(cls, ydl, url, webpage):
+ ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
+ else ydl.get_info_extractor(cls.ie_key()))
+ for info in ie._extract_from_webpage(url, webpage) or []:
+ # url = None since we do not want to set (webpage/original)_url
+ ydl.add_default_extra_info(info, ie, None)
+ yield info
+
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ for embed_url in orderedSet(
+ cls._extract_embed_urls(url, webpage) or [], lazy=True):
+ yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ """@returns all the embed urls on the webpage"""
+ if '_EMBED_URL_RE' not in cls.__dict__:
+ assert isinstance(cls._EMBED_REGEX, (list, tuple))
+ for idx, regex in enumerate(cls._EMBED_REGEX):
+ assert regex.count('(?P<url>') == 1, \
+ f'{cls.__name__}._EMBED_REGEX[{idx}] must have exactly 1 url group\n\t{regex}'
+ cls._EMBED_URL_RE = tuple(map(re.compile, cls._EMBED_REGEX))
+
+ for regex in cls._EMBED_URL_RE:
+ for mobj in regex.finditer(webpage):
+ embed_url = urllib.parse.urljoin(url, unescapeHTML(mobj.group('url')))
+ if cls._VALID_URL is False or cls.suitable(embed_url):
+ yield embed_url
+
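+    # Editor's note (illustrative, not part of the patch): a site extractor
+    # opts into this machinery by declaring patterns with exactly one named
+    # 'url' group, e.g. for a hypothetical player:
+    #
+    #     class ExampleIE(InfoExtractor):
+    #         _VALID_URL = r'https?://player\.example\.com/embed/(?P<id>\w+)'
+    #         _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://player\.example\.com/embed/\w+)']
+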
+ class StopExtraction(Exception):
+ pass
+
+ @classmethod
+ def _extract_url(cls, webpage): # TODO: Remove
+ """Only for compatibility with some older extractors"""
+ return next(iter(cls._extract_embed_urls(None, webpage) or []), None)
+
+ @classmethod
+ def __init_subclass__(cls, *, plugin_name=None, **kwargs):
+ if plugin_name:
+ mro = inspect.getmro(cls)
+ super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
+ cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key
+ while getattr(super_class, '__wrapped__', None):
+ super_class = super_class.__wrapped__
+ setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
+
+ return super().__init_subclass__(**kwargs)
+
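+# --- Editor's illustrative sketch; not part of the upstream patch. ---
+# A plugin overrides an existing extractor by subclassing it with a
+# `plugin_name`; the subclass is then re-exported under the original
+# module/name ('SomeSiteIE' and 'myplugin' are hypothetical):
+#
+#     class SomeSitePluginIE(SomeSiteIE, plugin_name='myplugin'):
+#         def _real_extract(self, url):
+#             self.to_screen('patched by plugin')
+#             return super()._real_extract(url)
+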
class SearchInfoExtractor(InfoExtractor):
"""
@@ -3785,9 +3700,10 @@ class SearchInfoExtractor(InfoExtractor):
"""
_MAX_RESULTS = float('inf')
+ _RETURN_TYPE = 'playlist'
- @classmethod
- def _make_valid_url(cls):
+ @classproperty
+ def _VALID_URL(cls):
return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
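+    # Editor's note (illustrative): with a hypothetical
+    # _SEARCH_KEY = 'examplesearch', this matches 'examplesearch:cats',
+    # 'examplesearch5:cats' and 'examplesearchall:cats'.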
def _real_extract(self, query):
@@ -3799,7 +3715,7 @@ class SearchInfoExtractor(InfoExtractor):
else:
n = int(prefix)
if n <= 0:
- raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
+ raise ExtractorError(f'invalid download number {n} for query "{query}"')
elif n > self._MAX_RESULTS:
self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
n = self._MAX_RESULTS
@@ -3816,6 +3732,15 @@ class SearchInfoExtractor(InfoExtractor):
"""Returns an iterator of search results"""
raise NotImplementedError('This method must be implemented by subclasses')
- @property
- def SEARCH_KEY(self):
- return self._SEARCH_KEY
+ @classproperty
+ def SEARCH_KEY(cls):
+ return cls._SEARCH_KEY
+
+
+class UnsupportedURLIE(InfoExtractor):
+ _VALID_URL = '.*'
+ _ENABLED = False
+ IE_DESC = False
+
+ def _real_extract(self, url):
+ raise UnsupportedError(url)
diff --git a/hypervideo_dl/extractor/commonmistakes.py b/hypervideo_dl/extractor/commonmistakes.py
index eb76fe5..a4a38cf 100644
--- a/hypervideo_dl/extractor/commonmistakes.py
+++ b/hypervideo_dl/extractor/commonmistakes.py
@@ -1,16 +1,10 @@
-from __future__ import unicode_literals
-
-import sys
-
from .common import InfoExtractor
from ..utils import ExtractorError
class CommonMistakesIE(InfoExtractor):
IE_DESC = False # Do not list
- _VALID_URL = r'''(?x)
- (?:url|URL)$
- '''
+ _VALID_URL = r'(?:url|URL|hypervideo)$'
_TESTS = [{
'url': 'url',
@@ -35,9 +29,7 @@ class UnicodeBOMIE(InfoExtractor):
IE_DESC = False
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
- # Disable test for python 3.2 since BOM is broken in re in this version
- # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
- _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
+ _TESTS = [{
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
'only_matching': True,
}]
diff --git a/hypervideo_dl/extractor/commonprotocols.py b/hypervideo_dl/extractor/commonprotocols.py
index 3708c6a..2f93e8e 100644
--- a/hypervideo_dl/extractor/commonprotocols.py
+++ b/hypervideo_dl/extractor/commonprotocols.py
@@ -1,10 +1,6 @@
-from __future__ import unicode_literals
-
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
-)
class RtmpIE(InfoExtractor):
@@ -28,7 +24,7 @@ class RtmpIE(InfoExtractor):
'formats': [{
'url': url,
'ext': 'flv',
- 'format_id': compat_urlparse.urlparse(url).scheme,
+ 'format_id': urllib.parse.urlparse(url).scheme,
}],
}
diff --git a/hypervideo_dl/extractor/condenast.py b/hypervideo_dl/extractor/condenast.py
index 54e7af8..3170c29 100644
--- a/hypervideo_dl/extractor/condenast.py
+++ b/hypervideo_dl/extractor/condenast.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -61,7 +58,10 @@ class CondeNastIE(InfoExtractor):
)''' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
- EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys())
+ _EMBED_REGEX = [r'''(?x)
+ <(?:iframe|script)[^>]+?src=(["\'])(?P<url>
+ (?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?
+ )\1''' % '|'.join(_SITES.keys())]
_TESTS = [{
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
@@ -197,7 +197,6 @@ class CondeNastIE(InfoExtractor):
'ext': ext,
'quality': 1 if quality == 'high' else 0,
})
- self._sort_formats(formats)
subtitles = {}
for t, caption in video_info.get('captions', {}).items():
diff --git a/hypervideo_dl/extractor/contv.py b/hypervideo_dl/extractor/contv.py
index 84b462d..d69e816 100644
--- a/hypervideo_dl/extractor/contv.py
+++ b/hypervideo_dl/extractor/contv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
@@ -72,8 +69,6 @@ class CONtvIE(InfoExtractor):
'url': media_mp4_url,
})
- self._sort_formats(formats)
-
subtitles = {}
captions = m_details.get('captions') or {}
for caption_url in captions.values():
diff --git a/hypervideo_dl/extractor/corus.py b/hypervideo_dl/extractor/corus.py
index 1194613..c03d653 100644
--- a/hypervideo_dl/extractor/corus.py
+++ b/hypervideo_dl/extractor/corus.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .theplatform import ThePlatformFeedIE
from ..utils import (
dict_get,
@@ -11,7 +7,7 @@ from ..utils import (
)
-class CorusIE(ThePlatformFeedIE):
+class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
@@ -130,7 +126,6 @@ class CorusIE(ThePlatformFeedIE):
smil, smil_url, video_id, namespace))
if not formats and video.get('drm'):
self.report_drm(video_id)
- self._sort_formats(formats)
subtitles = {}
for track in video.get('tracks', []):
diff --git a/hypervideo_dl/extractor/coub.py b/hypervideo_dl/extractor/coub.py
index e90aa19..9bab698 100644
--- a/hypervideo_dl/extractor/coub.py
+++ b/hypervideo_dl/extractor/coub.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -107,8 +104,6 @@ class CoubIE(InfoExtractor):
'source_preference': preference_key(MOBILE),
})
- self._sort_formats(formats)
-
thumbnail = coub.get('picture')
duration = float_or_none(coub.get('duration'))
timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
diff --git a/hypervideo_dl/extractor/cozytv.py b/hypervideo_dl/extractor/cozytv.py
index d49f1ca..5ef5afc 100644
--- a/hypervideo_dl/extractor/cozytv.py
+++ b/hypervideo_dl/extractor/cozytv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import unified_strdate
diff --git a/hypervideo_dl/extractor/cpac.py b/hypervideo_dl/extractor/cpac.py
index 2274115..0f23f2b 100644
--- a/hypervideo_dl/extractor/cpac.py
+++ b/hypervideo_dl/extractor/cpac.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -12,13 +9,6 @@ from ..utils import (
urljoin,
)
-# compat_range
-try:
- if callable(xrange):
- range = xrange
-except (NameError, TypeError):
- pass
-
class CPACIE(InfoExtractor):
IE_NAME = 'cpac'
@@ -64,8 +54,6 @@ class CPACIE(InfoExtractor):
else:
fmt['language_preference'] = -10
- self._sort_formats(formats)
-
category = str_or_none(content['details']['category_%s_t' % (url_lang, )])
def is_live(v_type):
diff --git a/hypervideo_dl/extractor/cracked.py b/hypervideo_dl/extractor/cracked.py
index f77a68e..c6aabcc 100644
--- a/hypervideo_dl/extractor/cracked.py
+++ b/hypervideo_dl/extractor/cracked.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/crackle.py b/hypervideo_dl/extractor/crackle.py
index db4962c..4610015 100644
--- a/hypervideo_dl/extractor/crackle.py
+++ b/hypervideo_dl/extractor/crackle.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals, division
-
import hashlib
import hmac
import re
@@ -180,7 +177,6 @@ class CrackleIE(InfoExtractor):
})
if not formats and has_drm:
self.report_drm(video_id)
- self._sort_formats(formats)
description = media.get('Description')
duration = int_or_none(media.get(
diff --git a/hypervideo_dl/extractor/craftsy.py b/hypervideo_dl/extractor/craftsy.py
index ed2f442..307bfb9 100644
--- a/hypervideo_dl/extractor/craftsy.py
+++ b/hypervideo_dl/extractor/craftsy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/crooksandliars.py b/hypervideo_dl/extractor/crooksandliars.py
index 7fb782d..4de7e3d 100644
--- a/hypervideo_dl/extractor/crooksandliars.py
+++ b/hypervideo_dl/extractor/crooksandliars.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -9,6 +7,8 @@ from ..utils import (
class CrooksAndLiarsIE(InfoExtractor):
_VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+ _EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1']
+
_TESTS = [{
'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
'info_dict': {
@@ -45,7 +45,6 @@ class CrooksAndLiarsIE(InfoExtractor):
'format_id': item['type'],
'quality': quality(item['type']),
} for item in manifest['flavors'] if item['mime'].startswith('video/')]
- self._sort_formats(formats)
return {
'url': url,
diff --git a/hypervideo_dl/extractor/crowdbunker.py b/hypervideo_dl/extractor/crowdbunker.py
index 72906af..d83c015 100644
--- a/hypervideo_dl/extractor/crowdbunker.py
+++ b/hypervideo_dl/extractor/crowdbunker.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
@@ -63,7 +60,6 @@ class CrowdBunkerIE(InfoExtractor):
'width': int_or_none(image.get('width')),
} for image in video_json.get('thumbnails') or [] if image.get('url')]
- self._sort_formats(formats)
return {
'id': id,
'title': video_json.get('title'),
diff --git a/hypervideo_dl/extractor/crunchyroll.py b/hypervideo_dl/extractor/crunchyroll.py
index 7edb645..d226050 100644
--- a/hypervideo_dl/extractor/crunchyroll.py
+++ b/hypervideo_dl/extractor/crunchyroll.py
@@ -1,44 +1,16 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
-import re
-import json
-import zlib
+import urllib.parse
-from hashlib import sha1
-from math import pow, sqrt, floor
from .common import InfoExtractor
-from .vrv import VRVBaseIE
-from ..compat import (
- compat_b64decode,
- compat_etree_Element,
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_urlencode,
- compat_urllib_request,
- compat_urlparse,
-)
from ..utils import (
ExtractorError,
- bytes_to_intlist,
- extract_attributes,
float_or_none,
format_field,
- intlist_to_bytes,
- int_or_none,
join_nonempty,
- lowercase_escape,
- merge_dicts,
+ parse_iso8601,
qualities,
- remove_end,
- sanitized_Request,
traverse_obj,
try_get,
- xpath_text,
-)
-from ..aes import (
- aes_cbc_decrypt,
)
@@ -46,16 +18,7 @@ class CrunchyrollBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
_API_BASE = 'https://api.crunchyroll.com'
_NETRC_MACHINE = 'crunchyroll'
-
- def _call_rpc_api(self, method, video_id, note=None, data=None):
- data = data or {}
- data['req'] = 'RpcApi' + method
- data = compat_urllib_parse_urlencode(data).encode('utf-8')
- return self._download_xml(
- 'https://www.crunchyroll.com/xml/',
- video_id, note, fatal=False, data=data, headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
+ params = None
def _perform_login(self, username, password):
if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
@@ -76,7 +39,7 @@ class CrunchyrollBaseIE(InfoExtractor):
login_response = self._download_json(
f'{self._API_BASE}/login.1.json', None, 'Logging in',
- data=compat_urllib_parse_urlencode({
+ data=urllib.parse.urlencode({
'account': username,
'password': password,
'session_id': session_id
@@ -86,800 +49,173 @@ class CrunchyrollBaseIE(InfoExtractor):
if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
- # Beta-specific, but needed for redirects
- def _get_beta_embedded_json(self, webpage, display_id):
+ def _get_embedded_json(self, webpage, display_id):
initial_state = self._parse_json(self._search_regex(
r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
app_config = self._parse_json(self._search_regex(
r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
return initial_state, app_config
- def _redirect_to_beta(self, webpage, iekey, video_id):
- if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
- raise ExtractorError('Received a beta page from non-beta url when not logged in.')
- initial_state, app_config = self._get_beta_embedded_json(webpage, video_id)
- url = app_config['baseSiteUrl'] + initial_state['router']['locations']['current']['pathname']
- self.to_screen(f'{video_id}: Redirected to beta site - {url}')
- return self.url_result(f'{url}', iekey, video_id)
-
- @staticmethod
- def _add_skip_wall(url):
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_urlparse.parse_qs(parsed_url.query)
- # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
- # > This content may be inappropriate for some people.
- # > Are you sure you want to continue?
- # since it's not disabled by default in crunchyroll account's settings.
- # See https://github.com/ytdl-org/youtube-dl/issues/7202.
- qs['skip_wall'] = ['1']
- return compat_urlparse.urlunparse(
- parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-
-
-class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
- IE_NAME = 'crunchyroll'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
- _TESTS = [{
- 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
- 'info_dict': {
- 'id': '645513',
- 'ext': 'mp4',
- 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
- 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
- 'upload_date': '20131013',
- 'url': 're:(?!.*&amp)',
- },
- 'params': {
- # rtmp
- 'skip_download': True,
- },
- 'skip': 'Video gone',
- }, {
- 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
- 'info_dict': {
- 'id': '589804',
- 'ext': 'flv',
- 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
- 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Danny Choo Network',
- 'upload_date': '20120213',
- },
- 'params': {
- # rtmp
- 'skip_download': True,
- },
- 'skip': 'Video gone',
- }, {
- 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
- 'info_dict': {
- 'id': '702409',
- 'ext': 'mp4',
- 'title': compat_str,
- 'description': compat_str,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Re:Zero Partners',
- 'timestamp': 1462098900,
- 'upload_date': '20160501',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
- 'info_dict': {
- 'id': '727589',
- 'ext': 'mp4',
- 'title': compat_str,
- 'description': compat_str,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Kadokawa Pictures Inc.',
- 'timestamp': 1484130900,
- 'upload_date': '20170111',
- 'series': compat_str,
- 'season': "KONOSUBA -God's blessing on this wonderful world! 2",
- 'season_number': 2,
- 'episode': 'Give Me Deliverance From This Judicial Injustice!',
- 'episode_number': 1,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
- 'only_matching': True,
- }, {
- # geo-restricted (US), 18+ maturity wall, non-premium available
- 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
- 'only_matching': True,
- }, {
- # A description with double quotes
- 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
- 'info_dict': {
- 'id': '535080',
- 'ext': 'mp4',
- 'title': compat_str,
- 'description': compat_str,
- 'uploader': 'Marvelous AQL Inc.',
- 'timestamp': 1255512600,
- 'upload_date': '20091014',
- },
- 'params': {
- # Just test metadata extraction
- 'skip_download': True,
- },
- }, {
- # make sure we can extract an uploader name that's not a link
- 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
- 'info_dict': {
- 'id': '606899',
- 'ext': 'mp4',
- 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
- 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
- 'uploader': 'Geneon Entertainment',
- 'upload_date': '20120717',
- },
- 'params': {
- # just test metadata extraction
- 'skip_download': True,
- },
- 'skip': 'Video gone',
- }, {
- # A video with a vastly different season name compared to the series name
- 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
- 'info_dict': {
- 'id': '590532',
- 'ext': 'mp4',
- 'title': compat_str,
- 'description': compat_str,
- 'uploader': 'TV TOKYO',
- 'timestamp': 1330956000,
- 'upload_date': '20120305',
- 'series': 'Nyarko-san: Another Crawling Chaos',
- 'season': 'Haiyoru! Nyaruani (ONA)',
- },
- 'params': {
- # Just test metadata extraction
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.crunchyroll.com/media-723735',
- 'only_matching': True,
- }, {
- 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
- 'only_matching': True,
- }]
-
- _FORMAT_IDS = {
- '360': ('60', '106'),
- '480': ('61', '106'),
- '720': ('62', '106'),
- '1080': ('80', '108'),
- }
-
- def _download_webpage(self, url_or_request, *args, **kwargs):
- request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
- else sanitized_Request(url_or_request))
- # Accept-Language must be set explicitly to accept any language to avoid issues
- # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
- # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
- # should be imposed or not (from what I can see it just takes the first language
- # ignoring the priority and requires it to correspond the IP). By the way this causes
- # Crunchyroll to not work in georestriction cases in some browsers that don't place
- # the locale lang first in header. However allowing any language seems to workaround the issue.
- request.add_header('Accept-Language', '*')
- return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-
- def _decrypt_subtitles(self, data, iv, id):
- data = bytes_to_intlist(compat_b64decode(data))
- iv = bytes_to_intlist(compat_b64decode(iv))
- id = int(id)
-
- def obfuscate_key_aux(count, modulo, start):
- output = list(start)
- for _ in range(count):
- output.append(output[-1] + output[-2])
- # cut off start values
- output = output[2:]
- output = list(map(lambda x: x % modulo + 33, output))
- return output
-
- def obfuscate_key(key):
- num1 = int(floor(pow(2, 25) * sqrt(6.9)))
- num2 = (num1 ^ key) << 5
- num3 = key ^ num1
- num4 = num3 ^ (num3 >> 3) ^ num2
- prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
- shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
- # Extend 160 Bit hash to 256 Bit
- return shaHash + [0] * 12
-
- key = obfuscate_key(id)
-
- decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
- return zlib.decompress(decrypted_data)
-
- def _convert_subtitles_to_srt(self, sub_root):
- output = ''
-
- for i, event in enumerate(sub_root.findall('./events/event'), 1):
- start = event.attrib['start'].replace('.', ',')
- end = event.attrib['end'].replace('.', ',')
- text = event.attrib['text'].replace('\\N', '\n')
- output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
- return output
-
- def _convert_subtitles_to_ass(self, sub_root):
- output = ''
-
- def ass_bool(strvalue):
- assvalue = '0'
- if strvalue == '1':
- assvalue = '-1'
- return assvalue
-
- output = '[Script Info]\n'
- output += 'Title: %s\n' % sub_root.attrib['title']
- output += 'ScriptType: v4.00+\n'
- output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
- output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
- output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
- output += """
-[V4+ Styles]
-Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-"""
- for style in sub_root.findall('./styles/style'):
- output += 'Style: ' + style.attrib['name']
- output += ',' + style.attrib['font_name']
- output += ',' + style.attrib['font_size']
- output += ',' + style.attrib['primary_colour']
- output += ',' + style.attrib['secondary_colour']
- output += ',' + style.attrib['outline_colour']
- output += ',' + style.attrib['back_colour']
- output += ',' + ass_bool(style.attrib['bold'])
- output += ',' + ass_bool(style.attrib['italic'])
- output += ',' + ass_bool(style.attrib['underline'])
- output += ',' + ass_bool(style.attrib['strikeout'])
- output += ',' + style.attrib['scale_x']
- output += ',' + style.attrib['scale_y']
- output += ',' + style.attrib['spacing']
- output += ',' + style.attrib['angle']
- output += ',' + style.attrib['border_style']
- output += ',' + style.attrib['outline']
- output += ',' + style.attrib['shadow']
- output += ',' + style.attrib['alignment']
- output += ',' + style.attrib['margin_l']
- output += ',' + style.attrib['margin_r']
- output += ',' + style.attrib['margin_v']
- output += ',' + style.attrib['encoding']
- output += '\n'
-
- output += """
-[Events]
-Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
-"""
- for event in sub_root.findall('./events/event'):
- output += 'Dialogue: 0'
- output += ',' + event.attrib['start']
- output += ',' + event.attrib['end']
- output += ',' + event.attrib['style']
- output += ',' + event.attrib['name']
- output += ',' + event.attrib['margin_l']
- output += ',' + event.attrib['margin_r']
- output += ',' + event.attrib['margin_v']
- output += ',' + event.attrib['effect']
- output += ',' + event.attrib['text']
- output += '\n'
-
- return output
-
- def _extract_subtitles(self, subtitle):
- sub_root = compat_etree_fromstring(subtitle)
- return [{
- 'ext': 'srt',
- 'data': self._convert_subtitles_to_srt(sub_root),
- }, {
- 'ext': 'ass',
- 'data': self._convert_subtitles_to_ass(sub_root),
- }]
-
- def _get_subtitles(self, video_id, webpage):
- subtitles = {}
- for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
- sub_doc = self._call_rpc_api(
- 'Subtitle_GetXml', video_id,
- 'Downloading subtitles for ' + sub_name, data={
- 'subtitle_script_id': sub_id,
- })
- if not isinstance(sub_doc, compat_etree_Element):
- continue
- sid = sub_doc.get('id')
- iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
- data = xpath_text(sub_doc, 'data', 'subtitle data')
- if not sid or not iv or not data:
- continue
- subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
- lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
- if not lang_code:
- continue
- subtitles[lang_code] = self._extract_subtitles(subtitle)
- return subtitles
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
-
- if mobj.group('prefix') == 'm':
- mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
- webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
- else:
- webpage_url = 'http://www.' + mobj.group('url')
-
- webpage = self._download_webpage(
- self._add_skip_wall(webpage_url), video_id,
- headers=self.geo_verification_headers())
- if re.search(r'<div id="preload-data">', webpage):
- return self._redirect_to_beta(webpage, CrunchyrollBetaIE.ie_key(), video_id)
- note_m = self._html_search_regex(
- r'<div class="showmedia-trailer-notice">(.+?)</div>',
- webpage, 'trailer-notice', default='')
- if note_m:
- raise ExtractorError(note_m, expected=True)
-
- mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
- if mobj:
- msg = json.loads(mobj.group('msg'))
- if msg.get('type') == 'error':
- raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
-
- if 'To view this, please log in to verify you are 18 or older.' in webpage:
- self.raise_login_required()
-
- media = self._parse_json(self._search_regex(
- r'vilos\.config\.media\s*=\s*({.+?});',
- webpage, 'vilos media', default='{}'), video_id)
- media_metadata = media.get('metadata') or {}
-
- language = self._search_regex(
- r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
- webpage, 'language', default=None, group='lang')
-
- video_title = self._html_search_regex(
- (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
- r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
- webpage, 'video_title', default=None)
- if not video_title:
- video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
- video_title = re.sub(r' {2,}', ' ', video_title)
- video_description = (self._parse_json(self._html_search_regex(
- r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
- webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
-
- thumbnails = []
- thumbnail_url = (self._parse_json(self._html_search_regex(
- r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>',
- webpage, 'thumbnail_url', default='{}'), video_id)).get('image')
- if thumbnail_url:
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': 1920,
- 'height': 1080
- })
-
- if video_description:
- video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
- video_uploader = self._html_search_regex(
- # try looking for both an uploader that's a link and one that's not
- [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
- webpage, 'video_uploader', default=False)
-
- requested_languages = self._configuration_arg('language')
- requested_hardsubs = [('' if val == 'none' else val) for val in self._configuration_arg('hardsub')]
- language_preference = qualities((requested_languages or [language or ''])[::-1])
- hardsub_preference = qualities((requested_hardsubs or ['', language or ''])[::-1])
-
- formats = []
- for stream in media.get('streams', []):
- audio_lang = stream.get('audio_lang') or ''
- hardsub_lang = stream.get('hardsub_lang') or ''
- if (requested_languages and audio_lang.lower() not in requested_languages
- or requested_hardsubs and hardsub_lang.lower() not in requested_hardsubs):
- continue
- vrv_formats = self._extract_vrv_formats(
- stream.get('url'), video_id, stream.get('format'),
- audio_lang, hardsub_lang)
- for f in vrv_formats:
- f['language_preference'] = language_preference(audio_lang)
- f['quality'] = hardsub_preference(hardsub_lang)
- formats.extend(vrv_formats)
- if not formats:
- available_fmts = []
- for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
- attrs = extract_attributes(a)
- href = attrs.get('href')
- if href and '/freetrial' in href:
- continue
- available_fmts.append(fmt)
- if not available_fmts:
- for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
- available_fmts = re.findall(p, webpage)
- if available_fmts:
- break
- if not available_fmts:
- available_fmts = self._FORMAT_IDS.keys()
- video_encode_ids = []
-
- for fmt in available_fmts:
- stream_quality, stream_format = self._FORMAT_IDS[fmt]
- video_format = fmt + 'p'
- stream_infos = []
- streamdata = self._call_rpc_api(
- 'VideoPlayer_GetStandardConfig', video_id,
- 'Downloading media info for %s' % video_format, data={
- 'media_id': video_id,
- 'video_format': stream_format,
- 'video_quality': stream_quality,
- 'current_page': url,
- })
- if isinstance(streamdata, compat_etree_Element):
- stream_info = streamdata.find('./{default}preload/stream_info')
- if stream_info is not None:
- stream_infos.append(stream_info)
- stream_info = self._call_rpc_api(
- 'VideoEncode_GetStreamInfo', video_id,
- 'Downloading stream info for %s' % video_format, data={
- 'media_id': video_id,
- 'video_format': stream_format,
- 'video_encode_quality': stream_quality,
- })
- if isinstance(stream_info, compat_etree_Element):
- stream_infos.append(stream_info)
- for stream_info in stream_infos:
- video_encode_id = xpath_text(stream_info, './video_encode_id')
- if video_encode_id in video_encode_ids:
- continue
- video_encode_ids.append(video_encode_id)
-
- video_file = xpath_text(stream_info, './file')
- if not video_file:
- continue
- if video_file.startswith('http'):
- formats.extend(self._extract_m3u8_formats(
- video_file, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- continue
-
- video_url = xpath_text(stream_info, './host')
- if not video_url:
- continue
- metadata = stream_info.find('./metadata')
- format_info = {
- 'format': video_format,
- 'height': int_or_none(xpath_text(metadata, './height')),
- 'width': int_or_none(xpath_text(metadata, './width')),
- }
-
- if '.fplive.net/' in video_url:
- video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
- parsed_video_url = compat_urlparse.urlparse(video_url)
- direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
- netloc='v.lvlt.crcdn.net',
- path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
- if self._is_valid_url(direct_video_url, video_id, video_format):
- format_info.update({
- 'format_id': 'http-' + video_format,
- 'url': direct_video_url,
- })
- formats.append(format_info)
- continue
-
- format_info.update({
- 'format_id': 'rtmp-' + video_format,
- 'url': video_url,
- 'play_path': video_file,
- 'ext': 'flv',
- })
- formats.append(format_info)
- self._sort_formats(formats)
-
- metadata = self._call_rpc_api(
- 'VideoPlayer_GetMediaMetadata', video_id,
- note='Downloading media info', data={
- 'media_id': video_id,
- })
-
- subtitles = {}
- for subtitle in media.get('subtitles', []):
- subtitle_url = subtitle.get('url')
- if not subtitle_url:
- continue
- subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
- 'url': subtitle_url,
- 'ext': subtitle.get('format', 'ass'),
- })
- if not subtitles:
- subtitles = self.extract_subtitles(video_id, webpage)
-
- # webpage provide more accurate data than series_title from XML
- series = self._html_search_regex(
- r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
- webpage, 'series', fatal=False)
-
- season = episode = episode_number = duration = None
-
- if isinstance(metadata, compat_etree_Element):
- season = xpath_text(metadata, 'series_title')
- episode = xpath_text(metadata, 'episode_title')
- episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
- duration = float_or_none(media_metadata.get('duration'), 1000)
-
- if not episode:
- episode = media_metadata.get('title')
- if not episode_number:
- episode_number = int_or_none(media_metadata.get('episode_number'))
- thumbnail_url = try_get(media, lambda x: x['thumbnail']['url'])
- if thumbnail_url:
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': 640,
- 'height': 360
- })
-
- season_number = int_or_none(self._search_regex(
- r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
- webpage, 'season number', default=None))
-
- info = self._search_json_ld(webpage, video_id, default={})
-
- return merge_dicts({
- 'id': video_id,
- 'title': video_title,
- 'description': video_description,
- 'duration': duration,
- 'thumbnails': thumbnails,
- 'uploader': video_uploader,
- 'series': series,
- 'season': season,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'subtitles': subtitles,
- 'formats': formats,
- }, info)
-
-
-class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
- IE_NAME = 'crunchyroll:playlist'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{1,2}/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
-
- _TESTS = [{
- 'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
- 'info_dict': {
- 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
- 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
- },
- 'playlist_count': 13,
- }, {
- # geo-restricted (US), 18+ maturity wall, non-premium available
- 'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
- 'info_dict': {
- 'id': 'cosplay-complex-ova',
- 'title': 'Cosplay Complex OVA'
- },
- 'playlist_count': 3,
- 'skip': 'Georestricted',
- }, {
- # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
- 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
- 'only_matching': True,
- }, {
- 'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
-
- webpage = self._download_webpage(
- # https:// gives a 403, but http:// does not
- self._add_skip_wall(url).replace('https://', 'http://'), show_id,
- headers=self.geo_verification_headers())
- if re.search(r'<div id="preload-data">', webpage):
- return self._redirect_to_beta(webpage, CrunchyrollBetaShowIE.ie_key(), show_id)
- title = self._html_search_meta('name', webpage, default=None)
-
- episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"'
- season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)'
- paths = re.findall(f'(?s){episode_re}|{season_re}', webpage)
-
- entries, current_season = [], None
- for ep_id, ep, season in paths:
- if season:
- current_season = season
- continue
- entries.append(self.url_result(
- f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season))
-
- return {
- '_type': 'playlist',
- 'id': show_id,
- 'title': title,
- 'entries': reversed(entries),
- }
+ def _get_params(self, lang):
+ if not CrunchyrollBaseIE.params:
+ if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
+ grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
+ else:
+ grant_type, key = 'client_id', 'anonClientId'
+ initial_state, app_config = self._get_embedded_json(self._download_webpage(
+ f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
+ api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')
-class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
- params = None
-
- def _get_params(self, lang):
- if not CrunchyrollBetaBaseIE.params:
- initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(
- f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
- api_domain = app_config['cxApiParams']['apiDomain']
- basic_token = str(base64.b64encode(('%s:' % app_config['cxApiParams']['accountAuthClientId']).encode('ascii')), 'ascii')
auth_response = self._download_json(
- f'{api_domain}/auth/v1/token', None, note='Authenticating with cookie',
+ f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
headers={
- 'Authorization': 'Basic ' + basic_token
- }, data='grant_type=etp_rt_cookie'.encode('ascii'))
+ 'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
+ }, data=f'grant_type={grant_type}'.encode('ascii'))
policy_response = self._download_json(
f'{api_domain}/index/v2', None, note='Retrieving signed policy',
headers={
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
})
- bucket = policy_response['cms']['bucket']
+ cms = policy_response.get('cms_web')
+ bucket = cms['bucket']
params = {
- 'Policy': policy_response['cms']['policy'],
- 'Signature': policy_response['cms']['signature'],
- 'Key-Pair-Id': policy_response['cms']['key_pair_id']
+ 'Policy': cms['policy'],
+ 'Signature': cms['signature'],
+ 'Key-Pair-Id': cms['key_pair_id']
}
locale = traverse_obj(initial_state, ('localization', 'locale'))
if locale:
params['locale'] = locale
- CrunchyrollBetaBaseIE.params = (api_domain, bucket, params)
- return CrunchyrollBetaBaseIE.params
-
- def _redirect_from_beta(self, url, lang, internal_id, display_id, is_episode, iekey):
- initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(url, display_id), display_id)
- content_data = initial_state['content']['byId'][internal_id]
- if is_episode:
- video_id = content_data['external_id'].split('.')[1]
- series_id = content_data['episode_metadata']['series_slug_title']
- else:
- series_id = content_data['slug_title']
- series_id = re.sub(r'-{2,}', '-', series_id)
- url = f'https://www.crunchyroll.com/{lang}{series_id}'
- if is_episode:
- url = url + f'/{display_id}-{video_id}'
- self.to_screen(f'{display_id}: Not logged in. Redirecting to non-beta site - {url}')
- return self.url_result(url, iekey, display_id)
+ CrunchyrollBaseIE.params = (api_domain, bucket, params)
+ return CrunchyrollBaseIE.params
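
The rewritten _get_params folds the beta-only authentication into the base class: when an etp_rt session cookie is present it requests an account-scoped token (grant_type=etp_rt_cookie), otherwise an anonymous one (grant_type=client_id), and in both cases the client id is sent as the username of an HTTP Basic credential with an empty password. A minimal standalone sketch of that token request, assuming only the endpoint and fields visible in the hunk above (urllib.request is used here purely for illustration):

    import base64
    import json
    import urllib.request

    def fetch_token(api_domain, client_id, grant_type='client_id'):
        # 'client_id:' (note the empty password) is Basic-encoded, as above
        basic = base64.b64encode(f'{client_id}:'.encode()).decode()
        req = urllib.request.Request(
            f'{api_domain}/auth/v1/token',
            data=f'grant_type={grant_type}'.encode(),
            headers={'Authorization': 'Basic ' + basic})
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)['access_token']
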
-class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
- IE_NAME = 'crunchyroll:beta'
- _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)'
+class CrunchyrollBetaIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll'
+ _VALID_URL = r'''(?x)
+ https?://(?:beta|www)\.crunchyroll\.com/
+ (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
+ watch/(?P<id>\w+)
+ (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
_TESTS = [{
- 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
+ 'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
'info_dict': {
- 'id': '696363',
+ 'id': 'GY2P1Q98Y',
'ext': 'mp4',
- 'timestamp': 1459610100,
+ 'duration': 1380.241,
+ 'timestamp': 1459632600,
'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
- 'uploader': 'Toei Animation',
'title': 'World Trigger Episode 73 – To the Future',
'upload_date': '20160402',
- 'episode_number': 73,
'series': 'World Trigger',
- 'average_rating': 4.9,
- 'episode': 'To the Future',
+ 'series_id': 'GR757DMKY',
'season': 'World Trigger',
- 'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/c870dedca1a83137c2d3d144984155ed1459527119_main.jpg',
+ 'season_id': 'GR9P39NJ6',
'season_number': 1,
+ 'episode': 'To the Future',
+ 'episode_number': 73,
+ 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
},
- 'params': {'skip_download': 'm3u8'},
- 'expected_warnings': ['Unable to download XML']
+ 'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
}, {
- 'url': 'https://beta.crunchyroll.com/watch/GYK53DMPR/wicked-lord-shingan-reborn',
+ 'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
'info_dict': {
- 'id': '648781',
+ 'id': 'GYE5WKQGR',
'ext': 'mp4',
- 'episode_number': 1,
- 'timestamp': 1389173400,
- 'series': 'Love, Chunibyo & Other Delusions - Heart Throb -',
- 'description': 'md5:5579d1a0355cc618558ba23d27067a62',
- 'uploader': 'TBS',
- 'episode': 'Wicked Lord Shingan... Reborn',
- 'average_rating': 4.9,
- 'season': 'Love, Chunibyo & Other Delusions - Heart Throb -',
- 'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/2ba0384e225a5370d5f0ee9496d91ea51389046521_main.jpg',
- 'title': 'Love, Chunibyo & Other Delusions - Heart Throb - Episode 1 – Wicked Lord Shingan... Reborn',
- 'season_number': 2,
- 'upload_date': '20140108',
+ 'duration': 366.459,
+ 'timestamp': 1476788400,
+ 'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
+ 'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
+ 'upload_date': '20161018',
+ 'series': 'SHELTER',
+ 'series_id': 'GYGG09WWY',
+ 'season': 'SHELTER',
+ 'season_id': 'GR09MGK4R',
+ 'season_number': 1,
+ 'episode': 'Porter Robinson presents Shelter the Animation',
+ 'episode_number': 0,
+ 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
},
- 'params': {'skip_download': 'm3u8'},
- 'expected_warnings': ['Unable to download XML']
+ 'params': {'skip_download': True},
+ 'skip': 'Video is Premium only',
+ }, {
+ 'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
+ 'only_matching': True,
}, {
- 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/',
+ 'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
'only_matching': True,
}]
def _real_extract(self, url):
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
-
- if not self._get_cookies(url).get('etp_rt'):
- return self._redirect_from_beta(url, lang, internal_id, display_id, True, CrunchyrollIE.ie_key())
-
api_domain, bucket, params = self._get_params(lang)
episode_response = self._download_json(
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
- note='Retrieving episode metadata',
- query=params)
+ note='Retrieving episode metadata', query=params)
if episode_response.get('is_premium_only') and not episode_response.get('playback'):
raise ExtractorError('This video is for premium members only.', expected=True)
- stream_response = self._download_json(
- episode_response['playback'], display_id,
- note='Retrieving stream info')
- thumbnails = []
- for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')):
- for thumbnail_data in thumbnails_data:
- thumbnails.append({
- 'url': thumbnail_data.get('source'),
- 'width': thumbnail_data.get('width'),
- 'height': thumbnail_data.get('height'),
- })
- subtitles = {}
- for lang, subtitle_data in stream_response.get('subtitles').items():
- subtitles[lang] = [{
- 'url': subtitle_data.get('url'),
- 'ext': subtitle_data.get('format')
- }]
+ stream_response = self._download_json(
+ f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
+ note='Retrieving stream info', query=params)
+ get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
hardsub_preference = qualities(requested_hardsubs[::-1])
requested_formats = self._configuration_arg('format') or ['adaptive_hls']
- formats = []
- for stream_type, streams in stream_response.get('streams', {}).items():
+ available_formats = {}
+ for stream_type, streams in get_streams('streams'):
if stream_type not in requested_formats:
continue
for stream in streams.values():
- hardsub_lang = stream.get('hardsub_locale') or ''
- if hardsub_lang.lower() not in requested_hardsubs:
- continue
- format_id = join_nonempty(
- stream_type,
- format_field(stream, 'hardsub_locale', 'hardsub-%s'))
if not stream.get('url'):
continue
- if stream_type.split('_')[-1] == 'hls':
+ hardsub_lang = stream.get('hardsub_locale') or ''
+ format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
+ available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
+
+ if '' in available_formats and 'all' not in requested_hardsubs:
+ full_format_langs = set(requested_hardsubs)
+ self.to_screen(
+ 'To get all formats of a hardsub language, use '
+ '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
+ 'See https://github.com/hypervideo/hypervideo#crunchyrollbeta for more info',
+ only_once=True)
+ else:
+ full_format_langs = set(map(str.lower, available_formats))
+
+ formats = []
+ for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
+ if stream_type.endswith('hls'):
+ if hardsub_lang.lower() in full_format_langs:
adaptive_formats = self._extract_m3u8_formats(
- stream['url'], display_id, 'mp4', m3u8_id=format_id,
- note='Downloading %s information' % format_id,
- fatal=False)
- elif stream_type.split('_')[-1] == 'dash':
- adaptive_formats = self._extract_mpd_formats(
- stream['url'], display_id, mpd_id=format_id,
- note='Downloading %s information' % format_id,
- fatal=False)
- for f in adaptive_formats:
- if f.get('acodec') != 'none':
- f['language'] = stream_response.get('audio_locale')
- f['quality'] = hardsub_preference(hardsub_lang.lower())
- formats.extend(adaptive_formats)
- self._sort_formats(formats)
+ stream_url, display_id, 'mp4', m3u8_id=format_id,
+ fatal=False, note=f'Downloading {format_id} HLS manifest')
+ else:
+ adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
+ elif stream_type.endswith('dash'):
+ adaptive_formats = self._extract_mpd_formats(
+ stream_url, display_id, mpd_id=format_id,
+ fatal=False, note=f'Downloading {format_id} MPD manifest')
+ else:
+ self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
+ continue
+ for f in adaptive_formats:
+ if f.get('acodec') != 'none':
+ f['language'] = stream_response.get('audio_locale')
+ f['quality'] = hardsub_preference(hardsub_lang.lower())
+ formats.extend(adaptive_formats)
return {
'id': internal_id,
- 'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
- 'description': episode_response.get('description').replace(r'\r\n', '\n'),
+ 'title': '%s Episode %s – %s' % (
+ episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
+ 'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
'duration': float_or_none(episode_response.get('duration_ms'), 1000),
- 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(episode_response.get('upload_date')),
'series': episode_response.get('series_title'),
'series_id': episode_response.get('series_id'),
'season': episode_response.get('season_title'),
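
The format selection in the hunk above ranks hardsub variants with utils.qualities, which scores a value by its position in a seed list (later entries rank higher); the requested list is reversed so that the first language the user asked for wins. A simplified model of that helper, with hypothetical language codes:

    def qualities(seed):
        # Later entries in seed rank higher; unknown values rank lowest
        def q(value):
            return seed.index(value) if value in seed else -1
        return q

    requested_hardsubs = ['ja-JP', 'en-US']  # hypothetical user preference
    hardsub_preference = qualities(requested_hardsubs[::-1])
    assert hardsub_preference('ja-JP') > hardsub_preference('en-US')
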
@@ -887,39 +223,42 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
'season_number': episode_response.get('season_number'),
'episode': episode_response.get('title'),
'episode_number': episode_response.get('sequence_number'),
- 'subtitles': subtitles,
- 'formats': formats
+ 'formats': formats,
+ 'thumbnails': [{
+ 'url': thumb.get('source'),
+ 'width': thumb.get('width'),
+ 'height': thumb.get('height'),
+ } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
+ 'subtitles': {
+ lang: [{
+ 'url': subtitle_data.get('url'),
+ 'ext': subtitle_data.get('format')
+ }] for lang, subtitle_data in get_streams('subtitles')
+ },
}
-class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
- IE_NAME = 'crunchyroll:playlist:beta'
- _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)'
+class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:playlist'
+ _VALID_URL = r'''(?x)
+ https?://(?:beta|www)\.crunchyroll\.com/
+ (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
+ series/(?P<id>\w+)
+ (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
_TESTS = [{
- 'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
+ 'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
'info_dict': {
- 'id': 'girl-friend-beta',
+ 'id': 'GY19NQ2QR',
'title': 'Girl Friend BETA',
},
'playlist_mincount': 10,
}, {
- 'url': 'https://beta.crunchyroll.com/series/GYJQV73V6/love-chunibyo--other-delusions---heart-throb--',
- 'info_dict': {
- 'id': 'love-chunibyo-other-delusions-heart-throb-',
- 'title': 'Love, Chunibyo & Other Delusions - Heart Throb -',
- },
- 'playlist_mincount': 10,
- }, {
- 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA',
+ 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
'only_matching': True,
}]
def _real_extract(self, url):
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
-
- if not self._get_cookies(url).get('etp_rt'):
- return self._redirect_from_beta(url, lang, internal_id, display_id, False, CrunchyrollShowPlaylistIE.ie_key())
-
api_domain, bucket, params = self._get_params(lang)
series_response = self._download_json(
@@ -940,7 +279,7 @@ class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
episode_display_id = episode['slug_title']
yield {
'_type': 'url',
- 'url': f'https://beta.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
+ 'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
'ie_key': CrunchyrollBetaIE.ie_key(),
'id': episode_id,
'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
diff --git a/hypervideo_dl/extractor/cspan.py b/hypervideo_dl/extractor/cspan.py
index f51159b..0075680 100644
--- a/hypervideo_dl/extractor/cspan.py
+++ b/hypervideo_dl/extractor/cspan.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -165,7 +163,7 @@ class CSpanIE(InfoExtractor):
video_id = m.group('id')
video_type = 'program' if m.group('type') == 'prog' else 'clip'
else:
- senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ senate_isvp_url = SenateISVPIE._extract_url(webpage)
if senate_isvp_url:
title = self._og_search_title(webpage)
surl = smuggle_url(senate_isvp_url, {'force_title': title})
@@ -220,7 +218,6 @@ class CSpanIE(InfoExtractor):
path, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
add_referer(formats)
- self._sort_formats(formats)
entries.append({
'id': '%s_%d' % (video_id, partnum + 1),
'title': (
@@ -277,8 +274,7 @@ class CSpanCongressIE(InfoExtractor):
self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'),
video_id, transform_source=js_to_json)
- title = (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'video title'))
+ title = self._generic_title('', webpage)
description = (self._og_search_description(webpage, default=None)
or self._html_search_meta('description', webpage, 'description', default=None))
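
_generic_title consolidates the og:title/<title> fallback chain that the two deleted lines spelled out. A rough standalone approximation of that chain (the real helper also falls back to the URL basename, which the empty first argument effectively disables here):

    import re

    def generic_title(webpage):
        # og:title first, then the <title> element, as in the removed lines
        mobj = (re.search(r'<meta[^>]+property=["\']og:title["\'][^>]+content=["\']([^"\']+)', webpage)
                or re.search(r'<title[^>]*>([^<]+)</title>', webpage))
        return mobj.group(1) if mobj else None
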
diff --git a/hypervideo_dl/extractor/ctsnews.py b/hypervideo_dl/extractor/ctsnews.py
index 679f1d9..cec178f 100644
--- a/hypervideo_dl/extractor/ctsnews.py
+++ b/hypervideo_dl/extractor/ctsnews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import unified_timestamp
from .youtube import YoutubeIE
diff --git a/hypervideo_dl/extractor/ctv.py b/hypervideo_dl/extractor/ctv.py
index 756bcc2..f125c1c 100644
--- a/hypervideo_dl/extractor/ctv.py
+++ b/hypervideo_dl/extractor/ctv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/ctvnews.py b/hypervideo_dl/extractor/ctvnews.py
index 952f4c7..ad3f0d8 100644
--- a/hypervideo_dl/extractor/ctvnews.py
+++ b/hypervideo_dl/extractor/ctvnews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/cultureunplugged.py b/hypervideo_dl/extractor/cultureunplugged.py
index 9002e4c..2fb2280 100644
--- a/hypervideo_dl/extractor/cultureunplugged.py
+++ b/hypervideo_dl/extractor/cultureunplugged.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import time
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/curiositystream.py b/hypervideo_dl/extractor/curiositystream.py
index b8abcf7..26cf24f 100644
--- a/hypervideo_dl/extractor/curiositystream.py
+++ b/hypervideo_dl/extractor/curiositystream.py
@@ -1,15 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- urlencode_postdata,
- compat_str,
- ExtractorError,
-)
+from ..compat import compat_str
+from ..utils import ExtractorError, int_or_none, urlencode_postdata
class CuriosityStreamBaseIE(InfoExtractor):
@@ -26,6 +19,11 @@ class CuriosityStreamBaseIE(InfoExtractor):
def _call_api(self, path, video_id, query=None):
headers = {}
+ if not self._auth_token:
+ auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token')
+ if auth_cookie:
+ self.write_debug('Obtained auth_token cookie')
+ self._auth_token = auth_cookie.value
if self._auth_token:
headers['X-Auth-Token'] = self._auth_token
result = self._download_json(
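
The new branch lets cookies passed to the program stand in for --username/--password: a curiositystream.com auth_token cookie is promoted to the X-Auth-Token API header. Roughly, assuming a standard http.cookiejar.CookieJar holding the site's cookies (function name is illustrative):

    import http.cookiejar

    def auth_headers(cookiejar):
        # Promote the site's auth_token cookie to the API header, as above
        for cookie in cookiejar:
            if cookie.name == 'auth_token' and cookie.domain.endswith('curiositystream.com'):
                return {'X-Auth-Token': cookie.value}
        return {}
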
@@ -48,7 +46,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://app.curiositystream.com/video/2',
+ 'url': 'http://app.curiositystream.com/video/2',
'info_dict': {
'id': '2',
'ext': 'mp4',
@@ -119,7 +117,6 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
'format_id': 'http',
})
formats.append(fmt)
- self._sort_formats(formats)
title = media['title']
diff --git a/hypervideo_dl/extractor/cwtv.py b/hypervideo_dl/extractor/cwtv.py
index 7338243..9b83264 100644
--- a/hypervideo_dl/extractor/cwtv.py
+++ b/hypervideo_dl/extractor/cwtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -94,4 +91,5 @@ class CWTVIE(InfoExtractor):
'timestamp': parse_iso8601(video_data.get('start_time')),
'age_limit': parse_age_limit(video_data.get('rating')),
'ie_key': 'ThePlatform',
+ 'thumbnail': video_data.get('large_thumbnail')
}
diff --git a/hypervideo_dl/extractor/cybrary.py b/hypervideo_dl/extractor/cybrary.py
index c278f0f..73f2439 100644
--- a/hypervideo_dl/extractor/cybrary.py
+++ b/hypervideo_dl/extractor/cybrary.py
@@ -1,12 +1,10 @@
-# coding: utf-8
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
smuggle_url,
str_or_none,
traverse_obj,
- urlencode_postdata
+ urlencode_postdata,
)
diff --git a/hypervideo_dl/extractor/daftsex.py b/hypervideo_dl/extractor/daftsex.py
index 6037fd9..551d5e3 100644
--- a/hypervideo_dl/extractor/daftsex.py
+++ b/hypervideo_dl/extractor/daftsex.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
@@ -84,7 +81,6 @@ class DaftsexIE(InfoExtractor):
'height': int_or_none(height),
'ext': ext,
})
- self._sort_formats(formats)
return {
'id': video_id,
@@ -120,7 +116,6 @@ class DaftsexIE(InfoExtractor):
'height': int_or_none(height),
'ext': ext,
})
- self._sort_formats(formats)
thumbnails = []
for k, v in item.items():
diff --git a/hypervideo_dl/extractor/dailymail.py b/hypervideo_dl/extractor/dailymail.py
index 67b88fd..43401e1 100644
--- a/hypervideo_dl/extractor/dailymail.py
+++ b/hypervideo_dl/extractor/dailymail.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -15,6 +10,7 @@ from ..utils import (
class DailyMailIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)']
_TESTS = [{
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
'md5': 'f6129624562251f628296c3a9ffde124',
@@ -29,12 +25,6 @@ class DailyMailIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -73,7 +63,6 @@ class DailyMailIE(InfoExtractor):
'protocol': protocol,
'ext': 'mp4' if is_hls else None,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/dailymotion.py b/hypervideo_dl/extractor/dailymotion.py
index 9cb5618..2a44718 100644
--- a/hypervideo_dl/extractor/dailymotion.py
+++ b/hypervideo_dl/extractor/dailymotion.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import json
import re
@@ -8,13 +5,15 @@ import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
age_restricted,
clean_html,
- ExtractorError,
int_or_none,
- OnDemandPagedList,
+ traverse_obj,
try_get,
unescapeHTML,
+ unsmuggle_url,
urlencode_postdata,
)
@@ -100,6 +99,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
'''
IE_NAME = 'dailymotion'
+ _EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
_TESTS = [{
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
'md5': '074b95bdee76b9e3654137aee9c79dfe',
@@ -209,20 +209,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}
xid'''
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- # Look for embedded Dailymotion player
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
# https://developer.dailymotion.com/player#player-parameters
- for mobj in re.finditer(
- r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
- urls.append(unescapeHTML(mobj.group('url')))
+ yield from super()._extract_embed_urls(url, webpage)
for mobj in re.finditer(
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
- urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
- return urls
+ yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url)
video_id, playlist_id = self._match_valid_url(url).groups()
if playlist_id:
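
A plain yield is required in the loop above rather than yield from: yield from delegates to an iterable, and iterating a str produces single characters, so the generator would emit 'h', 't', 't', 'p', ... instead of one embed URL per match. A two-line demonstration:

    def chars():
        yield from 'https://example.invalid'  # delegates character by character

    assert next(chars()) == 'h'
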
@@ -255,7 +251,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
metadata = self._download_json(
'https://www.dailymotion.com/player/metadata/video/' + xid,
xid, 'Downloading metadata JSON',
- query={'app': 'com.dailymotion.neon'})
+ query=traverse_obj(smuggled_data, 'query') or {'app': 'com.dailymotion.neon'})
error = metadata.get('error')
if error:
@@ -297,7 +293,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
f['url'] = f['url'].split('#')[0]
if not f.get('fps') and f['format_id'].endswith('@60'):
f['fps'] = 60
- self._sort_formats(formats)
subtitles = {}
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
@@ -378,6 +373,15 @@ class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
}]
_OBJECT_TYPE = 'collection'
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Look for embedded Dailymotion playlist player (#3822)
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1',
+ webpage):
+ for p in re.findall(r'list\[\]=/playlist/([^/]+)/', unescapeHTML(mobj.group('url'))):
+ yield '//dailymotion.com/playlist/%s' % p
+
class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user'
diff --git a/hypervideo_dl/extractor/dailywire.py b/hypervideo_dl/extractor/dailywire.py
new file mode 100644
index 0000000..f177c9d
--- /dev/null
+++ b/hypervideo_dl/extractor/dailywire.py
@@ -0,0 +1,113 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ join_nonempty,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class DailyWireBaseIE(InfoExtractor):
+ _JSON_PATH = {
+ 'episode': ('props', 'pageProps', 'episodeData', 'episode'),
+ 'videos': ('props', 'pageProps', 'videoData', 'video'),
+ 'podcasts': ('props', 'pageProps', 'episode'),
+ }
+
+ def _get_json(self, url):
+ sites_type, slug = self._match_valid_url(url).group('sites_type', 'id')
+ json_data = self._search_nextjs_data(self._download_webpage(url, slug), slug)
+ return slug, traverse_obj(json_data, self._JSON_PATH[sites_type])
+
+
+class DailyWireIE(DailyWireBaseIE):
+ _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/episode/1-fauci',
+ 'info_dict': {
+ 'id': 'ckzsl50xnqpy30850in3v4bu7',
+ 'ext': 'mp4',
+ 'display_id': '1-fauci',
+ 'title': '1. Fauci',
+ 'description': 'md5:9df630347ef85081b7e97dd30bc22853',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/ckzsl50xnqpy30850in3v4bu7/ckzsl50xnqpy30850in3v4bu7-1648237399554.jpg',
+ 'creator': 'Caroline Roberts',
+ 'series_id': 'ckzplm0a097fn0826r2vc3j7h',
+ 'series': 'China: The Enemy Within',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/episode/ep-124-bill-maher',
+ 'info_dict': {
+ 'id': 'cl0ngbaalplc80894sfdo9edf',
+ 'ext': 'mp3',
+ 'display_id': 'ep-124-bill-maher',
+ 'title': 'Ep. 124 - Bill Maher',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cl0ngbaalplc80894sfdo9edf/cl0ngbaalplc80894sfdo9edf-1647065568518.jpg',
+ 'creator': 'Caroline Roberts',
+ 'description': 'md5:adb0de584bcfa9c41374999d9e324e98',
+ 'series_id': 'cjzvep7270hp00786l9hwccob',
+ 'series': 'The Sunday Special',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/videos/the-hyperions',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
+ urls = traverse_obj(
+ episode_info, (('segments', 'videoUrl'), ..., ('video', 'audio')), expected_type=url_or_none)
+
+ formats, subtitles = [], {}
+ for url in urls:
+ if determine_ext(url) != 'm3u8':
+ formats.append({'url': url})
+ continue
+ format_, subs_ = self._extract_m3u8_formats_and_subtitles(url, slug)
+ formats.extend(format_)
+ self._merge_subtitles(subs_, target=subtitles)
+ return {
+ 'id': episode_info['id'],
+ 'display_id': slug,
+ 'title': traverse_obj(episode_info, 'title', 'name'),
+ 'description': episode_info.get('description'),
+ 'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'is_live': episode_info.get('isLive'),
+ 'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series_id': traverse_obj(episode_info, ('show', 'id')),
+ 'series': traverse_obj(episode_info, ('show', 'name')),
+ }
+
+
+class DailyWirePodcastIE(DailyWireBaseIE):
+ _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+/(?P<id>[\w-]+))'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/podcasts/morning-wire/get-ready-for-recession-6-15-22',
+ 'info_dict': {
+ 'id': 'cl4f01d0w8pbe0a98ydd0cfn1',
+ 'ext': 'm4a',
+ 'display_id': 'get-ready-for-recession-6-15-22',
+ 'title': 'Get Ready for Recession | 6.15.22',
+ 'description': 'md5:c4afbadda4e1c38a4496f6d62be55634',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg',
+ 'duration': 900.117667,
+ }
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
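+ # Note: the second traverse_obj path looks like a literal Mux playback ID, presumably meant as a fallback value rather than a dict key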
+ audio_id = traverse_obj(episode_info, 'audioMuxPlaybackId', 'VUsAipTrBVSgzw73SpC2DAJD401TYYwEp')
+
+ return {
+ 'id': episode_info['id'],
+ 'url': f'https://stream.media.dailywire.com/{audio_id}/audio.m4a',
+ 'display_id': slug,
+ 'title': episode_info.get('title'),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'thumbnail': episode_info.get('thumbnail'),
+ 'description': episode_info.get('description'),
+ }
diff --git a/hypervideo_dl/extractor/damtomo.py b/hypervideo_dl/extractor/damtomo.py
index 456cd35..0e08e4f 100644
--- a/hypervideo_dl/extractor/damtomo.py
+++ b/hypervideo_dl/extractor/damtomo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -39,7 +36,6 @@ class DamtomoBaseIE(InfoExtractor):
if not m3u8_url:
raise ExtractorError('Failed to obtain m3u8 URL')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/daum.py b/hypervideo_dl/extractor/daum.py
index 4362e92..3ef5140 100644
--- a/hypervideo_dl/extractor/daum.py
+++ b/hypervideo_dl/extractor/daum.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
@@ -129,7 +125,7 @@ class DaumClipIE(DaumBaseIE):
self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
-class DaumListIE(InfoExtractor):
+class DaumListIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
def _get_entries(self, list_id, list_id_type):
name = None
entries = []
diff --git a/hypervideo_dl/extractor/daystar.py b/hypervideo_dl/extractor/daystar.py
index 4f59d90..ef3520a 100644
--- a/hypervideo_dl/extractor/daystar.py
+++ b/hypervideo_dl/extractor/daystar.py
@@ -36,7 +36,6 @@ class DaystarClipIE(InfoExtractor):
video_id, 'mp4', fatal=False, headers={'Referer': src_iframe})
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/dbtv.py b/hypervideo_dl/extractor/dbtv.py
index 8e73176..18be46f 100644
--- a/hypervideo_dl/extractor/dbtv.py
+++ b/hypervideo_dl/extractor/dbtv.py
@@ -1,13 +1,9 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
class DBTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1']
_TESTS = [{
'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
'md5': 'b8f850ba1860adbda668d367f9b77699',
@@ -31,12 +27,6 @@ class DBTVIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
- webpage)]
-
def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups()
info = {
diff --git a/hypervideo_dl/extractor/dctp.py b/hypervideo_dl/extractor/dctp.py
index e700f8d..24bb6ac 100644
--- a/hypervideo_dl/extractor/dctp.py
+++ b/hypervideo_dl/extractor/dctp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/deezer.py b/hypervideo_dl/extractor/deezer.py
index 7ba02e5..f61f12a 100644
--- a/hypervideo_dl/extractor/deezer.py
+++ b/hypervideo_dl/extractor/deezer.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -64,7 +62,6 @@ class DeezerPlaylistIE(DeezerBaseInfoExtractor):
'preference': -100, # Only the first 30 seconds
'ext': 'mp3',
}]
- self._sort_formats(formats)
artists = ', '.join(
orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
entries.append({
@@ -117,7 +114,6 @@ class DeezerAlbumIE(DeezerBaseInfoExtractor):
'preference': -100, # Only the first 30 seconds
'ext': 'mp3',
}]
- self._sort_formats(formats)
artists = ', '.join(
orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
entries.append({
diff --git a/hypervideo_dl/extractor/defense.py b/hypervideo_dl/extractor/defense.py
index 9fe144e..7d73ea8 100644
--- a/hypervideo_dl/extractor/defense.py
+++ b/hypervideo_dl/extractor/defense.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/democracynow.py b/hypervideo_dl/extractor/democracynow.py
index 5c9c0ec..1624d08 100644
--- a/hypervideo_dl/extractor/democracynow.py
+++ b/hypervideo_dl/extractor/democracynow.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import os.path
@@ -62,8 +59,6 @@ class DemocracynowIE(InfoExtractor):
'vcodec': 'none' if key == 'audio' else None,
})
- self._sort_formats(formats)
-
default_lang = 'en'
subtitles = {}
diff --git a/hypervideo_dl/extractor/detik.py b/hypervideo_dl/extractor/detik.py
new file mode 100644
index 0000000..f148054
--- /dev/null
+++ b/hypervideo_dl/extractor/detik.py
@@ -0,0 +1,159 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, merge_dicts, try_call, url_basename
+
+
+class DetikEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _WEBPAGE_TESTS = [{
+ # cnn embed
+ 'url': 'https://www.cnnindonesia.com/embed/video/846189',
+ 'info_dict': {
+ 'id': '846189',
+ 'ext': 'mp4',
+ 'description': 'md5:ece7b003b3ee7d81c6a5cfede7d5397d',
+ 'thumbnail': r're:https?://akcdn\.detik\.net\.id/visual/2022/09/11/thumbnail-video-1_169.jpeg',
+ 'title': 'Video CNN Indonesia - VIDEO: Momen Charles Disambut Meriah usai Dilantik jadi Raja Inggris',
+ 'age_limit': 0,
+ 'tags': ['raja charles', ' raja charles iii', ' ratu elizabeth', ' ratu elizabeth meninggal dunia', ' raja inggris', ' inggris'],
+ 'release_timestamp': 1662869995,
+ 'release_date': '20220911',
+ 'uploader': 'REUTERS'
+ }
+ }, {
+ # 20.detik
+ 'url': 'https://20.detik.com/otobuzz/20220704-220704093/mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
+ 'info_dict': {
+ 'display_id': 'mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
+ 'id': '220704093',
+ 'ext': 'mp4',
+ 'description': 'md5:9b2257341b6f375cdcf90106146d5ffb',
+ 'thumbnail': r're:https?://cdnv\.detik\.com/videoservice/AdminTV/2022/07/04/5d6187e402ec4a91877755a5886ff5b6-20220704161859-0s.jpg',
+ 'title': 'Mulai Rp 10 Jutaan! Ini Skema Kredit Mitsubishi Pajero Sport',
+ 'timestamp': 1656951521,
+ 'upload_date': '20220704',
+ 'duration': 83.0,
+ 'tags': ['cicilan mobil', 'mitsubishi pajero sport', 'mitsubishi', 'pajero sport'],
+ 'release_timestamp': 1656926321,
+ 'release_date': '20220704',
+ 'age_limit': 0,
+ 'uploader': 'Ridwan Arifin ' # TODO: strip trailing whitespace at uploader
+ }
+ }, {
+ # pasangmata.detik
+ 'url': 'https://pasangmata.detik.com/contribution/366649',
+ 'info_dict': {
+ 'id': '366649',
+ 'ext': 'mp4',
+ 'title': 'Saling Dorong Aparat dan Pendemo di Aksi Tolak Kenaikan BBM',
+ 'description': 'md5:7a6580876c8381c454679e028620bea7',
+ 'age_limit': 0,
+ 'tags': 'count:17',
+ 'thumbnail': 'https://akcdn.detik.net.id/community/data/media/thumbs-pasangmata/2022/09/08/366649-16626229351533009620.mp4-03.jpg',
+ }
+ }, {
+ # insertlive embed
+ 'url': 'https://www.insertlive.com/embed/video/290482',
+ 'info_dict': {
+ 'id': '290482',
+ 'ext': 'mp4',
+ 'release_timestamp': 1663063704,
+ 'thumbnail': 'https://akcdn.detik.net.id/visual/2022/09/13/leonardo-dicaprio_169.png?w=600&q=90',
+ 'age_limit': 0,
+ 'description': 'Aktor Leonardo DiCaprio memang baru saja putus dari kekasihnya yang bernama Camilla Morrone.',
+ 'release_date': '20220913',
+ 'title': 'Diincar Leonardo DiCaprio, Gigi Hadid Ngaku Tertarik Tapi Belum Cinta',
+ 'tags': ['leonardo dicaprio', ' gigi hadid', ' hollywood'],
+ 'uploader': '!nsertlive',
+ }
+ }, {
+ # beautynesia embed
+ 'url': 'https://www.beautynesia.id/embed/video/261636',
+ 'info_dict': {
+ 'id': '261636',
+ 'ext': 'mp4',
+ 'age_limit': 0,
+ 'release_timestamp': 1662375600,
+ 'description': 'Menurut ramalan astrologi, tiga zodiak ini bakal hoki sepanjang September 2022.',
+ 'title': '3 Zodiak Paling Beruntung Selama September 2022',
+ 'release_date': '20220905',
+ 'tags': ['zodiac update', ' zodiak', ' ramalan bintang', ' zodiak beruntung 2022', ' zodiak hoki september 2022', ' zodiak beruntung september 2022'],
+ 'thumbnail': 'https://akcdn.detik.net.id/visual/2022/09/05/3-zodiak-paling-beruntung-selama-september-2022_169.jpeg?w=600&q=90',
+ 'uploader': 'amh',
+ }
+ }, {
+ # cnbcindonesia embed
+ 'url': 'https://www.cnbcindonesia.com/embed/video/371839',
+ 'info_dict': {
+ 'id': '371839',
+ 'ext': 'mp4',
+ 'title': 'Puluhan Pejabat Rusia Tuntut Putin Mundur',
+ 'tags': ['putin'],
+ 'age_limit': 0,
+ 'thumbnail': 'https://awsimages.detik.net.id/visual/2022/09/13/cnbc-indonesia-tv-3_169.png?w=600&q=80',
+ 'description': 'md5:8b9111e37555fcd95fe549a9b4ae6fdc',
+ }
+ }, {
+ # detik shortlink (we can get it from https://dtk.id/?<url>)
+ 'url': 'https://dtk.id/NkISKr',
+ 'info_dict': {
+ 'id': '220914049',
+ 'ext': 'mp4',
+ 'release_timestamp': 1663114488,
+ 'uploader': 'Tim 20Detik',
+ 'title': 'Pakar Bicara soal Tim Khusus Jokowi dan Mereka yang Pro ke Bjorka',
+ 'age_limit': 0,
+ 'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/09/14/f15cae71d7b640c58e75b254ecbb1ce1-20220914071613-0s.jpg?w=400&q=80',
+ 'display_id': 'pakar-bicara-soal-tim-khusus-jokowi-dan-mereka-yang-pro-ke-bjorka',
+ 'upload_date': '20220914',
+ 'release_date': '20220914',
+ 'description': 'md5:5eb03225f7ee40207dd3a1e18a73f1ff',
+ 'timestamp': 1663139688,
+ 'duration': 213.0,
+ 'tags': ['hacker bjorka', 'bjorka', 'hacker bjorka bocorkan data rahasia presiden jokowi', 'jokowi'],
+ }
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ player_type, video_data = self._search_regex(
+ r'<script\s*[^>]+src="https?://(aws)?cdn\.detik\.net\.id/(?P<type>flowplayer|detikVideo)[^>]+>\s*(?P<video_data>{[^}]+})',
+ webpage, 'playerjs', group=('type', 'video_data'), default=(None, ''))
+ if not player_type:
+ return
+
+ display_id, extra_info_dict = url_basename(url), {}
+
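+ # Two player variants are served: 'flowplayer' embeds a JS object literal, 'detikVideo' inlines bare key/value assignments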
+ if player_type == 'flowplayer':
+ video_json_data = self._parse_json(video_data.replace('\'', '"'), display_id)
+ video_url = video_json_data['videoUrl']
+
+ extra_info_dict = {
+ 'id': self._search_regex(r'identifier\s*:\s*\'([^\']+)', webpage, 'identifier'),
+ 'thumbnail': video_json_data.get('imageUrl'),
+ }
+
+ elif player_type == 'detikVideo':
+ video_url = self._search_regex(
+ r'videoUrl\s*:\s*[\'"]?([^"\']+)', video_data, 'videoUrl')
+ extra_info_dict = {
+ 'id': self._html_search_meta(['video_id', 'dtk:video_id'], webpage),
+ 'thumbnail': self._search_regex(r'imageUrl\s*:\s*[\'"]?([^"\']+)', video_data, 'thumbnail'),
+ 'duration': int_or_none(self._html_search_meta('duration', webpage, fatal=False, default=None)),
+ 'release_timestamp': int_or_none(self._html_search_meta('dtk:publishdateunix', webpage, fatal=False, default=None), 1000),
+ 'timestamp': int_or_none(self._html_search_meta('dtk:createdateunix', webpage, fatal=False, default=None), 1000),
+ 'uploader': self._search_regex(
+ r'([^-]+)', self._html_search_meta('dtk:author', webpage, default='').strip(), 'uploader',
+ default=None)
+ }
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)
+
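+ # Prefer structured JSON-LD fields, falling back to the meta tags and regex captures gathered above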
+ json_ld_data = self._search_json_ld(webpage, display_id, default={})
+ yield merge_dicts(json_ld_data, extra_info_dict, {
+ 'display_id': display_id,
+ 'title': self._html_search_meta(['og:title', 'originalTitle'], webpage) or self._html_extract_title(webpage),
+ 'description': self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'tags': try_call(lambda: self._html_search_meta(
+ ['keywords', 'keyword', 'dtk:keywords'], webpage).split(',')),
+ })
diff --git a/hypervideo_dl/extractor/deuxm.py b/hypervideo_dl/extractor/deuxm.py
new file mode 100644
index 0000000..74a6da6
--- /dev/null
+++ b/hypervideo_dl/extractor/deuxm.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import url_or_none
+
+
+class DeuxMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?2m\.ma/[^/]+/replay/single/(?P<id>([\w.]{1,24})+)'
+
+ _TESTS = [{
+ 'url': 'https://2m.ma/fr/replay/single/6351d439b15e1a613b3debe8',
+ 'md5': '5f761f04c9d686e553b685134dca5d32',
+ 'info_dict': {
+ 'id': '6351d439b15e1a613b3debe8',
+ 'ext': 'mp4',
+ 'title': 'Grand Angle : Jeudi 20 Octobre 2022',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }, {
+ 'url': 'https://2m.ma/fr/replay/single/635c0aeab4eec832622356da',
+ 'md5': 'ad6af2f5e4d5b2ad2194a84b6e890b4c',
+ 'info_dict': {
+ 'id': '635c0aeab4eec832622356da',
+ 'ext': 'mp4',
+ 'title': 'Journal Amazigh : Vendredi 28 Octobre 2022',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
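+ # Replay metadata comes from the watchDetail API, keyed by the ID taken from the URL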
+ video = self._download_json(
+ f'https://2m.ma/api/watchDetail/{video_id}', video_id)['response']['News']
+ return {
+ 'id': video_id,
+ 'title': video.get('titre'),
+ 'url': video['url'],
+ 'description': video.get('description'),
+ 'thumbnail': url_or_none(video.get('image')),
+ }
+
+
+class DeuxMNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?2m\.ma/(?P<lang>\w+)/news/(?P<id>[^/#?]+)'
+
+ _TESTS = [{
+ 'url': 'https://2m.ma/fr/news/Kan-Ya-Mkan-d%C3%A9poussi%C3%A8re-l-histoire-du-phare-du-Cap-Beddouza-20221028',
+ 'md5': '43d5e693a53fa0b71e8a5204c7d4542a',
+ 'info_dict': {
+ 'id': '635c5d1233b83834e35b282e',
+ 'ext': 'mp4',
+ 'title': 'Kan Ya Mkan d\u00e9poussi\u00e8re l\u2019histoire du phare du Cap Beddouza',
+ 'description': 'md5:99dcf29b82f1d7f2a4acafed1d487527',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }, {
+ 'url': 'https://2m.ma/fr/news/Interview-Casablanca-hors-des-sentiers-battus-avec-Abderrahim-KASSOU-Replay--20221017',
+ 'md5': '7aca29f02230945ef635eb8290283c0c',
+ 'info_dict': {
+ 'id': '634d9e108b70d40bc51a844b',
+ 'ext': 'mp4',
+ 'title': 'Interview: Casablanca hors des sentiers battus avec Abderrahim KASSOU (Replay) ',
+ 'description': 'md5:3b8e78111de9fcc6ef7f7dd6cff2430c',
+ 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
+ }
+ }]
+
+ def _real_extract(self, url):
+ article_name, lang = self._match_valid_url(url).group('id', 'lang')
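+ # Articles are resolved through the articlesByUrl API; the first entry of the response carries the video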
+ video = self._download_json(
+ f'https://2m.ma/api/articlesByUrl?lang={lang}&url=/news/{article_name}', article_name)['response']['article'][0]
+ return {
+ 'id': video['id'],
+ 'title': video.get('title'),
+ 'url': video['image'][0],
+ 'description': video.get('content'),
+ 'thumbnail': url_or_none(video.get('cover')),
+ }
diff --git a/hypervideo_dl/extractor/dfb.py b/hypervideo_dl/extractor/dfb.py
index 97f70fc..c4fb5c2 100644
--- a/hypervideo_dl/extractor/dfb.py
+++ b/hypervideo_dl/extractor/dfb.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -44,7 +41,6 @@ class DFBIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
manifest_url, display_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/dhm.py b/hypervideo_dl/extractor/dhm.py
index aee72a6..3d42fc2 100644
--- a/hypervideo_dl/extractor/dhm.py
+++ b/hypervideo_dl/extractor/dhm.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import parse_duration
diff --git a/hypervideo_dl/extractor/digg.py b/hypervideo_dl/extractor/digg.py
index 913c175..86e8a6f 100644
--- a/hypervideo_dl/extractor/digg.py
+++ b/hypervideo_dl/extractor/digg.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import js_to_json
diff --git a/hypervideo_dl/extractor/digitalconcerthall.py b/hypervideo_dl/extractor/digitalconcerthall.py
index 8398ae3..3461e36 100644
--- a/hypervideo_dl/extractor/digitalconcerthall.py
+++ b/hypervideo_dl/extractor/digitalconcerthall.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
@@ -89,9 +86,8 @@ class DigitalConcertHallIE(InfoExtractor):
})
m3u8_url = traverse_obj(
- stream_info, ('channel', lambda x: x.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
+ stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
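+ # traverse_obj passes (key, value) to filter callables, so the old single-argument lambda no longer matched that signature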
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
- self._sort_formats(formats)
yield {
'id': video_id,
diff --git a/hypervideo_dl/extractor/digiteka.py b/hypervideo_dl/extractor/digiteka.py
index d632047..912e33b 100644
--- a/hypervideo_dl/extractor/digiteka.py
+++ b/hypervideo_dl/extractor/digiteka.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -28,6 +23,7 @@ class DigitekaIE(InfoExtractor):
)
/id
)/(?P<id>[\d+a-z]+)'''
+ _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
_TESTS = [{
# news
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
@@ -61,14 +57,6 @@ class DigitekaIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@@ -93,8 +81,6 @@ class DigitekaIE(InfoExtractor):
'format_id': source.get('label'),
})
- self._sort_formats(formats)
-
title = deliver_info['title']
thumbnail = jwconf.get('image')
duration = int_or_none(deliver_info.get('duration'))
diff --git a/hypervideo_dl/extractor/discovery.py b/hypervideo_dl/extractor/discovery.py
index fd3ad75..fd3fc8f 100644
--- a/hypervideo_dl/extractor/discovery.py
+++ b/hypervideo_dl/extractor/discovery.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import random
import string
diff --git a/hypervideo_dl/extractor/discoverygo.py b/hypervideo_dl/extractor/discoverygo.py
index 9e7b14a..1f3d8e3 100644
--- a/hypervideo_dl/extractor/discoverygo.py
+++ b/hypervideo_dl/extractor/discoverygo.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -52,7 +50,6 @@ class DiscoveryGoBaseIE(InfoExtractor):
elif stream_kind == 'hds':
formats.extend(self._extract_f4m_formats(
stream_url, display_id, f4m_id=stream_kind, fatal=False))
- self._sort_formats(formats)
video_id = video.get('id') or display_id
description = video.get('description', {}).get('detailed')
diff --git a/hypervideo_dl/extractor/discoverynetworks.py b/hypervideo_dl/extractor/discoverynetworks.py
deleted file mode 100644
index f43c871..0000000
--- a/hypervideo_dl/extractor/discoverynetworks.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
-from .dplay import DPlayIE
-
-
-class DiscoveryNetworksDeIE(DPlayIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
-
- _TESTS = [{
- 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
- 'info_dict': {
- 'id': '78867',
- 'ext': 'mp4',
- 'title': 'Die Welt da draußen',
- 'description': 'md5:61033c12b73286e409d99a41742ef608',
- 'timestamp': 1554069600,
- 'upload_date': '20190331',
- },
- 'params': {
- 'format': 'bestvideo',
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
- 'only_matching': True,
- }, {
- 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
- 'only_matching': True,
- }, {
- 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- domain, programme, alternate_id = self._match_valid_url(url).groups()
- country = 'GB' if domain == 'dplay.co.uk' else 'DE'
- realm = 'questuk' if country == 'GB' else domain.replace('.', '')
- return self._get_disco_api_info(
- url, '%s/%s' % (programme, alternate_id),
- 'sonic-eu1-prod.disco-api.com', realm, country)
diff --git a/hypervideo_dl/extractor/discoveryplusindia.py b/hypervideo_dl/extractor/discoveryplusindia.py
deleted file mode 100644
index 5180140..0000000
--- a/hypervideo_dl/extractor/discoveryplusindia.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-
-from ..compat import compat_str
-from ..utils import try_get
-from .common import InfoExtractor
-from .dplay import DPlayIE
-
-
-class DiscoveryPlusIndiaIE(DPlayIE):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX
- _TESTS = [{
- 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
- 'info_dict': {
- 'id': '27104',
- 'ext': 'mp4',
- 'display_id': 'how-do-they-do-it/fugu-and-more',
- 'title': 'Fugu and More',
- 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
- 'duration': 1319,
- 'timestamp': 1582309800,
- 'upload_date': '20200221',
- 'series': 'How Do They Do It?',
- 'season_number': 8,
- 'episode_number': 2,
- 'creator': 'Discovery Channel',
- },
- 'params': {
- 'format': 'bestvideo',
- 'skip_download': True,
- },
- 'skip': 'Cookies (not necessarily logged in) are needed'
- }]
-
- def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
- headers['x-disco-params'] = 'realm=%s' % realm
- headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0'
-
- def _download_video_playback_info(self, disco_base, video_id, headers):
- return self._download_json(
- disco_base + 'playback/v3/videoPlaybackInfo',
- video_id, headers=headers, data=json.dumps({
- 'deviceInfo': {
- 'adBlocker': False,
- },
- 'videoId': video_id,
- }).encode('utf-8'))['data']['attributes']['streaming']
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- return self._get_disco_api_info(
- url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in')
-
-
-class DiscoveryPlusIndiaShowIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
- _TESTS = [{
- 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
- 'playlist_mincount': 140,
- 'info_dict': {
- 'id': 'how-do-they-do-it',
- },
- }]
-
- def _entries(self, show_name):
- headers = {
- 'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod',
- 'x-disco-params': 'realm=dplusindia',
- 'referer': 'https://www.discoveryplus.in/',
- }
- show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name)
- show_json = self._download_json(show_url,
- video_id=show_name,
- headers=headers)['included'][4]['attributes']['component']
- show_id = show_json['mandatoryParams'].split('=')[-1]
- season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
- for season in show_json['filters'][0]['options']:
- season_id = season['id']
- total_pages, page_num = 1, 0
- while page_num < total_pages:
- season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)),
- video_id=show_id, headers=headers,
- note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
- if page_num == 0:
- total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
- episodes_json = season_json['data']
- for episode in episodes_json:
- video_id = episode['attributes']['path']
- yield self.url_result(
- 'https://discoveryplus.in/videos/%s' % video_id,
- ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id)
- page_num += 1
-
- def _real_extract(self, url):
- show_name = self._match_valid_url(url).group('show_name')
- return self.playlist_result(self._entries(show_name), playlist_id=show_name)
diff --git a/hypervideo_dl/extractor/discoveryvr.py b/hypervideo_dl/extractor/discoveryvr.py
deleted file mode 100644
index cb63c26..0000000
--- a/hypervideo_dl/extractor/discoveryvr.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import parse_duration
-
-
-class DiscoveryVRIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
- _TEST = {
- 'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
- 'md5': '32b1929798c464a54356378b7912eca4',
- 'info_dict': {
- 'id': 'discovery-vr-an-introduction',
- 'ext': 'mp4',
- 'title': 'Discovery VR - An Introduction',
- 'description': 'md5:80d418a10efb8899d9403e61d8790f06',
- }
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- bootstrap_data = self._search_regex(
- r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
- webpage, 'bootstrap data')
- bootstrap_data = self._parse_json(
- bootstrap_data.encode('utf-8').decode('unicode_escape'),
- display_id)
- videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']
- video_data = next(video for video in videos if video.get('slug') == display_id)
-
- series = video_data.get('showTitle')
- title = episode = video_data.get('title') or series
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- formats = []
- for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
- f_url = video_data.get(f)
- if not f_url:
- continue
- formats.append({
- 'format_id': format_id,
- 'url': f_url,
- })
-
- return {
- 'id': display_id,
- 'display_id': display_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('thumbnail'),
- 'duration': parse_duration(video_data.get('runTime')),
- 'formats': formats,
- 'episode': episode,
- 'series': series,
- }
diff --git a/hypervideo_dl/extractor/disney.py b/hypervideo_dl/extractor/disney.py
index 0ad7b1f..430de32 100644
--- a/hypervideo_dl/extractor/disney.py
+++ b/hypervideo_dl/extractor/disney.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -137,7 +134,6 @@ class DisneyIE(InfoExtractor):
self.raise_no_formats(
'%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
expected=True)
- self._sort_formats(formats)
subtitles = {}
for caption in video_data.get('captions', []):
diff --git a/hypervideo_dl/extractor/dispeak.py b/hypervideo_dl/extractor/dispeak.py
index 3d651f3..37f89b9 100644
--- a/hypervideo_dl/extractor/dispeak.py
+++ b/hypervideo_dl/extractor/dispeak.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -119,7 +117,6 @@ class DigitallySpeakingIE(InfoExtractor):
video_formats = self._parse_mp4(metadata)
if video_formats is None:
video_formats = self._parse_flv(metadata)
- self._sort_formats(video_formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/dlive.py b/hypervideo_dl/extractor/dlive.py
index 7410eb6..30fcf9f 100644
--- a/hypervideo_dl/extractor/dlive.py
+++ b/hypervideo_dl/extractor/dlive.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -42,7 +40,6 @@ class DLiveVODIE(InfoExtractor):
title = broadcast['title']
formats = self._extract_m3u8_formats(
broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
return {
'id': vod_id,
'title': title,
@@ -81,7 +78,6 @@ class DLiveStreamIE(InfoExtractor):
formats = self._extract_m3u8_formats(
'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
display_name, 'mp4')
- self._sort_formats(formats)
return {
'id': display_name,
'title': title,
diff --git a/hypervideo_dl/extractor/doodstream.py b/hypervideo_dl/extractor/doodstream.py
deleted file mode 100644
index f692127..0000000
--- a/hypervideo_dl/extractor/doodstream.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import string
-import random
-import time
-
-from .common import InfoExtractor
-
-
-class DoodStreamIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
- _TESTS = [{
- 'url': 'http://dood.to/e/5s1wmbdacezb',
- 'md5': '4568b83b31e13242b3f1ff96c55f0595',
- 'info_dict': {
- 'id': '5s1wmbdacezb',
- 'ext': 'mp4',
- 'title': 'Kat Wonders - Monthly May 2020',
- 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
- 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
- }
- }, {
- 'url': 'http://dood.watch/d/5s1wmbdacezb',
- 'md5': '4568b83b31e13242b3f1ff96c55f0595',
- 'info_dict': {
- 'id': '5s1wmbdacezb',
- 'ext': 'mp4',
- 'title': 'Kat Wonders - Monthly May 2020',
- 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
- 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
- }
- }, {
- 'url': 'https://dood.to/d/jzrxn12t2s7n',
- 'md5': '3207e199426eca7c2aa23c2872e6728a',
- 'info_dict': {
- 'id': 'jzrxn12t2s7n',
- 'ext': 'mp4',
- 'title': 'Stacy Cruz Cute ALLWAYSWELL',
- 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
- 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = f'https://dood.to/e/{video_id}'
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None)
- thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
- token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
- description = self._html_search_meta(
- ['og:description', 'description', 'twitter:description'], webpage, default=None)
-
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
- 'referer': url
- }
-
- pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
- final_url = ''.join((
- self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
- *(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
- f'?token={token}&expiry={int(time.time() * 1000)}',
- ))
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': final_url,
- 'http_headers': headers,
- 'ext': 'mp4',
- 'description': description,
- 'thumbnail': thumb,
- }
diff --git a/hypervideo_dl/extractor/dotsub.py b/hypervideo_dl/extractor/dotsub.py
index 148605c..079f837 100644
--- a/hypervideo_dl/extractor/dotsub.py
+++ b/hypervideo_dl/extractor/dotsub.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
diff --git a/hypervideo_dl/extractor/douyutv.py b/hypervideo_dl/extractor/douyutv.py
index 26a8d64..477f468 100644
--- a/hypervideo_dl/extractor/douyutv.py
+++ b/hypervideo_dl/extractor/douyutv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import time
import hashlib
import re
diff --git a/hypervideo_dl/extractor/dplay.py b/hypervideo_dl/extractor/dplay.py
index a25f27c..8eb4d8f 100644
--- a/hypervideo_dl/extractor/dplay.py
+++ b/hypervideo_dl/extractor/dplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import uuid
@@ -11,6 +8,7 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ remove_start,
strip_or_none,
try_get,
unified_timestamp,
@@ -128,7 +126,6 @@ class DPlayBaseIE(InfoExtractor):
'url': format_url,
'format_id': format_id,
})
- self._sort_formats(formats)
creator = series = None
tags = []
@@ -314,7 +311,7 @@ class DPlayIE(DPlayBaseIE):
def _real_extract(self, url):
mobj = self._match_valid_url(url)
display_id = mobj.group('id')
- domain = mobj.group('domain').lstrip('www.')
+ domain = remove_start(mobj.group('domain'), 'www.')
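+ # str.lstrip('www.') strips any leading run of the characters 'w' and '.', not the literal prefix, so remove_start is the safe choice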
country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
return self._get_disco_api_info(
@@ -720,6 +717,72 @@ class TLCIE(DiscoveryPlusBaseIE):
}
+class MotorTrendIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
+ 'info_dict': {
+ 'id': '"4859182"',
+ 'display_id': 'double-dakotas',
+ 'ext': 'mp4',
+ 'title': 'Double Dakotas',
+ 'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
+ 'season_number': 2,
+ 'episode_number': 3,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
+ 'only_matching': True,
+ }]
+
+ _PRODUCT = 'vel'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.watch.motortrend.com',
+ 'realm': 'go',
+ 'country': 'us',
+ }
+
+
+class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
+ 'info_dict': {
+ 'id': '37699',
+ 'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699',
+ 'ext': 'mp4',
+ 'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback',
+ 'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7',
+ 'season_number': 5,
+ 'episode_number': 52,
+ 'episode': 'Episode 52',
+ 'season': 'Season 5',
+ 'thumbnail': r're:^https?://.+\.jpe?g$',
+ 'timestamp': 1388534401,
+ 'duration': 1887.345,
+ 'creator': 'Originals',
+ 'series': 'Roadkill',
+ 'upload_date': '20140101',
+ 'tags': [],
+ },
+ }]
+
+ _PRODUCT = 'MTOD'
+ _DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.motortrendondemand.com',
+ 'realm': 'motortrend',
+ 'country': 'us',
+ }
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
+
+
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
@@ -882,6 +945,9 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
_TESTS = [{
'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
+ 'only_matching': True,
}]
_PRODUCT = 'dplus_us'
@@ -891,6 +957,13 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
'country': 'it',
}
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': 'realm=%s' % realm,
+ 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
+
class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P<show_name>[^/]+)/?(?:[?#]|$)'
diff --git a/hypervideo_dl/extractor/drbonanza.py b/hypervideo_dl/extractor/drbonanza.py
index ea0f06d..824d70d 100644
--- a/hypervideo_dl/extractor/drbonanza.py
+++ b/hypervideo_dl/extractor/drbonanza.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
js_to_json,
@@ -33,7 +30,6 @@ class DRBonanzaIE(InfoExtractor):
info = self._parse_html5_media_entries(
url, webpage, display_id, m3u8_id='hls',
m3u8_entry_protocol='m3u8_native')[0]
- self._sort_formats(info['formats'])
asset = self._parse_json(
self._search_regex(
diff --git a/hypervideo_dl/extractor/dreisat.py b/hypervideo_dl/extractor/dreisat.py
index 5a07c18..8a59c23 100644
--- a/hypervideo_dl/extractor/dreisat.py
+++ b/hypervideo_dl/extractor/dreisat.py
@@ -1,9 +1,7 @@
-from __future__ import unicode_literals
-
from .zdf import ZDFIE
-class DreiSatIE(ZDFIE):
+class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
IE_NAME = '3sat'
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/drooble.py b/hypervideo_dl/extractor/drooble.py
index 0584250..106e5c4 100644
--- a/hypervideo_dl/extractor/drooble.py
+++ b/hypervideo_dl/extractor/drooble.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/dropbox.py b/hypervideo_dl/extractor/dropbox.py
index 2559657..214b309 100644
--- a/hypervideo_dl/extractor/dropbox.py
+++ b/hypervideo_dl/extractor/dropbox.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import os.path
import re
@@ -56,8 +53,8 @@ class DropboxIE(InfoExtractor):
else:
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
- json_string = self._html_search_regex(r'InitReact\.mountComponent\(.*?,\s*(\{.+\})\s*?\)', webpage, 'Info JSON')
- info_json = self._parse_json(json_string, video_id).get('props')
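+ # _search_json parses the object itself instead of capturing it with a single greedy regex, which tends to be more robust against nested braces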
+ info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
+ contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
@@ -66,7 +63,6 @@ class DropboxIE(InfoExtractor):
video_url = re.sub(r'[?&]dl=0', '', url)
video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/dropout.py b/hypervideo_dl/extractor/dropout.py
index 2fa6195..e280b1c 100644
--- a/hypervideo_dl/extractor/dropout.py
+++ b/hypervideo_dl/extractor/dropout.py
@@ -1,9 +1,8 @@
-# coding: utf-8
from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
get_element_by_class,
get_element_by_id,
get_elements_by_class,
@@ -97,11 +96,12 @@ class DropoutIE(InfoExtractor):
def _login(self, display_id):
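+ # Returns None on success, an error message string on failure, or True when no credentials were supplied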
username, password = self._get_login_info()
- if not (username and password):
- self.raise_login_required(method='password')
+ if not username:
+ return True
response = self._download_webpage(
- self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({
+ self._LOGIN_URL, display_id, note='Logging in', fatal=False,
+ data=urlencode_postdata({
'email': username,
'password': password,
'authenticity_token': self._get_authenticity_token(display_id),
@@ -111,19 +111,25 @@ class DropoutIE(InfoExtractor):
user_has_subscription = self._search_regex(
r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none')
if user_has_subscription.lower() == 'true':
- return response
+ return
elif user_has_subscription.lower() == 'false':
- raise ExtractorError('Account is not subscribed')
+ return 'Account is not subscribed'
else:
- raise ExtractorError('Incorrect username/password')
+ return 'Incorrect username/password'
def _real_extract(self, url):
display_id = self._match_id(url)
- try:
- self._login(display_id)
- webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
- finally:
- self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False)
+
+ webpage = None
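+ # Reuse an existing session cookie when available; only attempt a login if the page comes back unauthorized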
+ if self._get_cookies('https://www.dropout.tv').get('_session'):
+ webpage = self._download_webpage(url, display_id)
+ if not webpage or '<div id="watch-unauthorized"' in webpage:
+ login_err = self._login(display_id)
+ webpage = self._download_webpage(url, display_id)
+ if login_err and '<div id="watch-unauthorized"' in webpage:
+ if login_err is True:
+ self.raise_login_required(method='any')
+ raise ExtractorError(login_err, expected=True)
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
thumbnail = self._og_search_thumbnail(webpage)
@@ -138,7 +144,7 @@ class DropoutIE(InfoExtractor):
return {
'_type': 'url_transparent',
'ie_key': VHXEmbedIE.ie_key(),
- 'url': embed_url,
+ 'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'),
'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
'display_id': display_id,
'title': title,
diff --git a/hypervideo_dl/extractor/drtuber.py b/hypervideo_dl/extractor/drtuber.py
index 540b86a..e5dab6a 100644
--- a/hypervideo_dl/extractor/drtuber.py
+++ b/hypervideo_dl/extractor/drtuber.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -13,6 +11,7 @@ from ..utils import (
class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)']
_TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
@@ -35,12 +34,6 @@ class DrTuberIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
- webpage)
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@@ -65,7 +58,6 @@ class DrTuberIE(InfoExtractor):
'quality': 2 if format_id == 'hq' else 1,
'url': video_url
})
- self._sort_formats(formats)
duration = int_or_none(video_data.get('duration')) or parse_duration(
video_data.get('duration_format'))
diff --git a/hypervideo_dl/extractor/drtv.py b/hypervideo_dl/extractor/drtv.py
index 37e4d5b..128f439 100644
--- a/hypervideo_dl/extractor/drtv.py
+++ b/hypervideo_dl/extractor/drtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import binascii
import hashlib
import re
@@ -26,7 +23,7 @@ class DRTVIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
+ (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
)
(?P<id>[\da-z_-]+)
@@ -54,6 +51,7 @@ class DRTVIE(InfoExtractor):
'release_year': 2016,
},
'expected_warnings': ['Unable to download f4m manifest'],
+ 'skip': 'this video has been removed',
}, {
# embed
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
@@ -74,31 +72,41 @@ class DRTVIE(InfoExtractor):
# with SignLanguage formats
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
'info_dict': {
- 'id': 'historien-om-danmark-stenalder',
+ 'id': '00831690010',
'ext': 'mp4',
'title': 'Historien om Danmark: Stenalder',
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
'timestamp': 1546628400,
'upload_date': '20190104',
- 'duration': 3502.56,
+ 'duration': 3504.618,
'formats': 'mincount:20',
+ 'release_year': 2017,
+ 'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
+ 'season_number': 1,
+ 'season': 'Historien om Danmark',
+ 'series': 'Historien om Danmark',
},
'params': {
'skip_download': True,
},
}, {
- 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
+ 'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
'only_matching': True,
}, {
'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
'info_dict': {
'id': '00951930010',
'ext': 'mp4',
- 'title': 'Bonderøven (1:8)',
- 'description': 'md5:3cf18fc0d3b205745d4505f896af8121',
- 'timestamp': 1546542000,
- 'upload_date': '20190103',
+ 'title': 'Bonderøven 2019 (1:8)',
+ 'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
+ 'timestamp': 1603188600,
+ 'upload_date': '20201020',
'duration': 2576.6,
+ 'season': 'Bonderøven 2019',
+ 'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
+ 'release_year': 2019,
+ 'season_number': 2019,
+ 'series': 'Frank & Kastaniegaarden'
},
'params': {
'skip_download': True,
@@ -112,6 +120,24 @@ class DRTVIE(InfoExtractor):
}, {
'url': 'https://www.dr.dk/drtv/program/jagten_220924',
'only_matching': True,
+ }, {
+ 'url': 'https://www.dr.dk/lyd/p4aarhus/regionale-nyheder-ar4/regionale-nyheder-2022-05-05-12-30-3',
+ 'info_dict': {
+ 'id': 'urn:dr:mu:programcard:6265cb2571401424d0360113',
+ 'title': "Regionale nyheder",
+ 'ext': 'mp4',
+ 'duration': 120.043,
+ 'series': 'P4 Østjylland regionale nyheder',
+ 'timestamp': 1651746600,
+ 'season': 'Regionale nyheder',
+ 'release_year': 0,
+ 'season_id': 'urn:dr:mu:bundle:61c26889539f0201586b73c5',
+ 'description': '',
+ 'upload_date': '20220505',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
@@ -274,8 +300,6 @@ class DRTVIE(InfoExtractor):
'Unfortunately, DR is not allowed to show this program outside Denmark.',
countries=self._GEO_COUNTRIES)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
@@ -340,7 +364,6 @@ class DRTVLiveIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(update_url_query(
'%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
channel_id, f4m_id=link_type, fatal=False))
- self._sort_formats(formats)
return {
'id': channel_id,
diff --git a/hypervideo_dl/extractor/dtube.py b/hypervideo_dl/extractor/dtube.py
index ad247b7..25a98f6 100644
--- a/hypervideo_dl/extractor/dtube.py
+++ b/hypervideo_dl/extractor/dtube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from socket import timeout
diff --git a/hypervideo_dl/extractor/duboku.py b/hypervideo_dl/extractor/duboku.py
index a875978..fb0546c 100644
--- a/hypervideo_dl/extractor/duboku.py
+++ b/hypervideo_dl/extractor/duboku.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -54,31 +51,39 @@ def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, e
class DubokuIE(InfoExtractor):
IE_NAME = 'duboku'
- IE_DESC = 'www.duboku.co'
+ IE_DESC = 'www.duboku.io'
- _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
_TESTS = [{
- 'url': 'https://www.duboku.co/vodplay/1575-1-1.html',
+ 'url': 'https://w.duboku.io/vodplay/1575-1-1.html',
'info_dict': {
'id': '1575-1-1',
- 'ext': 'ts',
+ 'ext': 'mp4',
'series': '白色月光',
'title': 'contains:白色月光',
'season_number': 1,
'episode_number': 1,
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
+ 'episode': 'Episode 1',
},
'params': {
'skip_download': 'm3u8 download',
},
}, {
- 'url': 'https://www.duboku.co/vodplay/1588-1-1.html',
+ 'url': 'https://w.duboku.io/vodplay/1588-1-1.html',
'info_dict': {
'id': '1588-1-1',
- 'ext': 'ts',
+ 'ext': 'mp4',
'series': '亲爱的自己',
- 'title': 'contains:预告片',
+ 'title': 'contains:第1集',
'season_number': 1,
'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
},
'params': {
'skip_download': 'm3u8 download',
@@ -94,7 +99,7 @@ class DubokuIE(InfoExtractor):
season_id = temp[1]
episode_id = temp[2]
- webpage_url = 'https://www.duboku.co/vodplay/%s.html' % video_id
+ webpage_url = 'https://w.duboku.io/vodplay/%s.html' % video_id
webpage_html = self._download_webpage(webpage_url, video_id)
# extract video url
@@ -127,12 +132,13 @@ class DubokuIE(InfoExtractor):
data_from = player_data.get('from')
# if it is an embedded iframe, maybe it's an external source
+ headers = {'Referer': webpage_url}
if data_from == 'iframe':
# use _type url_transparent to retain the meaningful details
# of the video.
return {
'_type': 'url_transparent',
- 'url': smuggle_url(data_url, {'http_headers': {'Referer': webpage_url}}),
+ 'url': smuggle_url(data_url, {'http_headers': headers}),
'id': video_id,
'title': title,
'series': series_title,
@@ -142,7 +148,7 @@ class DubokuIE(InfoExtractor):
'episode_id': episode_id,
}
- formats = self._extract_m3u8_formats(data_url, video_id, 'mp4')
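+ # Pass the page Referer through to the HLS download as well, since the CDN appears to reject bare manifest requests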
+ formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers)
return {
'id': video_id,
@@ -153,36 +159,29 @@ class DubokuIE(InfoExtractor):
'episode_number': int_or_none(episode_id),
'episode_id': episode_id,
'formats': formats,
- 'http_headers': {'Referer': 'https://www.duboku.co/static/player/videojs.html'}
+ 'http_headers': headers
}
class DubokuPlaylistIE(InfoExtractor):
IE_NAME = 'duboku:list'
- IE_DESC = 'www.duboku.co entire series'
+ IE_DESC = 'www.duboku.io entire series'
- _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/voddetail/)(?P<id>[0-9]+)\.html.*'
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P<id>[0-9]+)\.html.*'
_TESTS = [{
- 'url': 'https://www.duboku.co/voddetail/1575.html',
+ 'url': 'https://w.duboku.io/voddetail/1575.html',
'info_dict': {
'id': 'startswith:1575',
'title': '白色月光',
},
'playlist_count': 12,
}, {
- 'url': 'https://www.duboku.co/voddetail/1554.html',
+ 'url': 'https://w.duboku.io/voddetail/1554.html',
'info_dict': {
'id': 'startswith:1554',
'title': '以家人之名',
},
'playlist_mincount': 30,
- }, {
- 'url': 'https://www.duboku.co/voddetail/1554.html#playlist2',
- 'info_dict': {
- 'id': '1554#playlist2',
- 'title': '以家人之名',
- },
- 'playlist_mincount': 27,
}]
def _real_extract(self, url):
@@ -192,7 +191,7 @@ class DubokuPlaylistIE(InfoExtractor):
series_id = mobj.group('id')
fragment = compat_urlparse.urlparse(url).fragment
- webpage_url = 'https://www.duboku.co/voddetail/%s.html' % series_id
+ webpage_url = 'https://w.duboku.io/voddetail/%s.html' % series_id
webpage_html = self._download_webpage(webpage_url, series_id)
# extract title
@@ -237,6 +236,6 @@ class DubokuPlaylistIE(InfoExtractor):
# return url results
return self.playlist_result([
self.url_result(
- compat_urlparse.urljoin('https://www.duboku.co', x['href']),
+ compat_urlparse.urljoin('https://w.duboku.io', x['href']),
ie=DubokuIE.ie_key(), video_title=x.get('title'))
for x in playlist], series_id + '#' + playlist_id, title)
diff --git a/hypervideo_dl/extractor/dumpert.py b/hypervideo_dl/extractor/dumpert.py
index d9d9afd..010c2d0 100644
--- a/hypervideo_dl/extractor/dumpert.py
+++ b/hypervideo_dl/extractor/dumpert.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -51,7 +48,6 @@ class DumpertIE(InfoExtractor):
'format_id': version,
'quality': quality(version),
})
- self._sort_formats(formats)
thumbnails = []
stills = item.get('stills') or {}
diff --git a/hypervideo_dl/extractor/dvtv.py b/hypervideo_dl/extractor/dvtv.py
index 08663cf..e671433 100644
--- a/hypervideo_dl/extractor/dvtv.py
+++ b/hypervideo_dl/extractor/dvtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -145,7 +142,6 @@ class DVTVIE(InfoExtractor):
'format_id': join_nonempty('http', ext, label),
'height': int_or_none(height),
})
- self._sort_formats(formats)
return {
'id': data.get('mediaid') or video_id,
diff --git a/hypervideo_dl/extractor/dw.py b/hypervideo_dl/extractor/dw.py
index 6eaee07..9c4a08e 100644
--- a/hypervideo_dl/extractor/dw.py
+++ b/hypervideo_dl/extractor/dw.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -65,7 +62,6 @@ class DWIE(InfoExtractor):
transform_source=lambda s: s.replace(
'rtmp://tv-od.dw.de/flash/',
'http://tv-download.dw.de/dwtv_video/flv/'))
- self._sort_formats(formats)
upload_date = hidden_inputs.get('display_date')
if not upload_date:
diff --git a/hypervideo_dl/extractor/eagleplatform.py b/hypervideo_dl/extractor/eagleplatform.py
index f86731a..9ebd24d 100644
--- a/hypervideo_dl/extractor/eagleplatform.py
+++ b/hypervideo_dl/extractor/eagleplatform.py
@@ -1,6 +1,4 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import functools
import re
from .common import InfoExtractor
@@ -8,6 +6,7 @@ from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
+ smuggle_url,
unsmuggle_url,
url_or_none,
)
@@ -21,6 +20,7 @@ class EaglePlatformIE(InfoExtractor):
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
_TESTS = [{
# http://lenta.ru/news/2015/03/06/navalny/
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
@@ -55,14 +55,14 @@ class EaglePlatformIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- # Regular iframe embedding
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
- webpage)
- if mobj is not None:
- return mobj.group('url')
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
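+ # Smuggle the embedding page URL into every result so the extractor can later send it as the Referer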
+ add_referer = functools.partial(smuggle_url, data={'referrer': url})
+
+ res = tuple(super()._extract_embed_urls(url, webpage))
+ if res:
+ return map(add_referer, res)
+
PLAYER_JS_RE = r'''
<script[^>]+
src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
@@ -77,7 +77,7 @@ class EaglePlatformIE(InfoExtractor):
data-id=["\'](?P<id>\d+)
''' % PLAYER_JS_RE, webpage)
if mobj is not None:
- return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+ return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())]
# Generalization of "Javascript code usage", "Combined usage" and
# "Usage without attaching to DOM" embeddings (see
# http://dultonmedia.github.io/eplayer/)
@@ -98,7 +98,7 @@ class EaglePlatformIE(InfoExtractor):
</script>
''' % PLAYER_JS_RE, webpage)
if mobj is not None:
- return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+ return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())]
@staticmethod
def _handle_error(response):
@@ -192,8 +192,6 @@ class EaglePlatformIE(InfoExtractor):
f['url'] = format_url
formats.append(f)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
@@ -204,3 +202,14 @@ class EaglePlatformIE(InfoExtractor):
'age_limit': age_limit,
'formats': formats,
}
+
+
+class ClipYouEmbedIE(InfoExtractor):
+ _VALID_URL = False
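+    # embed-only extractor: _VALID_URL = False, so it is never matched against input URLs directly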
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ yield smuggle_url('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), {'referrer': url})
diff --git a/hypervideo_dl/extractor/ebaumsworld.py b/hypervideo_dl/extractor/ebaumsworld.py
index c97682c..0854d03 100644
--- a/hypervideo_dl/extractor/ebaumsworld.py
+++ b/hypervideo_dl/extractor/ebaumsworld.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/echomsk.py b/hypervideo_dl/extractor/echomsk.py
index 6b7cc65..850eabb 100644
--- a/hypervideo_dl/extractor/echomsk.py
+++ b/hypervideo_dl/extractor/echomsk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/egghead.py b/hypervideo_dl/extractor/egghead.py
index b6b8676..a4b2a12 100644
--- a/hypervideo_dl/extractor/egghead.py
+++ b/hypervideo_dl/extractor/egghead.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -120,7 +117,6 @@ class EggheadLessonIE(EggheadBaseIE):
formats.append({
'url': format_url,
})
- self._sort_formats(formats)
return {
'id': lesson_id,
diff --git a/hypervideo_dl/extractor/ehow.py b/hypervideo_dl/extractor/ehow.py
index b1cd4f5..74469ce 100644
--- a/hypervideo_dl/extractor/ehow.py
+++ b/hypervideo_dl/extractor/ehow.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
diff --git a/hypervideo_dl/extractor/eighttracks.py b/hypervideo_dl/extractor/eighttracks.py
index 9a44f89..3dd9ab1 100644
--- a/hypervideo_dl/extractor/eighttracks.py
+++ b/hypervideo_dl/extractor/eighttracks.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import random
diff --git a/hypervideo_dl/extractor/einthusan.py b/hypervideo_dl/extractor/einthusan.py
index 7af279a..53bc253 100644
--- a/hypervideo_dl/extractor/einthusan.py
+++ b/hypervideo_dl/extractor/einthusan.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -92,8 +89,6 @@ class EinthusanIE(InfoExtractor):
'url': mp4_url,
})
- self._sort_formats(formats)
-
description = get_elements_by_class('synopsis', webpage)[0]
thumbnail = self._html_search_regex(
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
diff --git a/hypervideo_dl/extractor/eitb.py b/hypervideo_dl/extractor/eitb.py
index ee5ead1..bd027da 100644
--- a/hypervideo_dl/extractor/eitb.py
+++ b/hypervideo_dl/extractor/eitb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
@@ -74,8 +71,6 @@ class EitbIE(InfoExtractor):
'%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
diff --git a/hypervideo_dl/extractor/ellentube.py b/hypervideo_dl/extractor/ellentube.py
index d451bc0..6eb00f9 100644
--- a/hypervideo_dl/extractor/ellentube.py
+++ b/hypervideo_dl/extractor/ellentube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -31,7 +28,6 @@ class EllenTubeBaseIE(InfoExtractor):
entry_protocol='m3u8_native', m3u8_id='hls')
duration = int_or_none(entry.get('duration'))
break
- self._sort_formats(formats)
def get_insight(kind):
return int_or_none(try_get(
diff --git a/hypervideo_dl/extractor/elonet.py b/hypervideo_dl/extractor/elonet.py
index 9c6aea2..c5558ff 100644
--- a/hypervideo_dl/extractor/elonet.py
+++ b/hypervideo_dl/extractor/elonet.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import determine_ext
@@ -56,7 +53,6 @@ class ElonetIE(InfoExtractor):
else:
formats, subtitles = [], {}
self.raise_no_formats(f'Unknown streaming format {ext}')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/elpais.py b/hypervideo_dl/extractor/elpais.py
index b89f6db..7c6c880 100644
--- a/hypervideo_dl/extractor/elpais.py
+++ b/hypervideo_dl/extractor/elpais.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import strip_jsonp, unified_strdate
diff --git a/hypervideo_dl/extractor/embedly.py b/hypervideo_dl/extractor/embedly.py
index a5820b2..483d018 100644
--- a/hypervideo_dl/extractor/embedly.py
+++ b/hypervideo_dl/extractor/embedly.py
@@ -1,6 +1,5 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import re
+import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
@@ -12,5 +11,14 @@ class EmbedlyIE(InfoExtractor):
'only_matching': True,
}]
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ # Bypass suitable check
+        for mobj in re.finditer(r'class=["\']embedly-card["\'][^>]*href=["\'](?P<url>[^"\']+)', webpage):
+ yield mobj.group('url')
+
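+        # embedly-embed iframes carry the target URL percent-encoded in their "url" query parameter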
+        for mobj in re.finditer(r'class=["\']embedly-embed["\'][^>]*src=["\'][^"\']*url=(?P<url>[^&]+)', webpage):
+ yield urllib.parse.unquote(mobj.group('url'))
+
def _real_extract(self, url):
return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
diff --git a/hypervideo_dl/extractor/engadget.py b/hypervideo_dl/extractor/engadget.py
index 733bf32..e7c5d7b 100644
--- a/hypervideo_dl/extractor/engadget.py
+++ b/hypervideo_dl/extractor/engadget.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/epicon.py b/hypervideo_dl/extractor/epicon.py
index cd19325..3bfcc54 100644
--- a/hypervideo_dl/extractor/epicon.py
+++ b/hypervideo_dl/extractor/epicon.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -62,7 +59,6 @@ class EpiconIE(InfoExtractor):
description = self._og_search_description(webpage) or None
thumbnail = self._og_search_thumbnail(webpage) or None
formats = self._extract_m3u8_formats(data_json['url']['video_url'], id)
- self._sort_formats(formats)
subtitles = {}
for subtitle in data_json.get('subtitles', []):
diff --git a/hypervideo_dl/extractor/epoch.py b/hypervideo_dl/extractor/epoch.py
new file mode 100644
index 0000000..110e78c
--- /dev/null
+++ b/hypervideo_dl/extractor/epoch.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from ..utils import extract_attributes, get_element_html_by_id
+
+
+class EpochIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.theepochtimes\.com/[\w-]+_(?P<id>\d+)\.html'
+ _TESTS = [
+ {
+ 'url': 'https://www.theepochtimes.com/they-can-do-audio-video-physical-surveillance-on-you-24h-365d-a-year-rex-lee-on-intrusive-apps_4661688.html',
+ 'info_dict': {
+ 'id': 'a3dd732c-4750-4bc8-8156-69180668bda1',
+ 'ext': 'mp4',
+ 'title': '‘They Can Do Audio, Video, Physical Surveillance on You 24H/365D a Year’: Rex Lee on Intrusive Apps',
+ }
+ },
+ {
+ 'url': 'https://www.theepochtimes.com/the-communist-partys-cyberattacks-on-america-explained-rex-lee-talks-tech-hybrid-warfare_4342413.html',
+ 'info_dict': {
+ 'id': '276c7f46-3bbf-475d-9934-b9bbe827cf0a',
+ 'ext': 'mp4',
+ 'title': 'The Communist Party’s Cyberattacks on America Explained; Rex Lee Talks Tech Hybrid Warfare',
+ }
+ },
+ {
+ 'url': 'https://www.theepochtimes.com/kash-patel-a-6-year-saga-of-government-corruption-from-russiagate-to-mar-a-lago_4690250.html',
+ 'info_dict': {
+ 'id': 'aa9ceecd-a127-453d-a2de-7153d6fd69b6',
+ 'ext': 'mp4',
+ 'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago',
+ }
+ },
+ {
+ 'url': 'https://www.theepochtimes.com/dick-morris-discusses-his-book-the-return-trumps-big-2024-comeback_4819205.html',
+ 'info_dict': {
+ 'id': '9489f994-2a20-4812-b233-ac0e5c345632',
+ 'ext': 'mp4',
+ 'title': 'Dick Morris Discusses His Book ‘The Return: Trump’s Big 2024 Comeback’',
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
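+        # the page embeds a Youmaker player; the asset id is the data-id of the #videobox element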
+ youmaker_video_id = extract_attributes(get_element_html_by_id('videobox', webpage))['data-id']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': youmaker_video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': self._html_extract_title(webpage)
+ }
diff --git a/hypervideo_dl/extractor/eporner.py b/hypervideo_dl/extractor/eporner.py
index 25a0d97..a233797 100644
--- a/hypervideo_dl/extractor/eporner.py
+++ b/hypervideo_dl/extractor/eporner.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
encode_base_n,
@@ -110,7 +106,6 @@ class EpornerIE(InfoExtractor):
'height': height,
'fps': fps,
})
- self._sort_formats(formats)
json_ld = self._search_json_ld(webpage, display_id, default={})
diff --git a/hypervideo_dl/extractor/eroprofile.py b/hypervideo_dl/extractor/eroprofile.py
index 5d5e7f2..2b61f3b 100644
--- a/hypervideo_dl/extractor/eroprofile.py
+++ b/hypervideo_dl/extractor/eroprofile.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/ertgr.py b/hypervideo_dl/extractor/ertgr.py
index 19ce23f..9ecdf5d 100644
--- a/hypervideo_dl/extractor/ertgr.py
+++ b/hypervideo_dl/extractor/ertgr.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -18,7 +15,6 @@ from ..utils import (
parse_iso8601,
str_or_none,
try_get,
- unescapeHTML,
url_or_none,
variadic,
)
@@ -77,7 +73,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
},
]
- def _extract_formats_and_subs(self, video_id, allow_none=True):
+ def _extract_formats_and_subs(self, video_id):
media_info = self._call_api(video_id, codename=video_id)
formats, subs = [], {}
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
@@ -101,8 +97,6 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
formats.extend(formats_)
self._merge_subtitles(subs_, target=subs)
- if formats or not allow_none:
- self._sort_formats(formats)
return formats, subs
def _real_extract(self, url):
@@ -122,7 +116,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
class ERTFlixIE(ERTFlixBaseIE):
IE_NAME = 'ertflix'
IE_DESC = 'ERTFLIX videos'
- _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
+ _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
_TESTS = [{
'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
@@ -174,6 +168,9 @@ class ERTFlixIE(ERTFlixBaseIE):
'title': 'Το δίκτυο',
},
'playlist_mincount': 9,
+ }, {
+ 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
+ 'only_matching': True,
}]
def _extract_episode(self, episode):
@@ -275,6 +272,7 @@ class ERTWebtvEmbedIE(InfoExtractor):
IE_DESC = 'ert.gr webtv embedded videos'
_BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
_VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:https?:)?{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_TESTS = [{
'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
@@ -287,23 +285,11 @@ class ERTWebtvEmbedIE(InfoExtractor):
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
- EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'
-
- for mobj in re.finditer(EMBED_RE, webpage):
- url = unescapeHTML(mobj.group('url'))
- if not cls.suitable(url):
- continue
- yield url
-
def _real_extract(self, url):
video_id = self._match_id(url)
formats, subs = self._extract_m3u8_formats_and_subtitles(
f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
video_id, 'mp4')
- self._sort_formats(formats)
thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
if thumbnail_id and not thumbnail_id.startswith('http'):
thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
diff --git a/hypervideo_dl/extractor/escapist.py b/hypervideo_dl/extractor/escapist.py
index 4cd815e..85a1cbf 100644
--- a/hypervideo_dl/extractor/escapist.py
+++ b/hypervideo_dl/extractor/escapist.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -97,7 +95,6 @@ class EscapistIE(InfoExtractor):
'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
'height': int_or_none(video.get('res')),
} for video in data['files']['videos']]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/espn.py b/hypervideo_dl/extractor/espn.py
index dc50f3b..f4b0134 100644
--- a/hypervideo_dl/extractor/espn.py
+++ b/hypervideo_dl/extractor/espn.py
@@ -1,14 +1,16 @@
-from __future__ import unicode_literals
-
+import base64
+import json
import re
+import urllib.parse
+from .adobepass import AdobePassIE
from .common import InfoExtractor
from .once import OnceIE
-from ..compat import compat_str
from ..utils import (
determine_ext,
dict_get,
int_or_none,
+ traverse_obj,
unified_strdate,
unified_timestamp,
)
@@ -26,7 +28,6 @@ class ESPNIE(OnceIE):
(?:
(?:
video/(?:clip|iframe/twitter)|
- watch/player
)
(?:
.*?\?.*?\bid=|
@@ -49,6 +50,8 @@ class ESPNIE(OnceIE):
'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
'timestamp': 1390936111,
'upload_date': '20140128',
+ 'duration': 1302,
+ 'thumbnail': r're:https://.+\.jpg',
},
'params': {
'skip_download': True,
@@ -74,15 +77,6 @@ class ESPNIE(OnceIE):
'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
'only_matching': True,
}, {
- 'url': 'http://www.espn.com/watch/player?id=19141491',
- 'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
- 'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/watch/player/_/id/19141491',
- 'only_matching': True,
- }, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}, {
@@ -100,7 +94,13 @@ class ESPNIE(OnceIE):
}, {
'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
'only_matching': True,
- }]
+ }, {
+ 'url': 'http://www.espn.com/watch/player?id=19141491',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
+ 'only_matching': True,
+    }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -118,7 +118,7 @@ class ESPNIE(OnceIE):
for source_id, source in source.items():
if source_id == 'alert':
continue
- elif isinstance(source, compat_str):
+ elif isinstance(source, str):
extract_source(source, base_source_id)
elif isinstance(source, dict):
traverse_source(
@@ -162,7 +162,6 @@ class ESPNIE(OnceIE):
links = clip.get('links', {})
traverse_source(links.get('source', {}))
traverse_source(links.get('mobile', {}))
- self._sort_formats(formats)
description = clip.get('caption') or clip.get('description')
thumbnail = clip.get('thumbnail')
@@ -198,7 +197,7 @@ class ESPNArticleIE(InfoExtractor):
@classmethod
def suitable(cls, url):
- return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
+ return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -269,7 +268,6 @@ class ESPNCricInfoIE(InfoExtractor):
'url': item['url'],
'vcodec': 'none',
})
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title'),
@@ -279,3 +277,134 @@ class ESPNCricInfoIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class WatchESPNIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
+ 'info_dict': {
+ 'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
+ 'ext': 'mp4',
+ 'title': 'Huddersfield vs. Burnley',
+ 'duration': 7500,
+ 'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
+ 'info_dict': {
+ 'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
+ 'ext': 'mp4',
+ 'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
+ 'duration': 8335,
+ 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
+ 'info_dict': {
+ 'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
+ 'ext': 'mp4',
+ 'title': 'The Wheel - Episode 10',
+ 'duration': 3352,
+ 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
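+    # static bearer key used by _call_bamgrid_api when the caller supplies no Authorization header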
+ _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'
+
+ def _call_bamgrid_api(self, path, video_id, payload=None, headers={}):
+ if 'Authorization' not in headers:
+ headers['Authorization'] = f'Bearer {self._API_KEY}'
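+        # the token endpoint expects a form-encoded body; all other BAMGrid endpoints take JSON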
+ parse = urllib.parse.urlencode if path == 'token' else json.dumps
+ return self._download_json(
+ f'https://espn.api.edge.bamgrid.com/{path}', video_id, headers=headers, data=parse(payload).encode())
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ cdn_data = self._download_json(
+ f'https://watch-cdn.product.api.espn.com/api/product/v3/watchespn/web/playback/event?id={video_id}',
+ video_id)
+ video_data = cdn_data['playbackState']
+
+ # ESPN+ subscription required, through cookies
+ if 'DTC' in video_data.get('sourceId'):
+ cookie = self._get_cookies(url).get('ESPN-ONESITE.WEB-PROD.token')
+ if not cookie:
+ self.raise_login_required(method='cookies')
+
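+            # two-step exchange: register an anonymous device, trade its assertion for a
+            # device token, then trade the cookie's id_token for an account access token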
+ assertion = self._call_bamgrid_api(
+ 'devices', video_id,
+ headers={'Content-Type': 'application/json; charset=UTF-8'},
+ payload={
+ 'deviceFamily': 'android',
+ 'applicationRuntime': 'android',
+ 'deviceProfile': 'tv',
+ 'attributes': {},
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:device',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ assertion = self._call_bamgrid_api(
+ 'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
+ headers={
+ 'Authorization': token,
+ 'Content-Type': 'application/json; charset=UTF-8'
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:account',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ playback = self._download_json(
+ video_data['videoHref'].format(scenario='browser~ssai'), video_id,
+ headers={
+ 'Accept': 'application/vnd.media-service+json; version=5',
+ 'Authorization': token
+ })
+ m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token}
+
+ # No login required
+ elif video_data.get('sourceId') == 'ESPN_FREE':
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id)
+ m3u8_url, headers = asset['stream'], {}
+
+ # TV Provider required
+ else:
+ resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
+ auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()
+
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id, data=f'adobeToken={urllib.parse.quote_plus(base64.b64encode(auth))}&drmSupport=HLS'.encode())
+ m3u8_url, headers = asset['stream'], {}
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'duration': traverse_obj(cdn_data, ('tracking', 'duration')),
+ 'title': video_data.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': video_data.get('posterHref'),
+ 'http_headers': headers,
+ }
diff --git a/hypervideo_dl/extractor/esri.py b/hypervideo_dl/extractor/esri.py
index e9dcaeb..02e7efa 100644
--- a/hypervideo_dl/extractor/esri.py
+++ b/hypervideo_dl/extractor/esri.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -46,7 +43,6 @@ class EsriVideoIE(InfoExtractor):
'height': int(height),
'filesize_approx': parse_filesize(filesize),
})
- self._sort_formats(formats)
title = self._html_search_meta('title', webpage, 'title')
description = self._html_search_meta(
diff --git a/hypervideo_dl/extractor/europa.py b/hypervideo_dl/extractor/europa.py
index 60ab2ce..c2b4937 100644
--- a/hypervideo_dl/extractor/europa.py
+++ b/hypervideo_dl/extractor/europa.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -79,7 +76,6 @@ class EuropaIE(InfoExtractor):
'format_note': xpath_text(file_, './lglabel'),
'language_preference': language_preference(lang)
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/europeantour.py b/hypervideo_dl/extractor/europeantour.py
index e28f067..1995a74 100644
--- a/hypervideo_dl/extractor/europeantour.py
+++ b/hypervideo_dl/extractor/europeantour.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/eurosport.py b/hypervideo_dl/extractor/eurosport.py
new file mode 100644
index 0000000..654e112
--- /dev/null
+++ b/hypervideo_dl/extractor/eurosport.py
@@ -0,0 +1,97 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class EurosportIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.eurosport\.com/\w+/[\w-]+/\d+/[\w-]+_(?P<id>vid\d+)'
+ _TESTS = [{
+ 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
+ 'info_dict': {
+ 'id': '2480939',
+ 'ext': 'mp4',
+ 'title': 'Highlights: Rafael Nadal brushes aside Caper Ruud to win record-extending 14th French Open title',
+ 'description': 'md5:b564db73ecfe4b14ebbd8e62a3692c76',
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388285-69245968-2560-1440.png',
+ 'duration': 195.0,
+ 'display_id': 'vid1694147',
+ 'timestamp': 1654446698,
+ 'upload_date': '20220605',
+ }
+ }, {
+ 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/watch-the-top-five-shots-from-men-s-final-as-rafael-nadal-beats-casper-ruud-to-seal-14th-french-open_vid1694283/video.shtml',
+ 'info_dict': {
+ 'id': '2481254',
+ 'ext': 'mp4',
+ 'title': 'md5:149dcc5dfb38ab7352acc008cc9fb071',
+ 'duration': 130.0,
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388422-69248708-2560-1440.png',
+ 'description': 'md5:a0c8a7f6b285e48ae8ddbe7aa85cfee6',
+ 'display_id': 'vid1694283',
+ 'timestamp': 1654456090,
+ 'upload_date': '20220605',
+ }
+ }, {
+        # geo-fenced, but can be bypassed via X-Forwarded-For
+ 'url': 'https://www.eurosport.com/cycling/tour-de-france-femmes/2022/incredible-ride-marlen-reusser-storms-to-stage-4-win-at-tour-de-france-femmes_vid1722221/video.shtml',
+ 'info_dict': {
+ 'id': '2582552',
+ 'ext': 'mp4',
+ 'title': '‘Incredible ride!’ - Marlen Reusser storms to Stage 4 win at Tour de France Femmes',
+ 'duration': 188.0,
+ 'display_id': 'vid1722221',
+ 'timestamp': 1658936167,
+ 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/07/27/3423347-69852108-2560-1440.jpg',
+ 'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
+ 'upload_date': '20220727',
+ }
+ }]
+
+ _TOKEN = None
+
+    # The country list is actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
+    # but that method requires obtaining a sha256 hash
+    _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR']  # not a complete list, but it should work
+
+ def _real_initialize(self):
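+        # fetch the anonymous playback token once and cache it on the class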
+ if EurosportIE._TOKEN is None:
+ EurosportIE._TOKEN = self._download_json(
+ 'https://eu3-prod-direct.eurosport.com/token?realm=eurosport', None,
+ 'Trying to get token')['data']['attributes']['token']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._download_json(
+ f'https://eu3-prod-direct.eurosport.com/playback/v2/videoPlaybackInfo/sourceSystemId/eurosport-{display_id}',
+ display_id, query={'usePreAuth': True}, headers={'Authorization': f'Bearer {EurosportIE._TOKEN}'})['data']
+
+ json_ld_data = self._search_json_ld(webpage, display_id)
+
+ formats, subtitles = [], {}
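+        # the playback info exposes one manifest URL per protocol (hls/dash/mss)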
+ for stream_type in json_data['attributes']['streaming']:
+ if stream_type == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
+ elif stream_type == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+ elif stream_type == 'mss':
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': json_data['id'],
+ 'title': json_ld_data.get('title') or self._og_search_title(webpage),
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': json_ld_data.get('thumbnails'),
+ 'description': (json_ld_data.get('description')
+ or self._html_search_meta(['og:description', 'description'], webpage)),
+ 'duration': json_ld_data.get('duration'),
+ 'timestamp': json_ld_data.get('timestamp'),
+ }
diff --git a/hypervideo_dl/extractor/euscreen.py b/hypervideo_dl/extractor/euscreen.py
index 2759e74..65a1dc7 100644
--- a/hypervideo_dl/extractor/euscreen.py
+++ b/hypervideo_dl/extractor/euscreen.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
@@ -48,7 +45,6 @@ class EUScreenIE(InfoExtractor):
formats = [{
'url': source['src'],
} for source in video_json.get('sources', [])]
- self._sort_formats(formats)
return {
'id': id,
diff --git a/hypervideo_dl/extractor/everyonesmixtape.py b/hypervideo_dl/extractor/everyonesmixtape.py
deleted file mode 100644
index 80cb032..0000000
--- a/hypervideo_dl/extractor/everyonesmixtape.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from __future__ import unicode_literals
-
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- sanitized_Request,
-)
-
-
-class EveryonesMixtapeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
-
- _TESTS = [{
- 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
- 'info_dict': {
- 'id': '5bfseWNmlds',
- 'ext': 'mp4',
- 'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
- 'uploader': 'FKR.TV',
- 'uploader_id': 'frenchkissrecords',
- 'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
- 'upload_date': '20081015'
- },
- 'params': {
- 'skip_download': True, # This is simply YouTube
- }
- }, {
- 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
- 'info_dict': {
- 'id': 'm7m0jJAbMQi',
- 'title': 'Driving',
- },
- 'playlist_count': 24
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- playlist_id = mobj.group('id')
-
- pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
- pllist_req = sanitized_Request(pllist_url)
- pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
-
- playlist_list = self._download_json(
- pllist_req, playlist_id, note='Downloading playlist metadata')
- try:
- playlist_no = next(playlist['id']
- for playlist in playlist_list
- if playlist['code'] == playlist_id)
- except StopIteration:
- raise ExtractorError('Playlist id not found')
-
- pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
- pl_req = sanitized_Request(pl_url)
- pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
- playlist = self._download_json(
- pl_req, playlist_id, note='Downloading playlist info')
-
- entries = [{
- '_type': 'url',
- 'url': t['url'],
- 'title': t['title'],
- } for t in playlist['tracks']]
-
- if mobj.group('songnr'):
- songnr = int(mobj.group('songnr')) - 1
- return entries[songnr]
-
- playlist_title = playlist['mixData']['name']
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': playlist_title,
- 'entries': entries,
- }
diff --git a/hypervideo_dl/extractor/expotv.py b/hypervideo_dl/extractor/expotv.py
index 95a8977..bda6e3c 100644
--- a/hypervideo_dl/extractor/expotv.py
+++ b/hypervideo_dl/extractor/expotv.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -51,7 +49,6 @@ class ExpoTVIE(InfoExtractor):
r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
'file extension', default=None) or fcfg.get('type'),
})
- self._sort_formats(formats)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
diff --git a/hypervideo_dl/extractor/expressen.py b/hypervideo_dl/extractor/expressen.py
index dc8b855..86967b6 100644
--- a/hypervideo_dl/extractor/expressen.py
+++ b/hypervideo_dl/extractor/expressen.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -20,11 +15,13 @@ class ExpressenIE(InfoExtractor):
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
_TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
- 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
+ 'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
'info_dict': {
- 'id': '8690962',
+ 'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136',
+ 'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden',
'ext': 'mp4',
'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
@@ -47,13 +44,6 @@ class ExpressenIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
- webpage)]
-
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -67,7 +57,7 @@ class ExpressenIE(InfoExtractor):
display_id, transform_source=unescapeHTML)
info = extract_data('video-tracking-info')
- video_id = info['videoId']
+ video_id = info['contentId']
data = extract_data('article-data')
stream = data['stream']
@@ -80,7 +70,6 @@ class ExpressenIE(InfoExtractor):
formats = [{
'url': stream,
}]
- self._sort_formats(formats)
title = info.get('titleRaw') or data['title']
description = info.get('descriptionRaw')
diff --git a/hypervideo_dl/extractor/extractors.py b/hypervideo_dl/extractor/extractors.py
index 457f4c2..610e02f 100644
--- a/hypervideo_dl/extractor/extractors.py
+++ b/hypervideo_dl/extractor/extractors.py
@@ -1,2144 +1,26 @@
-# flake8: noqa
-from __future__ import unicode_literals
+import contextlib
+import os
-from .abc import (
- ABCIE,
- ABCIViewIE,
- ABCIViewShowSeriesIE,
-)
-from .abcnews import (
- AbcNewsIE,
- AbcNewsVideoIE,
-)
-from .abcotvs import (
- ABCOTVSIE,
- ABCOTVSClipsIE,
-)
-from .abematv import (
- AbemaTVIE,
- AbemaTVTitleIE,
-)
-from .academicearth import AcademicEarthCourseIE
-from .acast import (
- ACastIE,
- ACastChannelIE,
-)
-from .adn import ADNIE
-from .adobeconnect import AdobeConnectIE
-from .adobetv import (
- AdobeTVEmbedIE,
- AdobeTVIE,
- AdobeTVShowIE,
- AdobeTVChannelIE,
- AdobeTVVideoIE,
-)
-from .adultswim import AdultSwimIE
-from .aenetworks import (
- AENetworksIE,
- AENetworksCollectionIE,
- AENetworksShowIE,
- HistoryTopicIE,
- HistoryPlayerIE,
- BiographyIE,
-)
-from .afreecatv import (
- AfreecaTVIE,
- AfreecaTVLiveIE,
-)
-from .airmozilla import AirMozillaIE
-from .aljazeera import AlJazeeraIE
-from .alphaporno import AlphaPornoIE
-from .amara import AmaraIE
-from .alura import (
- AluraIE,
- AluraCourseIE
-)
-from .amcnetworks import AMCNetworksIE
-from .animelab import (
- AnimeLabIE,
- AnimeLabShowsIE,
-)
-from .amazon import AmazonStoreIE
-from .americastestkitchen import (
- AmericasTestKitchenIE,
- AmericasTestKitchenSeasonIE,
-)
-from .animeondemand import AnimeOnDemandIE
-from .anvato import AnvatoIE
-from .aol import AolIE
-from .allocine import AllocineIE
-from .aliexpress import AliExpressLiveIE
-from .alsace20tv import (
- Alsace20TVIE,
- Alsace20TVEmbedIE,
-)
-from .apa import APAIE
-from .aparat import AparatIE
-from .appleconnect import AppleConnectIE
-from .appletrailers import (
- AppleTrailersIE,
- AppleTrailersSectionIE,
-)
-from .applepodcasts import ApplePodcastsIE
-from .archiveorg import (
- ArchiveOrgIE,
- YoutubeWebArchiveIE,
-)
-from .arcpublishing import ArcPublishingIE
-from .arkena import ArkenaIE
-from .ard import (
- ARDBetaMediathekIE,
- ARDIE,
- ARDMediathekIE,
-)
-from .arte import (
- ArteTVIE,
- ArteTVEmbedIE,
- ArteTVPlaylistIE,
- ArteTVCategoryIE,
-)
-from .arnes import ArnesIE
-from .asiancrush import (
- AsianCrushIE,
- AsianCrushPlaylistIE,
-)
-from .atresplayer import AtresPlayerIE
-from .atttechchannel import ATTTechChannelIE
-from .atvat import ATVAtIE
-from .audimedia import AudiMediaIE
-from .audioboom import AudioBoomIE
-from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .audius import (
- AudiusIE,
- AudiusTrackIE,
- AudiusPlaylistIE,
- AudiusProfileIE,
-)
-from .awaan import (
- AWAANIE,
- AWAANVideoIE,
- AWAANLiveIE,
- AWAANSeasonIE,
-)
-from .azmedien import AZMedienIE
-from .baidu import BaiduVideoIE
-from .banbye import (
- BanByeIE,
- BanByeChannelIE,
-)
-from .bandaichannel import BandaiChannelIE
-from .bandcamp import (
- BandcampIE,
- BandcampAlbumIE,
- BandcampWeeklyIE,
- BandcampUserIE,
-)
-from .bannedvideo import BannedVideoIE
-from .bbc import (
- BBCCoUkIE,
- BBCCoUkArticleIE,
- BBCCoUkIPlayerEpisodesIE,
- BBCCoUkIPlayerGroupIE,
- BBCCoUkPlaylistIE,
- BBCIE,
-)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .bellmedia import BellMediaIE
-from .beatport import BeatportIE
-from .bet import BetIE
-from .bfi import BFIPlayerIE
-from .bfmtv import (
- BFMTVIE,
- BFMTVLiveIE,
- BFMTVArticleIE,
-)
-from .bibeltv import BibelTVIE
-from .bigflix import BigflixIE
-from .bigo import BigoIE
-from .bild import BildIE
-from .bilibili import (
- BiliBiliIE,
- BiliBiliSearchIE,
- BilibiliCategoryIE,
- BiliBiliBangumiIE,
- BilibiliAudioIE,
- BilibiliAudioAlbumIE,
- BiliBiliPlayerIE,
- BilibiliChannelIE,
- BiliIntlIE,
- BiliIntlSeriesIE,
-)
-from .biobiochiletv import BioBioChileTVIE
-from .bitchute import (
- BitChuteIE,
- BitChuteChannelIE,
-)
-from .bitwave import (
- BitwaveReplayIE,
- BitwaveStreamIE,
-)
-from .biqle import BIQLEIE
-from .blackboardcollaborate import BlackboardCollaborateIE
-from .bleacherreport import (
- BleacherReportIE,
- BleacherReportCMSIE,
-)
-from .blogger import BloggerIE
-from .bloomberg import BloombergIE
-from .bokecc import BokeCCIE
-from .bongacams import BongaCamsIE
-from .bostonglobe import BostonGlobeIE
-from .box import BoxIE
-from .bpb import BpbIE
-from .br import (
- BRIE,
- BRMediathekIE,
-)
-from .bravotv import BravoTVIE
-from .breakcom import BreakIE
-from .breitbart import BreitBartIE
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .businessinsider import BusinessInsiderIE
-from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
-from .c56 import C56IE
-from .cableav import CableAVIE
-from .callin import CallinIE
-from .caltrans import CaltransIE
-from .cam4 import CAM4IE
-from .camdemy import (
- CamdemyIE,
- CamdemyFolderIE
-)
-from .cammodels import CamModelsIE
-from .camwithher import CamWithHerIE
-from .canalalpha import CanalAlphaIE
-from .canalplus import CanalplusIE
-from .canalc2 import Canalc2IE
-from .canvas import (
- CanvasIE,
- CanvasEenIE,
- VrtNUIE,
- DagelijkseKostIE,
-)
-from .carambatv import (
- CarambaTVIE,
- CarambaTVPageIE,
-)
-from .cartoonnetwork import CartoonNetworkIE
-from .cbc import (
- CBCIE,
- CBCPlayerIE,
- CBCGemIE,
- CBCGemPlaylistIE,
- CBCGemLiveIE,
-)
-from .cbs import CBSIE
-from .cbslocal import (
- CBSLocalIE,
- CBSLocalArticleIE,
-)
-from .cbsinteractive import CBSInteractiveIE
-from .cbsnews import (
- CBSNewsEmbedIE,
- CBSNewsIE,
- CBSNewsLiveVideoIE,
-)
-from .cbssports import (
- CBSSportsEmbedIE,
- CBSSportsIE,
- TwentyFourSevenSportsIE,
-)
-from .ccc import (
- CCCIE,
- CCCPlaylistIE,
-)
-from .ccma import CCMAIE
-from .cctv import CCTVIE
-from .cda import CDAIE
-from .ceskatelevize import CeskaTelevizeIE
-from .cgtn import CGTNIE
-from .channel9 import Channel9IE
-from .charlierose import CharlieRoseIE
-from .chaturbate import ChaturbateIE
-from .chilloutzone import ChilloutzoneIE
-from .chingari import (
- ChingariIE,
- ChingariUserIE,
-)
-from .chirbit import (
- ChirbitIE,
- ChirbitProfileIE,
-)
-from .cinchcast import CinchcastIE
-from .cinemax import CinemaxIE
-from .ciscolive import (
- CiscoLiveSessionIE,
- CiscoLiveSearchIE,
-)
-from .ciscowebex import CiscoWebexIE
-from .cjsw import CJSWIE
-from .cliphunter import CliphunterIE
-from .clippit import ClippitIE
-from .cliprs import ClipRsIE
-from .clipsyndicate import ClipsyndicateIE
-from .closertotruth import CloserToTruthIE
-from .cloudflarestream import CloudflareStreamIE
-from .cloudy import CloudyIE
-from .clubic import ClubicIE
-from .clyp import ClypIE
-from .cmt import CMTIE
-from .cnbc import (
- CNBCIE,
- CNBCVideoIE,
-)
-from .cnn import (
- CNNIE,
- CNNBlogsIE,
- CNNArticleIE,
-)
-from .coub import CoubIE
-from .comedycentral import (
- ComedyCentralIE,
- ComedyCentralTVIE,
-)
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import (
- MmsIE,
- RtmpIE,
- ViewSourceIE,
-)
-from .condenast import CondeNastIE
-from .contv import CONtvIE
-from .corus import CorusIE
-from .cpac import (
- CPACIE,
- CPACPlaylistIE,
-)
-from .cozytv import CozyTVIE
-from .cracked import CrackedIE
-from .crackle import CrackleIE
-from .craftsy import CraftsyIE
-from .crooksandliars import CrooksAndLiarsIE
-from .crowdbunker import (
- CrowdBunkerIE,
- CrowdBunkerChannelIE,
-)
-from .crunchyroll import (
- CrunchyrollIE,
- CrunchyrollShowPlaylistIE,
- CrunchyrollBetaIE,
- CrunchyrollBetaShowIE,
-)
-from .cspan import CSpanIE, CSpanCongressIE
-from .ctsnews import CtsNewsIE
-from .ctv import CTVIE
-from .ctvnews import CTVNewsIE
-from .cultureunplugged import CultureUnpluggedIE
-from .curiositystream import (
- CuriosityStreamIE,
- CuriosityStreamCollectionsIE,
- CuriosityStreamSeriesIE,
-)
-from .cwtv import CWTVIE
-from .cybrary import (
- CybraryIE,
- CybraryCourseIE
-)
-from .daftsex import DaftsexIE
-from .dailymail import DailyMailIE
-from .dailymotion import (
- DailymotionIE,
- DailymotionPlaylistIE,
- DailymotionUserIE,
-)
-from .damtomo import (
- DamtomoRecordIE,
- DamtomoVideoIE,
-)
-from .daum import (
- DaumIE,
- DaumClipIE,
- DaumPlaylistIE,
- DaumUserIE,
-)
-from .daystar import DaystarClipIE
-from .dbtv import DBTVIE
-from .dctp import DctpTvIE
-from .deezer import (
- DeezerPlaylistIE,
- DeezerAlbumIE,
-)
-from .democracynow import DemocracynowIE
-from .dfb import DFBIE
-from .dhm import DHMIE
-from .digg import DiggIE
-from .dotsub import DotsubIE
-from .douyutv import (
- DouyuShowIE,
- DouyuTVIE,
-)
-from .dplay import (
- DPlayIE,
- DiscoveryPlusIE,
- HGTVDeIE,
- GoDiscoveryIE,
- TravelChannelIE,
- CookingChannelIE,
- HGTVUsaIE,
- FoodNetworkIE,
- InvestigationDiscoveryIE,
- DestinationAmericaIE,
- AmHistoryChannelIE,
- ScienceChannelIE,
- DIYNetworkIE,
- DiscoveryLifeIE,
- AnimalPlanetIE,
- TLCIE,
- DiscoveryPlusIndiaIE,
- DiscoveryNetworksDeIE,
- DiscoveryPlusItalyIE,
- DiscoveryPlusItalyShowIE,
- DiscoveryPlusIndiaShowIE,
-)
-from .dreisat import DreiSatIE
-from .drbonanza import DRBonanzaIE
-from .drtuber import DrTuberIE
-from .drtv import (
- DRTVIE,
- DRTVLiveIE,
-)
-from .dtube import DTubeIE
-from .dvtv import DVTVIE
-from .duboku import (
- DubokuIE,
- DubokuPlaylistIE
-)
-from .dumpert import DumpertIE
-from .defense import DefenseGouvFrIE
-from .digitalconcerthall import DigitalConcertHallIE
-from .discovery import DiscoveryIE
-from .disney import DisneyIE
-from .dispeak import DigitallySpeakingIE
-from .doodstream import DoodStreamIE
-from .dropbox import DropboxIE
-from .dropout import (
- DropoutSeasonIE,
- DropoutIE
-)
-from .dw import (
- DWIE,
- DWArticleIE,
-)
-from .eagleplatform import EaglePlatformIE
-from .ebaumsworld import EbaumsWorldIE
-from .echomsk import EchoMskIE
-from .egghead import (
- EggheadCourseIE,
- EggheadLessonIE,
-)
-from .ehow import EHowIE
-from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
-from .eitb import EitbIE
-from .ellentube import (
- EllenTubeIE,
- EllenTubeVideoIE,
- EllenTubePlaylistIE,
-)
-from .elonet import ElonetIE
-from .elpais import ElPaisIE
-from .embedly import EmbedlyIE
-from .engadget import EngadgetIE
-from .epicon import (
- EpiconIE,
- EpiconSeriesIE,
-)
-from .eporner import EpornerIE
-from .eroprofile import (
- EroProfileIE,
- EroProfileAlbumIE,
-)
-from .ertgr import (
- ERTFlixCodenameIE,
- ERTFlixIE,
- ERTWebtvEmbedIE,
-)
-from .escapist import EscapistIE
-from .espn import (
- ESPNIE,
- ESPNArticleIE,
- FiveThirtyEightIE,
- ESPNCricInfoIE,
-)
-from .esri import EsriVideoIE
-from .europa import EuropaIE
-from .europeantour import EuropeanTourIE
-from .euscreen import EUScreenIE
-from .expotv import ExpoTVIE
-from .expressen import ExpressenIE
-from .extremetube import ExtremeTubeIE
-from .eyedotv import EyedoTVIE
-from .facebook import (
- FacebookIE,
- FacebookPluginsVideoIE,
- FacebookRedirectURLIE,
-)
-from .fancode import (
- FancodeVodIE,
- FancodeLiveIE
-)
+from ..utils import load_plugins
-from .faz import FazIE
-from .fc2 import (
- FC2IE,
- FC2EmbedIE,
- FC2LiveIE,
-)
-from .fczenit import FczenitIE
-from .filmmodu import FilmmoduIE
-from .filmon import (
- FilmOnIE,
- FilmOnChannelIE,
-)
-from .filmweb import FilmwebIE
-from .firsttv import FirstTVIE
-from .fivetv import FiveTVIE
-from .flickr import FlickrIE
-from .folketinget import FolketingetIE
-from .footyroom import FootyRoomIE
-from .formula1 import Formula1IE
-from .fourtube import (
- FourTubeIE,
- PornTubeIE,
- PornerBrosIE,
- FuxIE,
-)
-from .fox import FOXIE
-from .fox9 import (
- FOX9IE,
- FOX9NewsIE,
-)
-from .foxgay import FoxgayIE
-from .foxnews import (
- FoxNewsIE,
- FoxNewsArticleIE,
-)
-from .foxsports import FoxSportsIE
-from .fptplay import FptplayIE
-from .franceculture import FranceCultureIE
-from .franceinter import FranceInterIE
-from .francetv import (
- FranceTVIE,
- FranceTVSiteIE,
- FranceTVInfoIE,
-)
-from .freesound import FreesoundIE
-from .freespeech import FreespeechIE
-from .frontendmasters import (
- FrontendMastersIE,
- FrontendMastersLessonIE,
- FrontendMastersCourseIE
-)
-from .fujitv import FujiTVFODPlus7IE
-from .funimation import (
- FunimationIE,
- FunimationPageIE,
- FunimationShowIE,
-)
-from .funk import FunkIE
-from .fusion import FusionIE
-from .gab import (
- GabTVIE,
- GabIE,
-)
-from .gaia import GaiaIE
-from .gameinformer import GameInformerIE
-from .gamejolt import (
- GameJoltIE,
- GameJoltUserIE,
- GameJoltGameIE,
- GameJoltGameSoundtrackIE,
- GameJoltCommunityIE,
- GameJoltSearchIE,
-)
-from .gamespot import GameSpotIE
-from .gamestar import GameStarIE
-from .gaskrank import GaskrankIE
-from .gazeta import GazetaIE
-from .gdcvault import GDCVaultIE
-from .gedidigital import GediDigitalIE
-from .generic import GenericIE
-from .gettr import (
- GettrIE,
- GettrStreamingIE,
-)
-from .gfycat import GfycatIE
-from .giantbomb import GiantBombIE
-from .giga import GigaIE
-from .glide import GlideIE
-from .globo import (
- GloboIE,
- GloboArticleIE,
-)
-from .go import GoIE
-from .godtube import GodTubeIE
-from .gofile import GofileIE
-from .golem import GolemIE
-from .googledrive import GoogleDriveIE
-from .googlepodcasts import (
- GooglePodcastsIE,
- GooglePodcastsFeedIE,
-)
-from .googlesearch import GoogleSearchIE
-from .gopro import GoProIE
-from .goshgay import GoshgayIE
-from .gotostage import GoToStageIE
-from .gputechconf import GPUTechConfIE
-from .gronkh import GronkhIE
-from .groupon import GrouponIE
-from .hbo import HBOIE
-from .hearthisat import HearThisAtIE
-from .heise import HeiseIE
-from .hellporno import HellPornoIE
-from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
-from .hgtv import HGTVComShowIE
-from .hketv import HKETVIE
-from .hidive import HiDiveIE
-from .historicfilms import HistoricFilmsIE
-from .hitbox import HitboxIE, HitboxLiveIE
-from .hitrecord import HitRecordIE
-from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import (
- HotStarIE,
- HotStarPlaylistIE,
- HotStarSeriesIE,
-)
-from .howcast import HowcastIE
-from .howstuffworks import HowStuffWorksIE
-from .hrfensehen import HRFernsehenIE
-from .hrti import (
- HRTiIE,
- HRTiPlaylistIE,
-)
-from .hse import (
- HSEShowIE,
- HSEProductIE,
-)
-from .huajiao import HuajiaoIE
-from .huya import HuyaLiveIE
-from .huffpost import HuffPostIE
-from .hungama import (
- HungamaIE,
- HungamaSongIE,
- HungamaAlbumPlaylistIE,
-)
-from .hypem import HypemIE
-from .ichinanalive import (
- IchinanaLiveIE,
- IchinanaLiveClipIE,
-)
-from .ign import (
- IGNIE,
- IGNVideoIE,
- IGNArticleIE,
-)
-from .iheart import (
- IHeartRadioIE,
- IHeartRadioPodcastIE,
-)
-from .imdb import (
- ImdbIE,
- ImdbListIE
-)
-from .imgur import (
- ImgurIE,
- ImgurAlbumIE,
- ImgurGalleryIE,
-)
-from .ina import InaIE
-from .inc import IncIE
-from .indavideo import IndavideoEmbedIE
-from .infoq import InfoQIE
-from .instagram import (
- InstagramIE,
- InstagramIOSIE,
- InstagramUserIE,
- InstagramTagIE,
- InstagramStoryIE,
-)
-from .internazionale import InternazionaleIE
-from .internetvideoarchive import InternetVideoArchiveIE
-from .iprima import (
- IPrimaIE,
- IPrimaCNNIE
-)
-from .iqiyi import (
- IqiyiIE,
- IqIE,
- IqAlbumIE
-)
+# NB: Must be before other imports so that plugins can be correctly injected
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {})
-from .itprotv import (
- ITProTVIE,
- ITProTVCourseIE
-)
+_LAZY_LOADER = False
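+# prefer the generated lazy_extractors module; if the import fails, _LAZY_LOADER stays False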
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+ with contextlib.suppress(ImportError):
+ from .lazy_extractors import * # noqa: F403
+ from .lazy_extractors import _ALL_CLASSES
+ _LAZY_LOADER = True
-from .itv import (
- ITVIE,
- ITVBTCCIE,
-)
-from .ivi import (
- IviIE,
- IviCompilationIE
-)
-from .ivideon import IvideonIE
-from .iwara import IwaraIE
-from .izlesene import IzleseneIE
-from .jamendo import (
- JamendoIE,
- JamendoAlbumIE,
-)
-from .jeuxvideo import JeuxVideoIE
-from .jove import JoveIE
-from .joj import JojIE
-from .jwplatform import JWPlatformIE
-from .kakao import KakaoIE
-from .kaltura import KalturaIE
-from .karaoketv import KaraoketvIE
-from .karrierevideos import KarriereVideosIE
-from .keezmovies import KeezMoviesIE
-from .kelbyone import KelbyOneIE
-from .ketnet import KetnetIE
-from .khanacademy import (
- KhanAcademyIE,
- KhanAcademyUnitIE,
-)
-from .kickstarter import KickStarterIE
-from .kinja import KinjaEmbedIE
-from .kinopoisk import KinoPoiskIE
-from .konserthusetplay import KonserthusetPlayIE
-from .koo import KooIE
-from .krasview import KrasViewIE
-from .ku6 import Ku6IE
-from .kusi import KUSIIE
-from .kuwo import (
- KuwoIE,
- KuwoAlbumIE,
- KuwoChartIE,
- KuwoSingerIE,
- KuwoCategoryIE,
- KuwoMvIE,
-)
-from .la7 import (
- LA7IE,
- LA7PodcastEpisodeIE,
- LA7PodcastIE,
-)
-from .laola1tv import (
- Laola1TvEmbedIE,
- Laola1TvIE,
- EHFTVIE,
- ITTFIE,
-)
-from .lastfm import (
- LastFMIE,
- LastFMPlaylistIE,
- LastFMUserIE,
-)
-from .lbry import (
- LBRYIE,
- LBRYChannelIE,
-)
-from .lci import LCIIE
-from .lcp import (
- LcpPlayIE,
- LcpIE,
-)
-from .lecture2go import Lecture2GoIE
-from .lecturio import (
- LecturioIE,
- LecturioCourseIE,
- LecturioDeCourseIE,
-)
-from .leeco import (
- LeIE,
- LePlaylistIE,
- LetvCloudIE,
-)
-from .lego import LEGOIE
-from .lemonde import LemondeIE
-from .lenta import LentaIE
-from .libraryofcongress import LibraryOfCongressIE
-from .libsyn import LibsynIE
-from .lifenews import (
- LifeNewsIE,
- LifeEmbedIE,
-)
-from .limelight import (
- LimelightMediaIE,
- LimelightChannelIE,
- LimelightChannelListIE,
-)
-from .line import (
- LineLiveIE,
- LineLiveChannelIE,
-)
-from .linkedin import (
- LinkedInIE,
- LinkedInLearningIE,
- LinkedInLearningCourseIE,
-)
-from .linuxacademy import LinuxAcademyIE
-from .litv import LiTVIE
-from .livejournal import LiveJournalIE
-from .livestream import (
- LivestreamIE,
- LivestreamOriginalIE,
- LivestreamShortenerIE,
-)
-from .lnkgo import (
- LnkGoIE,
- LnkIE,
-)
-from .localnews8 import LocalNews8IE
-from .lovehomeporn import LoveHomePornIE
-from .lrt import LRTIE
-from .lynda import (
- LyndaIE,
- LyndaCourseIE
-)
-from .m6 import M6IE
-from .magentamusik360 import MagentaMusik360IE
-from .mailru import (
- MailRuIE,
- MailRuMusicIE,
- MailRuMusicSearchIE,
-)
-from .mainstreaming import MainStreamingIE
-from .malltv import MallTVIE
-from .mangomolo import (
- MangomoloVideoIE,
- MangomoloLiveIE,
-)
-from .manoto import (
- ManotoTVIE,
- ManotoTVShowIE,
- ManotoTVLiveIE,
-)
-from .manyvids import ManyVidsIE
-from .maoritv import MaoriTVIE
-from .markiza import (
- MarkizaIE,
- MarkizaPageIE,
-)
-from .massengeschmacktv import MassengeschmackTVIE
-from .matchtv import MatchTVIE
-from .mdr import MDRIE
-from .medaltv import MedalTVIE
-from .mediaite import MediaiteIE
-from .mediaklikk import MediaKlikkIE
-from .mediaset import (
- MediasetIE,
- MediasetShowIE,
-)
-from .mediasite import (
- MediasiteIE,
- MediasiteCatalogIE,
- MediasiteNamedCatalogIE,
-)
-from .medici import MediciIE
-from .megaphone import MegaphoneIE
-from .meipai import MeipaiIE
-from .melonvod import MelonVODIE
-from .meta import METAIE
-from .metacafe import MetacafeIE
-from .metacritic import MetacriticIE
-from .mgoon import MgoonIE
-from .mgtv import MGTVIE
-from .miaopai import MiaoPaiIE
-from .microsoftstream import MicrosoftStreamIE
-from .microsoftvirtualacademy import (
- MicrosoftVirtualAcademyIE,
- MicrosoftVirtualAcademyCourseIE,
-)
-from .mildom import (
- MildomIE,
- MildomVodIE,
- MildomClipIE,
- MildomUserVodIE,
-)
-from .minds import (
- MindsIE,
- MindsChannelIE,
- MindsGroupIE,
-)
-from .ministrygrid import MinistryGridIE
-from .minoto import MinotoIE
-from .miomio import MioMioIE
-from .mirrativ import (
- MirrativIE,
- MirrativUserIE,
-)
-from .mit import TechTVMITIE, OCWMITIE
-from .mitele import MiTeleIE
-from .mixch import (
- MixchIE,
- MixchArchiveIE,
-)
-from .mixcloud import (
- MixcloudIE,
- MixcloudUserIE,
- MixcloudPlaylistIE,
-)
-from .mlb import (
- MLBIE,
- MLBVideoIE,
-)
-from .mlssoccer import MLSSoccerIE
-from .mnet import MnetIE
-from .moevideo import MoeVideoIE
-from .mofosex import (
- MofosexIE,
- MofosexEmbedIE,
-)
-from .mojvideo import MojvideoIE
-from .morningstar import MorningstarIE
-from .motherless import (
- MotherlessIE,
- MotherlessGroupIE
-)
-from .motorsport import MotorsportIE
-from .movieclips import MovieClipsIE
-from .moviezine import MoviezineIE
-from .movingimage import MovingImageIE
-from .msn import MSNIE
-from .mtv import (
- MTVIE,
- MTVVideoIE,
- MTVServicesEmbeddedIE,
- MTVDEIE,
- MTVJapanIE,
- MTVItaliaIE,
- MTVItaliaProgrammaIE,
-)
-from .muenchentv import MuenchenTVIE
-from .murrtube import MurrtubeIE, MurrtubeUserIE
-from .musescore import MuseScoreIE
-from .musicdex import (
- MusicdexSongIE,
- MusicdexAlbumIE,
- MusicdexArtistIE,
- MusicdexPlaylistIE,
-)
-from .mwave import MwaveIE, MwaveMeetGreetIE
-from .mxplayer import (
- MxplayerIE,
- MxplayerShowIE,
-)
-from .mychannels import MyChannelsIE
-from .myspace import MySpaceIE, MySpaceAlbumIE
-from .myspass import MySpassIE
-from .myvi import (
- MyviIE,
- MyviEmbedIE,
-)
-from .myvideoge import MyVideoGeIE
-from .myvidster import MyVidsterIE
-from .n1 import (
- N1InfoAssetIE,
- N1InfoIIE,
-)
-from .nate import (
- NateIE,
- NateProgramIE,
-)
-from .nationalgeographic import (
- NationalGeographicVideoIE,
- NationalGeographicTVIE,
-)
-from .naver import (
- NaverIE,
- NaverLiveIE,
-)
-from .nba import (
- NBAWatchEmbedIE,
- NBAWatchIE,
- NBAWatchCollectionIE,
- NBAEmbedIE,
- NBAIE,
- NBAChannelIE,
-)
-from .nbc import (
- NBCIE,
- NBCNewsIE,
- NBCOlympicsIE,
- NBCOlympicsStreamIE,
- NBCSportsIE,
- NBCSportsStreamIE,
- NBCSportsVPlayerIE,
-)
-from .ndr import (
- NDRIE,
- NJoyIE,
- NDREmbedBaseIE,
- NDREmbedIE,
- NJoyEmbedIE,
-)
-from .ndtv import NDTVIE
-from .nebula import (
- NebulaIE,
- NebulaCollectionIE,
-)
-from .nerdcubed import NerdCubedFeedIE
-from .netzkino import NetzkinoIE
-from .neteasemusic import (
- NetEaseMusicIE,
- NetEaseMusicAlbumIE,
- NetEaseMusicSingerIE,
- NetEaseMusicListIE,
- NetEaseMusicMvIE,
- NetEaseMusicProgramIE,
- NetEaseMusicDjRadioIE,
-)
-from .newgrounds import (
- NewgroundsIE,
- NewgroundsPlaylistIE,
- NewgroundsUserIE,
-)
-from .newstube import NewstubeIE
-from .newsy import NewsyIE
-from .nextmedia import (
- NextMediaIE,
- NextMediaActionNewsIE,
- AppleDailyIE,
- NextTVIE,
-)
-from .nexx import (
- NexxIE,
- NexxEmbedIE,
-)
-from .nfb import NFBIE
-from .nfhsnetwork import NFHSNetworkIE
-from .nfl import (
- NFLIE,
- NFLArticleIE,
-)
-from .nhk import (
- NhkVodIE,
- NhkVodProgramIE,
- NhkForSchoolBangumiIE,
- NhkForSchoolSubjectIE,
- NhkForSchoolProgramListIE,
-)
-from .nhl import NHLIE
-from .nick import (
- NickIE,
- NickBrIE,
- NickDeIE,
- NickNightIE,
- NickRuIE,
-)
-from .niconico import (
- NiconicoIE,
- NiconicoPlaylistIE,
- NiconicoUserIE,
- NiconicoSeriesIE,
- NiconicoHistoryIE,
- NicovideoSearchDateIE,
- NicovideoSearchIE,
- NicovideoSearchURLIE,
- NicovideoTagURLIE,
-)
-from .ninecninemedia import (
- NineCNineMediaIE,
- CPTwentyFourIE,
-)
-from .ninegag import NineGagIE
-from .ninenow import NineNowIE
-from .nintendo import NintendoIE
-from .nitter import NitterIE
-from .njpwworld import NJPWWorldIE
-from .nobelprize import NobelPrizeIE
-from .nonktube import NonkTubeIE
-from .noodlemagazine import NoodleMagazineIE
-from .noovo import NoovoIE
-from .normalboots import NormalbootsIE
-from .nosvideo import NosVideoIE
-from .nova import (
- NovaEmbedIE,
- NovaIE,
-)
-from .novaplay import NovaPlayIE
-from .nowness import (
- NownessIE,
- NownessPlaylistIE,
- NownessSeriesIE,
-)
-from .noz import NozIE
-from .npo import (
- AndereTijdenIE,
- NPOIE,
- NPOLiveIE,
- NPORadioIE,
- NPORadioFragmentIE,
- SchoolTVIE,
- HetKlokhuisIE,
- VPROIE,
- WNLIE,
-)
-from .npr import NprIE
-from .nrk import (
- NRKIE,
- NRKPlaylistIE,
- NRKSkoleIE,
- NRKTVIE,
- NRKTVDirekteIE,
- NRKRadioPodkastIE,
- NRKTVEpisodeIE,
- NRKTVEpisodesIE,
- NRKTVSeasonIE,
- NRKTVSeriesIE,
-)
-from .nrl import NRLTVIE
-from .ntvcojp import NTVCoJpCUIE
-from .ntvde import NTVDeIE
-from .ntvru import NTVRuIE
-from .nytimes import (
- NYTimesIE,
- NYTimesArticleIE,
- NYTimesCookingIE,
-)
-from .nuvid import NuvidIE
-from .nzherald import NZHeraldIE
-from .nzz import NZZIE
-from .odatv import OdaTVIE
-from .odnoklassniki import OdnoklassnikiIE
-from .oktoberfesttv import OktoberfestTVIE
-from .olympics import OlympicsReplayIE
-from .on24 import On24IE
-from .ondemandkorea import OnDemandKoreaIE
-from .onefootball import OneFootballIE
-from .onet import (
- OnetIE,
- OnetChannelIE,
- OnetMVPIE,
- OnetPlIE,
-)
-from .onionstudios import OnionStudiosIE
-from .ooyala import (
- OoyalaIE,
- OoyalaExternalIE,
-)
-from .opencast import (
- OpencastIE,
- OpencastPlaylistIE,
-)
-from .openrec import (
- OpenRecIE,
- OpenRecCaptureIE,
- OpenRecMovieIE,
-)
-from .ora import OraTVIE
-from .orf import (
- ORFTVthekIE,
- ORFFM4IE,
- ORFFM4StoryIE,
- ORFOE1IE,
- ORFOE3IE,
- ORFNOEIE,
- ORFWIEIE,
- ORFBGLIE,
- ORFOOEIE,
- ORFSTMIE,
- ORFKTNIE,
- ORFSBGIE,
- ORFTIRIE,
- ORFVBGIE,
- ORFIPTVIE,
-)
-from .outsidetv import OutsideTVIE
-from .packtpub import (
- PacktPubIE,
- PacktPubCourseIE,
-)
-from .palcomp3 import (
- PalcoMP3IE,
- PalcoMP3ArtistIE,
- PalcoMP3VideoIE,
-)
-from .pandoratv import PandoraTVIE
-from .panopto import (
- PanoptoIE,
- PanoptoListIE,
- PanoptoPlaylistIE
-)
-from .paramountplus import (
- ParamountPlusIE,
- ParamountPlusSeriesIE,
-)
-from .parliamentliveuk import ParliamentLiveUKIE
-from .parlview import ParlviewIE
-from .patreon import (
- PatreonIE,
- PatreonUserIE
-)
-from .pbs import PBSIE
-from .pearvideo import PearVideoIE
-from .peekvids import PeekVidsIE, PlayVidsIE
-from .peertube import (
- PeerTubeIE,
- PeerTubePlaylistIE,
-)
-from .peertv import PeerTVIE
-from .peloton import (
- PelotonIE,
- PelotonLiveIE
-)
-from .people import PeopleIE
-from .performgroup import PerformGroupIE
-from .periscope import (
- PeriscopeIE,
- PeriscopeUserIE,
-)
-from .philharmoniedeparis import PhilharmonieDeParisIE
-from .phoenix import PhoenixIE
-from .photobucket import PhotobucketIE
-from .piapro import PiaproIE
-from .picarto import (
- PicartoIE,
- PicartoVodIE,
-)
-from .piksel import PikselIE
-from .pinkbike import PinkbikeIE
-from .pinterest import (
- PinterestIE,
- PinterestCollectionIE,
-)
-from .pixivsketch import (
- PixivSketchIE,
- PixivSketchUserIE,
-)
-from .pladform import PladformIE
-from .planetmarathi import PlanetMarathiIE
-from .platzi import (
- PlatziIE,
- PlatziCourseIE,
-)
-from .playfm import PlayFMIE
-from .playplustv import PlayPlusTVIE
-from .plays import PlaysTVIE
-from .playstuff import PlayStuffIE
-from .playtvak import PlaytvakIE
-from .playvid import PlayvidIE
-from .playwire import PlaywireIE
-from .plutotv import PlutoTVIE
-from .pluralsight import (
- PluralsightIE,
- PluralsightCourseIE,
-)
-from .podomatic import PodomaticIE
-from .pokemon import (
- PokemonIE,
- PokemonWatchIE,
- PokemonSoundLibraryIE,
-)
-from .pokergo import (
- PokerGoIE,
- PokerGoCollectionIE,
-)
-from .polsatgo import PolsatGoIE
-from .polskieradio import (
- PolskieRadioIE,
- PolskieRadioCategoryIE,
- PolskieRadioPlayerIE,
- PolskieRadioPodcastIE,
- PolskieRadioPodcastListIE,
- PolskieRadioRadioKierowcowIE,
-)
-from .popcorntimes import PopcorntimesIE
-from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
-from .porncom import PornComIE
-from .pornflip import PornFlipIE
-from .pornhd import PornHdIE
-from .pornhub import (
- PornHubIE,
- PornHubUserIE,
- PornHubPlaylistIE,
- PornHubPagedVideoListIE,
- PornHubUserVideosUploadIE,
-)
-from .pornotube import PornotubeIE
-from .pornovoisines import PornoVoisinesIE
-from .pornoxo import PornoXOIE
-from .pornez import PornezIE
-from .puhutv import (
- PuhuTVIE,
- PuhuTVSerieIE,
-)
-from .presstv import PressTVIE
-from .projectveritas import ProjectVeritasIE
-from .prosiebensat1 import ProSiebenSat1IE
-from .prx import (
- PRXStoryIE,
- PRXSeriesIE,
- PRXAccountIE,
- PRXStoriesSearchIE,
- PRXSeriesSearchIE
-)
-from .puls4 import Puls4IE
-from .pyvideo import PyvideoIE
-from .qqmusic import (
- QQMusicIE,
- QQMusicSingerIE,
- QQMusicAlbumIE,
- QQMusicToplistIE,
- QQMusicPlaylistIE,
-)
-from .r7 import (
- R7IE,
- R7ArticleIE,
-)
-from .radiko import RadikoIE, RadikoRadioIE
-from .radiocanada import (
- RadioCanadaIE,
- RadioCanadaAudioVideoIE,
-)
-from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
-from .radiozet import RadioZetPodcastIE
-from .radiokapital import (
- RadioKapitalIE,
- RadioKapitalShowIE,
-)
-from .radlive import (
- RadLiveIE,
- RadLiveChannelIE,
- RadLiveSeasonIE,
-)
-from .rai import (
- RaiPlayIE,
- RaiPlayLiveIE,
- RaiPlayPlaylistIE,
- RaiPlaySoundIE,
- RaiPlaySoundLiveIE,
- RaiPlaySoundPlaylistIE,
- RaiIE,
-)
-from .raywenderlich import (
- RayWenderlichIE,
- RayWenderlichCourseIE,
-)
-from .rbmaradio import RBMARadioIE
-from .rcs import (
- RCSIE,
- RCSEmbedsIE,
- RCSVariousIE,
-)
-from .rcti import (
- RCTIPlusIE,
- RCTIPlusSeriesIE,
- RCTIPlusTVIE,
-)
-from .rds import RDSIE
-from .redbulltv import (
- RedBullTVIE,
- RedBullEmbedIE,
- RedBullTVRrnContentIE,
- RedBullIE,
-)
-from .reddit import RedditIE
-from .redgifs import (
- RedGifsIE,
- RedGifsSearchIE,
- RedGifsUserIE,
-)
-from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
-from .rentv import (
- RENTVIE,
- RENTVArticleIE,
-)
-from .restudy import RestudyIE
-from .reuters import ReutersIE
-from .reverbnation import ReverbNationIE
-from .rice import RICEIE
-from .rmcdecouverte import RMCDecouverteIE
-from .rockstargames import RockstarGamesIE
-from .rokfin import (
- RokfinIE,
- RokfinStackIE,
- RokfinChannelIE,
-)
-from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
-from .rottentomatoes import RottenTomatoesIE
-from .rozhlas import RozhlasIE
-from .rtbf import RTBFIE
-from .rte import RteIE, RteRadioIE
-from .rtlnl import RtlNlIE
-from .rtl2 import (
- RTL2IE,
- RTL2YouIE,
- RTL2YouSeriesIE,
-)
-from .rtnews import (
- RTNewsIE,
- RTDocumentryIE,
- RTDocumentryPlaylistIE,
- RuptlyIE,
-)
-from .rtp import RTPIE
-from .rtrfm import RTRFMIE
-from .rts import RTSIE
-from .rtve import (
- RTVEALaCartaIE,
- RTVEAudioIE,
- RTVELiveIE,
- RTVEInfantilIE,
- RTVETelevisionIE,
-)
-from .rtvnh import RTVNHIE
-from .rtvs import RTVSIE
-from .ruhd import RUHDIE
-from .rule34video import Rule34VideoIE
-from .rumble import (
- RumbleEmbedIE,
- RumbleChannelIE,
-)
-from .rutube import (
- RutubeIE,
- RutubeChannelIE,
- RutubeEmbedIE,
- RutubeMovieIE,
- RutubePersonIE,
- RutubePlaylistIE,
- RutubeTagsIE,
-)
-from .glomex import (
- GlomexIE,
- GlomexEmbedIE,
-)
-from .megatvcom import (
- MegaTVComIE,
- MegaTVComEmbedIE,
-)
-from .ant1newsgr import (
- Ant1NewsGrWatchIE,
- Ant1NewsGrArticleIE,
- Ant1NewsGrEmbedIE,
-)
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .ruv import (
- RuvIE,
- RuvSpilaIE
-)
-from .safari import (
- SafariIE,
- SafariApiIE,
- SafariCourseIE,
-)
-from .saitosan import SaitosanIE
-from .samplefocus import SampleFocusIE
-from .sapo import SapoIE
-from .savefrom import SaveFromIE
-from .sbs import SBSIE
-from .screencast import ScreencastIE
-from .screencastomatic import ScreencastOMaticIE
-from .scrippsnetworks import (
- ScrippsNetworksWatchIE,
- ScrippsNetworksIE,
-)
-from .scte import (
- SCTEIE,
- SCTECourseIE,
-)
-from .seeker import SeekerIE
-from .senategov import SenateISVPIE, SenateGovIE
-from .sendtonews import SendtoNewsIE
-from .servus import ServusIE
-from .sevenplus import SevenPlusIE
-from .sexu import SexuIE
-from .seznamzpravy import (
- SeznamZpravyIE,
- SeznamZpravyArticleIE,
-)
-from .shahid import (
- ShahidIE,
- ShahidShowIE,
-)
-from .shared import (
- SharedIE,
- VivoIE,
-)
-from .shemaroome import ShemarooMeIE
-from .showroomlive import ShowRoomLiveIE
-from .simplecast import (
- SimplecastIE,
- SimplecastEpisodeIE,
- SimplecastPodcastIE,
-)
-from .sina import SinaIE
-from .sixplay import SixPlayIE
-from .skeb import SkebIE
-from .skyit import (
- SkyItPlayerIE,
- SkyItVideoIE,
- SkyItVideoLiveIE,
- SkyItIE,
- SkyItAcademyIE,
- SkyItArteIE,
- CieloTVItIE,
- TV8ItIE,
-)
-from .skylinewebcams import SkylineWebcamsIE
-from .skynewsarabia import (
- SkyNewsArabiaIE,
- SkyNewsArabiaArticleIE,
-)
-from .skynewsau import SkyNewsAUIE
-from .sky import (
- SkyNewsIE,
- SkyNewsStoryIE,
- SkySportsIE,
- SkySportsNewsIE,
-)
-from .slideshare import SlideshareIE
-from .slideslive import SlidesLiveIE
-from .slutload import SlutloadIE
-from .snotr import SnotrIE
-from .sohu import SohuIE
-from .sonyliv import (
- SonyLIVIE,
- SonyLIVSeriesIE,
-)
-from .soundcloud import (
- SoundcloudEmbedIE,
- SoundcloudIE,
- SoundcloudSetIE,
- SoundcloudRelatedIE,
- SoundcloudUserIE,
- SoundcloudTrackStationIE,
- SoundcloudPlaylistIE,
- SoundcloudSearchIE,
-)
-from .soundgasm import (
- SoundgasmIE,
- SoundgasmProfileIE
-)
-from .southpark import (
- SouthParkIE,
- SouthParkDeIE,
- SouthParkDkIE,
- SouthParkEsIE,
- SouthParkNlIE
-)
-from .sovietscloset import (
- SovietsClosetIE,
- SovietsClosetPlaylistIE
-)
-from .spankbang import (
- SpankBangIE,
- SpankBangPlaylistIE,
-)
-from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE
-from .spike import (
- BellatorIE,
- ParamountNetworkIE,
-)
-from .stitcher import (
- StitcherIE,
- StitcherShowIE,
-)
-from .sport5 import Sport5IE
-from .sportbox import SportBoxIE
-from .sportdeutschland import SportDeutschlandIE
-from .spotify import (
- SpotifyIE,
- SpotifyShowIE,
-)
-from .spreaker import (
- SpreakerIE,
- SpreakerPageIE,
- SpreakerShowIE,
- SpreakerShowPageIE,
-)
-from .springboardplatform import SpringboardPlatformIE
-from .sprout import SproutIE
-from .srgssr import (
- SRGSSRIE,
- SRGSSRPlayIE,
-)
-from .srmediathek import SRMediathekIE
-from .stanfordoc import StanfordOpenClassroomIE
-from .startv import StarTVIE
-from .steam import SteamIE
-from .storyfire import (
- StoryFireIE,
- StoryFireUserIE,
- StoryFireSeriesIE,
-)
-from .streamable import StreamableIE
-from .streamanity import StreamanityIE
-from .streamcloud import StreamcloudIE
-from .streamcz import StreamCZIE
-from .streamff import StreamFFIE
-from .streetvoice import StreetVoiceIE
-from .stretchinternet import StretchInternetIE
-from .stripchat import StripchatIE
-from .stv import STVPlayerIE
-from .sunporno import SunPornoIE
-from .sverigesradio import (
- SverigesRadioEpisodeIE,
- SverigesRadioPublicationIE,
-)
-from .svt import (
- SVTIE,
- SVTPageIE,
- SVTPlayIE,
- SVTSeriesIE,
-)
-from .swrmediathek import SWRMediathekIE
-from .syfy import SyfyIE
-from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
-from .tass import TassIE
-from .tbs import TBSIE
-from .tdslifeway import TDSLifewayIE
-from .teachable import (
- TeachableIE,
- TeachableCourseIE,
-)
-from .teachertube import (
- TeacherTubeIE,
- TeacherTubeUserIE,
-)
-from .teachingchannel import TeachingChannelIE
-from .teamcoco import TeamcocoIE
-from .teamtreehouse import TeamTreeHouseIE
-from .techtalks import TechTalksIE
-from .ted import (
- TedEmbedIE,
- TedPlaylistIE,
- TedSeriesIE,
- TedTalkIE,
-)
-from .tele5 import Tele5IE
-from .tele13 import Tele13IE
-from .telebruxelles import TeleBruxellesIE
-from .telecinco import TelecincoIE
-from .telegraaf import TelegraafIE
-from .telegram import TelegramEmbedIE
-from .telemb import TeleMBIE
-from .telemundo import TelemundoIE
-from .telequebec import (
- TeleQuebecIE,
- TeleQuebecSquatIE,
- TeleQuebecEmissionIE,
- TeleQuebecLiveIE,
- TeleQuebecVideoIE,
-)
-from .teletask import TeleTaskIE
-from .telewebion import TelewebionIE
-from .tennistv import TennisTVIE
-from .tenplay import TenPlayIE
-from .testurl import TestURLIE
-from .tf1 import TF1IE
-from .tfo import TFOIE
-from .theintercept import TheInterceptIE
-from .theplatform import (
- ThePlatformIE,
- ThePlatformFeedIE,
-)
-from .thestar import TheStarIE
-from .thesun import TheSunIE
-from .theta import (
- ThetaVideoIE,
- ThetaStreamIE,
-)
-from .theweatherchannel import TheWeatherChannelIE
-from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
-from .thisoldhouse import ThisOldHouseIE
-from .threespeak import (
- ThreeSpeakIE,
- ThreeSpeakUserIE,
-)
-from .threeqsdn import ThreeQSDNIE
-from .tiktok import (
- TikTokIE,
- TikTokUserIE,
- TikTokSoundIE,
- TikTokEffectIE,
- TikTokTagIE,
- TikTokVMIE,
- DouyinIE,
-)
-from .tinypic import TinyPicIE
-from .tmz import TMZIE
-from .tnaflix import (
- TNAFlixNetworkEmbedIE,
- TNAFlixIE,
- EMPFlixIE,
- MovieFapIE,
-)
-from .toggle import (
- ToggleIE,
- MeWatchIE,
-)
-from .toggo import (
- ToggoIE,
-)
-from .tokentube import (
- TokentubeIE,
- TokentubeChannelIE
-)
-from .tonline import TOnlineIE
-from .toongoggles import ToonGogglesIE
-from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
-from .traileraddict import TrailerAddictIE
-from .trilulilu import TriluliluIE
-from .trovo import (
- TrovoIE,
- TrovoVodIE,
- TrovoChannelVodIE,
- TrovoChannelClipIE,
-)
-from .trueid import TrueIDIE
-from .trunews import TruNewsIE
-from .trutv import TruTVIE
-from .tube8 import Tube8IE
-from .tubitv import (
- TubiTvIE,
- TubiTvShowIE,
-)
-from .tumblr import TumblrIE
-from .tunein import (
- TuneInClipIE,
- TuneInStationIE,
- TuneInProgramIE,
- TuneInTopicIE,
- TuneInShortenerIE,
-)
-from .tunepk import TunePkIE
-from .turbo import TurboIE
-from .tv2 import (
- TV2IE,
- TV2ArticleIE,
- KatsomoIE,
- MTVUutisetArticleIE,
-)
-from .tv2dk import (
- TV2DKIE,
- TV2DKBornholmPlayIE,
-)
-from .tv2hu import (
- TV2HuIE,
- TV2HuSeriesIE,
-)
-from .tv4 import TV4IE
-from .tv5mondeplus import TV5MondePlusIE
-from .tv5unis import (
- TV5UnisVideoIE,
- TV5UnisIE,
-)
-from .tva import (
- TVAIE,
- QubIE,
-)
-from .tvanouvelles import (
- TVANouvellesIE,
- TVANouvellesArticleIE,
-)
-from .tvc import (
- TVCIE,
- TVCArticleIE,
-)
-from .tver import TVerIE
-from .tvigle import TvigleIE
-from .tvland import TVLandIE
-from .tvn24 import TVN24IE
-from .tvnet import TVNetIE
-from .tvnoe import TVNoeIE
-from .tvnow import (
- TVNowIE,
- TVNowFilmIE,
- TVNowNewIE,
- TVNowSeasonIE,
- TVNowAnnualIE,
- TVNowShowIE,
-)
-from .tvopengr import (
- TVOpenGrWatchIE,
- TVOpenGrEmbedIE,
-)
-from .tvp import (
- TVPEmbedIE,
- TVPIE,
- TVPStreamIE,
- TVPWebsiteIE,
-)
-from .tvplay import (
- TVPlayIE,
- ViafreeIE,
- TVPlayHomeIE,
-)
-from .tvplayer import TVPlayerIE
-from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
-from .twentymin import TwentyMinutenIE
-from .twentythreevideo import TwentyThreeVideoIE
-from .twitcasting import (
- TwitCastingIE,
- TwitCastingLiveIE,
- TwitCastingUserIE,
-)
-from .twitch import (
- TwitchVodIE,
- TwitchCollectionIE,
- TwitchVideosIE,
- TwitchVideosClipsIE,
- TwitchVideosCollectionsIE,
- TwitchStreamIE,
- TwitchClipsIE,
-)
-from .twitter import (
- TwitterCardIE,
- TwitterIE,
- TwitterAmplifyIE,
- TwitterBroadcastIE,
- TwitterShortenerIE,
-)
-from .udemy import (
- UdemyIE,
- UdemyCourseIE
-)
-from .udn import UDNEmbedIE
-from .ufctv import (
- UFCTVIE,
- UFCArabiaIE,
-)
-from .ukcolumn import UkColumnIE
-from .uktvplay import UKTVPlayIE
-from .digiteka import DigitekaIE
-from .dlive import (
- DLiveVODIE,
- DLiveStreamIE,
-)
-from .drooble import DroobleIE
-from .umg import UMGDeIE
-from .unistra import UnistraIE
-from .unity import UnityIE
-from .uol import UOLIE
-from .uplynk import (
- UplynkIE,
- UplynkPreplayIE,
-)
-from .urort import UrortIE
-from .urplay import URPlayIE
-from .usanetwork import USANetworkIE
-from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
-from .ustudio import (
- UstudioIE,
- UstudioEmbedIE,
-)
-from .utreon import UtreonIE
-from .varzesh3 import Varzesh3IE
-from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
-from .veo import VeoIE
-from .veoh import VeohIE
-from .vesti import VestiIE
-from .vevo import (
- VevoIE,
- VevoPlaylistIE,
-)
-from .vgtv import (
- BTArticleIE,
- BTVestlendingenIE,
- VGTVIE,
-)
-from .vh1 import VH1IE
-from .vice import (
- ViceIE,
- ViceArticleIE,
- ViceShowIE,
-)
-from .vidbit import VidbitIE
-from .viddler import ViddlerIE
-from .videa import VideaIE
-from .videocampus_sachsen import (
- VideocampusSachsenIE,
- VideocampusSachsenEmbedIE,
-)
-from .videodetective import VideoDetectiveIE
-from .videofyme import VideofyMeIE
-from .videomore import (
- VideomoreIE,
- VideomoreVideoIE,
- VideomoreSeasonIE,
-)
-from .videopress import VideoPressIE
-from .vidio import (
- VidioIE,
- VidioPremierIE,
- VidioLiveIE
-)
-from .vidlii import VidLiiIE
-from .vier import VierIE, VierVideosIE
-from .viewlift import (
- ViewLiftIE,
- ViewLiftEmbedIE,
-)
-from .viidea import ViideaIE
-from .vimeo import (
- VimeoIE,
- VimeoAlbumIE,
- VimeoChannelIE,
- VimeoGroupsIE,
- VimeoLikesIE,
- VimeoOndemandIE,
- VimeoReviewIE,
- VimeoUserIE,
- VimeoWatchLaterIE,
- VHXEmbedIE,
-)
-from .vimm import (
- VimmIE,
- VimmRecordingIE,
-)
-from .vimple import VimpleIE
-from .vine import (
- VineIE,
- VineUserIE,
-)
-from .viki import (
- VikiIE,
- VikiChannelIE,
-)
-from .viqeo import ViqeoIE
-from .viu import (
- ViuIE,
- ViuPlaylistIE,
- ViuOTTIE,
-)
-from .vk import (
- VKIE,
- VKUserVideosIE,
- VKWallPostIE,
-)
-from .vlive import (
- VLiveIE,
- VLivePostIE,
- VLiveChannelIE,
-)
-from .vodlocker import VodlockerIE
-from .vodpl import VODPlIE
-from .vodplatform import VODPlatformIE
-from .voicerepublic import VoiceRepublicIE
-from .voicy import (
- VoicyIE,
- VoicyChannelIE,
-)
-from .voot import (
- VootIE,
- VootSeriesIE,
-)
-from .voxmedia import (
- VoxMediaVolumeIE,
- VoxMediaIE,
-)
-from .vrt import VRTIE
-from .vrak import VrakIE
-from .vrv import (
- VRVIE,
- VRVSeriesIE,
-)
-from .vshare import VShareIE
-from .vtm import VTMIE
-from .medialaan import MedialaanIE
-from .vuclip import VuClipIE
-from .vupload import VuploadIE
-from .vvvvid import (
- VVVVIDIE,
- VVVVIDShowIE,
-)
-from .vyborymos import VyboryMosIE
-from .vzaar import VzaarIE
-from .wakanim import WakanimIE
-from .walla import WallaIE
-from .washingtonpost import (
- WashingtonPostIE,
- WashingtonPostArticleIE,
-)
-from .wasdtv import (
- WASDTVStreamIE,
- WASDTVRecordIE,
- WASDTVClipIE,
-)
-from .wat import WatIE
-from .watchbox import WatchBoxIE
-from .watchindianporn import WatchIndianPornIE
-from .wdr import (
- WDRIE,
- WDRPageIE,
- WDRElefantIE,
- WDRMobileIE,
-)
-from .webcaster import (
- WebcasterIE,
- WebcasterFeedIE,
-)
-from .webofstories import (
- WebOfStoriesIE,
- WebOfStoriesPlaylistIE,
-)
-from .weibo import (
- WeiboIE,
- WeiboMobileIE
-)
-from .weiqitv import WeiqiTVIE
-from .willow import WillowIE
-from .wimtv import WimTVIE
-from .whowatch import WhoWatchIE
-from .wistia import (
- WistiaIE,
- WistiaPlaylistIE,
-)
-from .worldstarhiphop import WorldStarHipHopIE
-from .wppilot import (
- WPPilotIE,
- WPPilotChannelsIE,
-)
-from .wsj import (
- WSJIE,
- WSJArticleIE,
-)
-from .wwe import WWEIE
-from .xbef import XBefIE
-from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
-from .xhamster import (
- XHamsterIE,
- XHamsterEmbedIE,
- XHamsterUserIE,
-)
-from .xiami import (
- XiamiSongIE,
- XiamiAlbumIE,
- XiamiArtistIE,
- XiamiCollectionIE
-)
-from .ximalaya import (
- XimalayaIE,
- XimalayaAlbumIE
-)
-from .xinpianchang import XinpianchangIE
-from .xminus import XMinusIE
-from .xnxx import XNXXIE
-from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
-from .xvideos import XVideosIE
-from .xxxymovies import XXXYMoviesIE
-from .yahoo import (
- YahooIE,
- YahooSearchIE,
- YahooGyaOPlayerIE,
- YahooGyaOIE,
- YahooJapanNewsIE,
-)
-from .yandexdisk import YandexDiskIE
-from .yandexmusic import (
- YandexMusicTrackIE,
- YandexMusicAlbumIE,
- YandexMusicPlaylistIE,
- YandexMusicArtistTracksIE,
- YandexMusicArtistAlbumsIE,
-)
-from .yandexvideo import (
- YandexVideoIE,
- YandexVideoPreviewIE,
- ZenYandexIE,
- ZenYandexChannelIE,
-)
-from .yapfiles import YapFilesIE
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
-from .ynet import YnetIE
-from .youjizz import YouJizzIE
-from .youku import (
- YoukuIE,
- YoukuShowIE,
-)
-from .younow import (
- YouNowLiveIE,
- YouNowChannelIE,
- YouNowMomentIE,
-)
-from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
-from .youtube import (
- YoutubeIE,
- YoutubeClipIE,
- YoutubeFavouritesIE,
- YoutubeHistoryIE,
- YoutubeTabIE,
- YoutubeLivestreamEmbedIE,
- YoutubePlaylistIE,
- YoutubeRecommendedIE,
- YoutubeSearchDateIE,
- YoutubeSearchIE,
- YoutubeSearchURLIE,
- YoutubeMusicSearchURLIE,
- YoutubeSubscriptionsIE,
- YoutubeTruncatedIDIE,
- YoutubeTruncatedURLIE,
- YoutubeYtBeIE,
- YoutubeYtUserIE,
- YoutubeWatchLaterIE,
-)
-from .zapiks import ZapiksIE
-from .zattoo import (
- BBVTVIE,
- EinsUndEinsTVIE,
- EWETVIE,
- GlattvisionTVIE,
- MNetTVIE,
- MyVisionTVIE,
- NetPlusIE,
- OsnatelTVIE,
- QuantumTVIE,
- QuicklineIE,
- QuicklineLiveIE,
- SaltTVIE,
- SAKTVIE,
- VTXTVIE,
- WalyTVIE,
- ZattooIE,
- ZattooLiveIE,
-)
-from .zdf import ZDFIE, ZDFChannelIE
-from .zee5 import (
- Zee5IE,
- Zee5SeriesIE,
-)
-from .zhihu import ZhihuIE
-from .zingmp3 import (
- ZingMp3IE,
- ZingMp3AlbumIE,
-)
-from .zoom import ZoomIE
-from .zype import ZypeIE
+if not _LAZY_LOADER:
+ from ._extractors import * # noqa: F403
+ _ALL_CLASSES = [ # noqa: F811
+ klass
+ for name, klass in globals().items()
+ if name.endswith('IE') and name != 'GenericIE'
+ ]
+ _ALL_CLASSES.append(GenericIE) # noqa: F405
+
+globals().update(_PLUGIN_CLASSES)
+_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
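
The hunk above swaps the hand-maintained import list for a fallback that star-imports `_extractors` and rebuilds `_ALL_CLASSES` from the module globals, keeping `GenericIE` last so every specific extractor is tried first, then prepends plugin classes so they take priority over built-ins. A minimal standalone sketch of that collection idiom (the function name is illustrative, not part of the patch):

    def collect_extractors(namespace, plugin_classes=None, generic_name='GenericIE'):
        """Order extractors: specific IEs first, the catch-all last,
        plugins prepended so they override built-ins."""
        classes = [
            klass for name, klass in namespace.items()
            if name.endswith('IE') and name != generic_name
        ]
        if generic_name in namespace:
            classes.append(namespace[generic_name])  # catch-all tried last
        if plugin_classes:
            classes[:0] = plugin_classes.values()    # plugins win ties
        return classes
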
diff --git a/hypervideo_dl/extractor/extremetube.py b/hypervideo_dl/extractor/extremetube.py
index acd4090..2c19698 100644
--- a/hypervideo_dl/extractor/extremetube.py
+++ b/hypervideo_dl/extractor/extremetube.py
@@ -1,10 +1,8 @@
-from __future__ import unicode_literals
-
from ..utils import str_to_int
from .keezmovies import KeezMoviesIE
-class ExtremeTubeIE(KeezMoviesIE):
+class ExtremeTubeIE(KeezMoviesIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
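
The new `# XXX: Do not subclass from concrete IE` marker flags that `ExtremeTubeIE` inherits from the concrete `KeezMoviesIE` rather than a shared base. The usual cleanup, sketched below with a hypothetical base class (only the ExtremeTube pattern comes from this diff; the KeezMovies pattern and base are assumptions), hoists the common logic so neither site extractor depends on the other:

    from .common import InfoExtractor

    class KeezMoviesBaseIE(InfoExtractor):
        # Shared page download / format parsing would live here.
        def _extract_info(self, url):
            raise NotImplementedError

    class KeezMoviesIE(KeezMoviesBaseIE):
        _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?P<id>\d+)'  # hypothetical

    class ExtremeTubeIE(KeezMoviesBaseIE):
        _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
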
diff --git a/hypervideo_dl/extractor/eyedotv.py b/hypervideo_dl/extractor/eyedotv.py
index f62ddeb..d8b068e 100644
--- a/hypervideo_dl/extractor/eyedotv.py
+++ b/hypervideo_dl/extractor/eyedotv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
xpath_text,
diff --git a/hypervideo_dl/extractor/facebook.py b/hypervideo_dl/extractor/facebook.py
index 022ea85..a58d9c8 100644
--- a/hypervideo_dl/extractor/facebook.py
+++ b/hypervideo_dl/extractor/facebook.py
@@ -1,21 +1,18 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
+import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
compat_str,
compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
)
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
error_to_compat_str,
- ExtractorError,
float_or_none,
get_element_by_id,
get_first,
@@ -60,6 +57,13 @@ class FacebookIE(InfoExtractor):
)
(?P<id>[0-9]+)
'''
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
+ # Facebook API embed https://developers.facebook.com/docs/plugins/embedded-video-player
+ r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
+ data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''',
+ ]
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook'
@@ -314,21 +318,6 @@ class FacebookIE(InfoExtractor):
'graphURI': '/api/graphql/'
}
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
- webpage):
- urls.append(mobj.group('url'))
- # Facebook API embed
- # see https://developers.facebook.com/docs/plugins/embedded-video-player
- for mobj in re.finditer(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
- data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
- urls.append(mobj.group('url'))
- return urls
-
def _perform_login(self, username, password):
login_page_req = sanitized_Request(self._LOGIN_URL)
self._set_cookie('facebook.com', 'locale', 'en_US')
@@ -397,10 +386,8 @@ class FacebookIE(InfoExtractor):
r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
post = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
- media = traverse_obj(
- post,
- (..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'),
- expected_type=dict)
+ media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
+ k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text'))
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
@@ -472,15 +459,14 @@ class FacebookIE(InfoExtractor):
dash_manifest = video.get('dash_manifest')
if dash_manifest:
formats.extend(self._parse_mpd_formats(
- compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+ compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
- def process_formats(formats):
+ def process_formats(info):
# Downloads with a browser User-Agent are rate limited; work around
# this with a non-browser User-Agent.
- for f in formats:
+ for f in info['formats']:
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
-
- self._sort_formats(formats, ('res', 'quality'))
+ info['_format_sort_fields'] = ('res', 'quality')
def extract_relay_data(_filter):
return self._parse_json(self._search_regex(
@@ -523,16 +509,17 @@ class FacebookIE(InfoExtractor):
'url': playable_url,
})
extract_dash_manifest(video, formats)
- process_formats(formats)
v_id = video.get('videoId') or video.get('id') or video_id
info = {
'id': v_id,
'formats': formats,
- 'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
+ 'thumbnail': traverse_obj(
+ video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
'uploader_id': try_get(video, lambda x: x['owner']['id']),
'timestamp': int_or_none(video.get('publish_time')),
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
}
+ process_formats(info)
description = try_get(video, lambda x: x['savable_description']['text'])
title = video.get('name')
if title:
@@ -699,13 +686,12 @@ class FacebookIE(InfoExtractor):
if subtitles_src:
subtitles.setdefault('en', []).append({'url': subtitles_src})
- process_formats(formats)
-
info_dict = {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
}
+ process_formats(info_dict)
info_dict.update(extract_metadata(webpage))
return info_dict
@@ -784,3 +770,30 @@ class FacebookRedirectURLIE(InfoExtractor):
if not redirect_url:
raise ExtractorError('Invalid facebook redirect URL', expected=True)
return self.url_result(redirect_url)
+
+
+class FacebookReelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/reel/(?P<id>\d+)'
+ IE_NAME = 'facebook:reel'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/reel/1195289147628387',
+ 'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831',
+ 'info_dict': {
+ 'id': '1195289147628387',
+ 'ext': 'mp4',
+ 'title': 'md5:9f5b142921b2dc57004fa13f76005f87',
+ 'description': 'md5:24ea7ef062215d295bdde64e778f5474',
+ 'uploader': 'Beast Camp Training',
+ 'uploader_id': '1738535909799870',
+ 'duration': 9.536,
+ 'thumbnail': r're:^https?://.*',
+ 'upload_date': '20211121',
+ 'timestamp': 1637502604,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
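
Two patterns in this file recur throughout the commit. First, the imperative `_extract_urls()` static method gives way to a declarative `_EMBED_REGEX` list; the framework then iterates the patterns and yields every named `url` group, roughly what the removed method did by hand (simplified sketch):

    import re

    _EMBED_REGEX = [
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
    ]

    def extract_embed_urls(webpage):
        # Each pattern must expose a named 'url' group.
        for pattern in _EMBED_REGEX:
            for mobj in re.finditer(pattern, webpage):
                yield mobj.group('url')

Second, the new `FacebookReelIE` holds no extraction logic of its own: it maps the reel URL onto the mobile watch URL and defers to `FacebookIE` via `url_result`, the standard thin-wrapper pattern.
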
diff --git a/hypervideo_dl/extractor/fancode.py b/hypervideo_dl/extractor/fancode.py
index 7ea16c6..1b5db81 100644
--- a/hypervideo_dl/extractor/fancode.py
+++ b/hypervideo_dl/extractor/fancode.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
@@ -128,7 +125,7 @@ class FancodeVodIE(InfoExtractor):
}
-class FancodeLiveIE(FancodeVodIE):
+class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'fancode:live'
_VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+'
diff --git a/hypervideo_dl/extractor/faz.py b/hypervideo_dl/extractor/faz.py
index 312ee2a..bca62ad 100644
--- a/hypervideo_dl/extractor/faz.py
+++ b/hypervideo_dl/extractor/faz.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -81,7 +78,6 @@ class FazIE(InfoExtractor):
'tbr': tbr or int(mobj.group(3)),
})
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/fc2.py b/hypervideo_dl/extractor/fc2.py
index 54a83aa..dd5e088 100644
--- a/hypervideo_dl/extractor/fc2.py
+++ b/hypervideo_dl/extractor/fc2.py
@@ -1,19 +1,13 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
-)
+from ..compat import compat_parse_qs
+from ..dependencies import websockets
from ..utils import (
ExtractorError,
WebSocketsWrapper,
- has_websockets,
js_to_json,
sanitized_Request,
- std_headers,
traverse_obj,
update_url_query,
urlencode_postdata,
@@ -84,7 +78,7 @@ class FC2IE(InfoExtractor):
webpage = None
if not url.startswith('fc2:'):
webpage = self._download_webpage(url, video_id)
- self._downloader.cookiejar.clear_session_cookies() # must clear
+ self.cookiejar.clear_session_cookies() # must clear
self._login()
title, thumbnail, description = None, None, None
@@ -173,7 +167,7 @@ class FC2LiveIE(InfoExtractor):
}]
def _real_extract(self, url):
- if not has_websockets:
+ if not websockets:
raise ExtractorError('websockets library is not available. Please install it.', expected=True)
video_id = self._match_id(url)
webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)
@@ -210,10 +204,10 @@ class FC2LiveIE(InfoExtractor):
'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
'Origin': 'https://live.fc2.com',
'Accept': '*/*',
- 'User-Agent': std_headers['User-Agent'],
+ 'User-Agent': self.get_param('http_headers')['User-Agent'],
})
- self.write_debug('[debug] Sending HLS server request')
+ self.write_debug('Sending HLS server request')
while True:
recv = ws.recv()
@@ -235,13 +229,10 @@ class FC2LiveIE(InfoExtractor):
if not data or not isinstance(data, dict):
continue
if data.get('name') == '_response_' and data.get('id') == 1:
- self.write_debug('[debug] Goodbye.')
+ self.write_debug('Goodbye')
playlist_data = data
break
- elif self._downloader.params.get('verbose', False):
- if len(recv) > 100:
- recv = recv[:100] + '...'
- self.to_screen('[debug] Server said: %s' % recv)
+ self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else ''))
if not playlist_data:
raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
@@ -259,7 +250,6 @@ class FC2LiveIE(InfoExtractor):
'Referer': url,
}))
- self._sort_formats(formats)
for fmt in formats:
fmt.update({
'protocol': 'fc2_live',
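
The FC2 live changes keep the same WebSocket handshake: send the HLS info request, then poll `recv()` until a `_response_` message with the expected id arrives. A stripped-down sketch of that loop (assuming `ws` exposes a `recv()` returning JSON text, as in the extractor; the `max_messages` bound is an addition here, the extractor itself loops until the server answers):

    import json

    def wait_for_response(ws, request_id, max_messages=100):
        """Poll the socket until the server answers the given request id."""
        for _ in range(max_messages):
            recv = ws.recv()
            if not recv:
                continue
            try:
                data = json.loads(recv)
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict) and data.get('name') == '_response_' \
                    and data.get('id') == request_id:
                return data
        raise RuntimeError('No response from HLS info WebSocket')
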
diff --git a/hypervideo_dl/extractor/fczenit.py b/hypervideo_dl/extractor/fczenit.py
index 8db7c59..8175b6b 100644
--- a/hypervideo_dl/extractor/fczenit.py
+++ b/hypervideo_dl/extractor/fczenit.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -41,8 +38,6 @@ class FczenitIE(InfoExtractor):
'height': int_or_none(q.get('label')),
} for q in msi_data['qualities'] if q.get('url')]
- self._sort_formats(formats)
-
tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')]
return {
diff --git a/hypervideo_dl/extractor/fifa.py b/hypervideo_dl/extractor/fifa.py
new file mode 100644
index 0000000..dc00edc
--- /dev/null
+++ b/hypervideo_dl/extractor/fifa.py
@@ -0,0 +1,94 @@
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class FifaIE(InfoExtractor):
+ _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
+ 'info_dict': {
+ 'id': '7on10qPcnyLajDDU3ntg6y',
+ 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
+ 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
+ 'ext': 'mp4',
+ 'categories': ['FIFA Tournaments'],
+ 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA',
+ 'duration': 8165,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
+ 'info_dict': {
+ 'id': '1cg5r5Qt6Qt12ilkDgb1sV',
+ 'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
+ 'description': 'md5:d908c74ee66322b804ae2e521b02a855',
+ 'ext': 'mp4',
+ 'categories': ['FIFA Tournaments', 'Highlights'],
+ 'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
+ 'duration': 902,
+ 'release_timestamp': 1404777600,
+ 'release_date': '20140708',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
+ 'info_dict': {
+ 'id': '3C6gQH9C2DLwzNx7BMRQdp',
+ 'title': 'Josimar goal against Northern Ireland | Classic Goals',
+ 'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
+ 'ext': 'mp4',
+ 'categories': ['FIFA Tournaments', 'Goal'],
+ 'duration': 28,
+ 'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id, locale = self._match_valid_url(url).group('id', 'locale')
+ webpage = self._download_webpage(url, video_id)
+
+ preconnect_link = self._search_regex(
+ r'<link[^>]+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
+
+ video_details = self._download_json(
+ f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False)
+
+ preplay_parameters = self._download_json(
+ f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters']
+
+ cid = preplay_parameters['contentId']
+ content_data = self._download_json(
+ f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={
+ 'v': preplay_parameters['preplayAPIVersion'],
+ 'tc': preplay_parameters['tokenCheckAlgorithmVersion'],
+ 'rn': preplay_parameters['randomNumber'],
+ 'exp': preplay_parameters['tokenExpirationDate'],
+ 'ct': preplay_parameters['contentType'],
+ 'cid': cid,
+ 'mbtracks': preplay_parameters['tracksAssetNumber'],
+ 'ad': preplay_parameters['adConfiguration'],
+ 'ad.preroll': int(preplay_parameters['adPreroll']),
+ 'ad.cmsid': preplay_parameters['adCMSSourceId'],
+ 'ad.vid': preplay_parameters['adSourceVideoID'],
+ 'sig': preplay_parameters['signature'],
+ })
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
+
+ return {
+ 'id': video_id,
+ 'title': video_details.get('title'),
+ 'description': video_details.get('description'),
+ 'duration': int_or_none(video_details.get('duration')),
+ 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
+ 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
+ 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
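
The new FIFA extractor resolves playback in three steps: scrape the preconnect host from the page, fetch the signed preplay parameters from `videoPlayerData`, then exchange them at `content.uplynk.com` for the HLS playlist. A sketch of that final exchange as a plain URL builder (key names mirror the code above; the `sig` value is forwarded unmodified, as the parameter layout suggests it must be):

    import urllib.parse

    def build_preplay_url(p):
        cid = p['contentId']
        query = urllib.parse.urlencode({
            'v': p['preplayAPIVersion'],
            'tc': p['tokenCheckAlgorithmVersion'],
            'rn': p['randomNumber'],
            'exp': p['tokenExpirationDate'],
            'ct': p['contentType'],
            'cid': cid,
            'mbtracks': p['tracksAssetNumber'],
            'ad': p['adConfiguration'],
            'ad.preroll': int(p['adPreroll']),
            'ad.cmsid': p['adCMSSourceId'],
            'ad.vid': p['adSourceVideoID'],
            'sig': p['signature'],
        })
        return f'https://content.uplynk.com/preplay/{cid}/multiple.json?{query}'
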
diff --git a/hypervideo_dl/extractor/filmmodu.py b/hypervideo_dl/extractor/filmmodu.py
index 2746876..9eb550e 100644
--- a/hypervideo_dl/extractor/filmmodu.py
+++ b/hypervideo_dl/extractor/filmmodu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -54,8 +51,6 @@ class FilmmoduIE(InfoExtractor):
'protocol': 'm3u8_native',
} for source in data['sources']]
- self._sort_formats(formats)
-
subtitles = {}
if data.get('subtitle'):
diff --git a/hypervideo_dl/extractor/filmon.py b/hypervideo_dl/extractor/filmon.py
index 7b43ecc..9a93cb9 100644
--- a/hypervideo_dl/extractor/filmon.py
+++ b/hypervideo_dl/extractor/filmon.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -68,7 +65,6 @@ class FilmOnIE(InfoExtractor):
'quality': QUALITY(stream.get('quality')),
'protocol': 'm3u8_native',
})
- self._sort_formats(formats)
thumbnails = []
poster = response.get('poster', {})
@@ -156,7 +152,6 @@ class FilmOnChannelIE(InfoExtractor):
'ext': 'mp4',
'quality': QUALITY(quality),
})
- self._sort_formats(formats)
thumbnails = []
for name, width, height in self._THUMBNAIL_RES:
diff --git a/hypervideo_dl/extractor/filmweb.py b/hypervideo_dl/extractor/filmweb.py
index 5e323b4..cfea1f2 100644
--- a/hypervideo_dl/extractor/filmweb.py
+++ b/hypervideo_dl/extractor/filmweb.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/firsttv.py b/hypervideo_dl/extractor/firsttv.py
index ccad173..f74bd13 100644
--- a/hypervideo_dl/extractor/firsttv.py
+++ b/hypervideo_dl/extractor/firsttv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -126,7 +123,6 @@ class FirstTVIE(InfoExtractor):
% (path, m3u8_path),
display_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
duration = int_or_none(item.get('duration') or self._html_search_meta(
diff --git a/hypervideo_dl/extractor/fivemin.py b/hypervideo_dl/extractor/fivemin.py
deleted file mode 100644
index f3f876e..0000000
--- a/hypervideo_dl/extractor/fivemin.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class FiveMinIE(InfoExtractor):
- IE_NAME = '5min'
- _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)'
-
- _TESTS = [
- {
- # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
- 'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
- 'md5': '4f7b0b79bf1a470e5004f7112385941d',
- 'info_dict': {
- 'id': '518013791',
- 'ext': 'mp4',
- 'title': 'iPad Mini with Retina Display Review',
- 'description': 'iPad mini with Retina Display review',
- 'duration': 177,
- 'uploader': 'engadget',
- 'upload_date': '20131115',
- 'timestamp': 1384515288,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- },
- {
- # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
- 'url': '5min:518086247',
- 'md5': 'e539a9dd682c288ef5a498898009f69e',
- 'info_dict': {
- 'id': '518086247',
- 'ext': 'mp4',
- 'title': 'How to Make a Next-Level Fruit Salad',
- 'duration': 184,
- },
- 'skip': 'no longer available',
- },
- {
- 'url': 'http://embed.5min.com/518726732/',
- 'only_matching': True,
- },
- {
- 'url': 'http://delivery.vidible.tv/aol?playList=518013791',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return self.url_result('aol-video:%s' % video_id)
diff --git a/hypervideo_dl/extractor/fivetv.py b/hypervideo_dl/extractor/fivetv.py
index d6bebd1..1f48cfd 100644
--- a/hypervideo_dl/extractor/fivetv.py
+++ b/hypervideo_dl/extractor/fivetv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -75,7 +71,7 @@ class FiveTVIE(InfoExtractor):
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
webpage, 'video url')
- title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
+ title = self._generic_title('', webpage)
duration = int_or_none(self._og_search_property(
'video:duration', webpage, 'duration', default=None))
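
`self._generic_title('', webpage)` folds the removed two-step fallback into a single helper; assuming the helper's usual semantics, it is roughly equivalent to:

    title = (self._og_search_title(webpage, default=None)
             or self._html_extract_title(webpage))
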
diff --git a/hypervideo_dl/extractor/flickr.py b/hypervideo_dl/extractor/flickr.py
index 2ed6c2b..89a40d7 100644
--- a/hypervideo_dl/extractor/flickr.py
+++ b/hypervideo_dl/extractor/flickr.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -91,12 +89,11 @@ class FlickrIE(InfoExtractor):
'url': stream['_content'],
'quality': preference(stream_type),
})
- self._sort_formats(formats)
owner = video_info.get('owner', {})
uploader_id = owner.get('nsid')
uploader_path = owner.get('path_alias') or uploader_id
- uploader_url = format_field(uploader_path, template='https://www.flickr.com/photos/%s/')
+ uploader_url = format_field(uploader_path, None, 'https://www.flickr.com/photos/%s/')
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/folketinget.py b/hypervideo_dl/extractor/folketinget.py
index b3df93f..55a11e5 100644
--- a/hypervideo_dl/extractor/folketinget.py
+++ b/hypervideo_dl/extractor/folketinget.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import (
@@ -62,7 +59,6 @@ class FolketingetIE(InfoExtractor):
'url': xpath_text(n, './url', fatal=True),
'tbr': int_or_none(n.attrib['bitrate']),
} for n in doc.findall('.//streams/stream')]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/footyroom.py b/hypervideo_dl/extractor/footyroom.py
index 118325b..4a1316b 100644
--- a/hypervideo_dl/extractor/footyroom.py
+++ b/hypervideo_dl/extractor/footyroom.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .streamable import StreamableIE
diff --git a/hypervideo_dl/extractor/formula1.py b/hypervideo_dl/extractor/formula1.py
index 67662e6..0a8ef85 100644
--- a/hypervideo_dl/extractor/formula1.py
+++ b/hypervideo_dl/extractor/formula1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/fourtube.py b/hypervideo_dl/extractor/fourtube.py
index d4d955b..b6368b8 100644
--- a/hypervideo_dl/extractor/fourtube.py
+++ b/hypervideo_dl/extractor/fourtube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -37,7 +35,6 @@ class FourTubeBaseIE(InfoExtractor):
'resolution': format + 'p',
'quality': int(format),
} for format in sources]
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
diff --git a/hypervideo_dl/extractor/fourzerostudio.py b/hypervideo_dl/extractor/fourzerostudio.py
new file mode 100644
index 0000000..c388a3a
--- /dev/null
+++ b/hypervideo_dl/extractor/fourzerostudio.py
@@ -0,0 +1,106 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj, unified_timestamp
+
+
+class FourZeroStudioArchiveIE(InfoExtractor):
+ _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive'
+ IE_NAME = '0000studio:archive'
+ _TESTS = [{
+ 'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive',
+ 'info_dict': {
+ 'id': '1290f433-fce0-4909-a24a-5f7df09665dc',
+ 'title': 'noteで『canape』様へのファンレターを執筆します。(数秘術その2)',
+ 'timestamp': 1653802534,
+ 'release_timestamp': 1653796604,
+ 'thumbnails': 'count:1',
+ 'comments': 'count:7',
+ 'uploader': '『中崎雄心』の執務室。',
+ 'uploader_id': 'mumeijiten',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, video_id)
+ nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
+ pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False)
+ uploader_internal_id = traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')
+
+ return {
+ 'id': video_id,
+ 'title': pcb.get('title'),
+ 'age_limit': 18 if pcb.get('isAdult') else None,
+ 'timestamp': unified_timestamp(pcb.get('finishTime')),
+ 'release_timestamp': unified_timestamp(pcb.get('createdAt')),
+ 'thumbnails': [{
+ 'url': pcb['thumbnailUrl'],
+ 'ext': 'png',
+ }] if pcb.get('thumbnailUrl') else None,
+ 'formats': formats,
+ 'subtitles': subs,
+ 'comments': [{
+ 'author': c.get('username'),
+ 'author_id': c.get('postedUserId'),
+ 'author_thumbnail': c.get('userThumbnailUrl'),
+ 'id': c.get('id'),
+ 'text': c.get('body'),
+ 'timestamp': unified_timestamp(c.get('createdAt')),
+ 'like_count': c.get('likeCount'),
+ 'is_favorited': c.get('isLikedByOwner'),
+ 'author_is_uploader': c.get('postedUserId') == uploader_internal_id,
+ } for c in traverse_obj(nuxt_data, (
+ 'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []],
+ 'uploader_id': uploader_id,
+ 'uploader': traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+ }
+
+
+class FourZeroStudioClipIE(InfoExtractor):
+ _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)'
+ IE_NAME = '0000studio:clip'
+ _TESTS = [{
+ 'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+ 'info_dict': {
+ 'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+ 'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!',
+ 'timestamp': 1652109105,
+ 'like_count': 1,
+ 'uploader': 'ソエジマケイタ',
+ 'uploader_id': 'soeji',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, video_id)
+ nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
+ clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False)
+
+ info = next((
+ m for m in self._parse_html5_media_entries(url, webpage, video_id)
+ if 'mp4' in traverse_obj(m, ('formats', ..., 'ext'))
+ ), None)
+ if not info:
+ self.report_warning('Failed to find a desired media element. Falling back to using NUXT data.')
+ info = {
+ 'formats': [{
+ 'ext': 'mp4',
+ 'url': url,
+ } for url in clip_info.get('mediaFiles') or [] if url],
+ }
+ return {
+ **info,
+ 'id': video_id,
+ 'title': clip_info.get('clipComment'),
+ 'timestamp': unified_timestamp(clip_info.get('createdAt')),
+ 'like_count': clip_info.get('likeCount'),
+ 'uploader_id': uploader_id,
+ 'uploader': traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+ }
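
Both 0000.studio extractors rely on the same lookup: parse the page's NUXT payload, then filter `ssrRefs` by `__typename` for the record they need. The filtering step, isolated (assuming `nuxt_data` is shaped as in the code above):

    from hypervideo_dl.utils import traverse_obj

    def find_by_typename(nuxt_data, typename):
        """Return the first ssrRefs entry with a matching __typename."""
        return traverse_obj(
            nuxt_data,
            ('ssrRefs', lambda _, v: v['__typename'] == typename),
            get_all=False)

    # e.g. broadcast = find_by_typename(nuxt_data, 'PublicCreatorBroadcast')
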
diff --git a/hypervideo_dl/extractor/fox.py b/hypervideo_dl/extractor/fox.py
index 4c52b9a..15c0c48 100644
--- a/hypervideo_dl/extractor/fox.py
+++ b/hypervideo_dl/extractor/fox.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import uuid
@@ -15,8 +12,10 @@ from ..utils import (
int_or_none,
parse_age_limit,
parse_duration,
+ traverse_obj,
try_get,
unified_timestamp,
+ url_or_none,
)
@@ -37,7 +36,8 @@ class FOXIE(InfoExtractor):
'creator': 'FOX',
'series': 'Gotham',
'age_limit': 14,
- 'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight'
+ 'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+ 'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
@@ -132,7 +132,6 @@ class FOXIE(InfoExtractor):
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
data = try_get(
video, lambda x: x['trackingData']['properties'], dict) or {}
@@ -168,6 +167,7 @@ class FOXIE(InfoExtractor):
'season_number': int_or_none(video.get('seasonNumber')),
'episode': video.get('name'),
'episode_number': int_or_none(video.get('episodeNumber')),
+ 'thumbnail': traverse_obj(video, ('images', 'still', 'raw'), expected_type=url_or_none),
'release_year': int_or_none(video.get('releaseYear')),
'subtitles': subtitles,
}
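
The added thumbnail line uses the `traverse_obj` plus `expected_type` idiom: walk a nested dict and keep the value only if the validator accepts it. A self-contained illustration with made-up data:

    from hypervideo_dl.utils import traverse_obj, url_or_none

    video = {'images': {'still': {'raw': 'https://example.com/still.jpg'}}}
    # Yields the URL only when every key exists and url_or_none() accepts it;
    # otherwise None, with no KeyError handling at the call site.
    thumbnail = traverse_obj(video, ('images', 'still', 'raw'), expected_type=url_or_none)
    assert thumbnail == 'https://example.com/still.jpg'
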
diff --git a/hypervideo_dl/extractor/fox9.py b/hypervideo_dl/extractor/fox9.py
index 91f8f7b..dfbafa7 100644
--- a/hypervideo_dl/extractor/fox9.py
+++ b/hypervideo_dl/extractor/fox9.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/foxgay.py b/hypervideo_dl/extractor/foxgay.py
index 1c53e06..f4f29c6 100644
--- a/hypervideo_dl/extractor/foxgay.py
+++ b/hypervideo_dl/extractor/foxgay.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
@@ -33,7 +31,7 @@ class FoxgayIE(InfoExtractor):
description = get_element_by_id('inf_tit', webpage)
# The default user-agent with foxgay cookies leads to pages without videos
- self._downloader.cookiejar.clear('.foxgay.com')
+ self.cookiejar.clear('.foxgay.com')
# Find the URL for the iFrame which contains the actual video.
iframe_url = self._html_search_regex(
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
@@ -50,8 +48,6 @@ class FoxgayIE(InfoExtractor):
} for source, resolution in zip(
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/foxnews.py b/hypervideo_dl/extractor/foxnews.py
index 18fa0a5..52172aa 100644
--- a/hypervideo_dl/extractor/foxnews.py
+++ b/hypervideo_dl/extractor/foxnews.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .amp import AMPIE
@@ -58,13 +56,15 @@ class FoxNewsIE(AMPIE):
},
]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
- webpage)]
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for mobj in re.finditer(
+ r'''(?x)
+ <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
+ (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
+ (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
+ ''', webpage):
+ yield f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
@@ -75,6 +75,29 @@ class FoxNewsIE(AMPIE):
return info
+class FoxNewsVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.foxnews.com/video/6313058664112',
+ 'info_dict': {
+ 'id': '6313058664112',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https://.+/1280x720/match/image\.jpg',
+ 'upload_date': '20220930',
+ 'description': 'New York City, Kids Therapy, Biden',
+ 'duration': 2415,
+ 'title': 'Gutfeld! - Thursday, September 29',
+ 'timestamp': 1664527538,
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks'],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(f'https://video.foxnews.com/v/{video_id}', FoxNewsIE, video_id)
+
+
class FoxNewsArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
IE_NAME = 'foxnews:article'
@@ -124,4 +147,4 @@ class FoxNewsArticleIE(InfoExtractor):
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
return self.url_result(
- FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
+ next(FoxNewsIE._extract_embed_urls(url, webpage)), FoxNewsIE.ie_key())
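
Since `_extract_embed_urls` is now a generator, the article extractor takes the first match with `next(...)` instead of indexing a list. `next()` on an exhausted generator raises `StopIteration`, so a guarded variant of the call (sketch, inside `_real_extract`; the error message is illustrative) would be:

    embed_url = next(FoxNewsIE._extract_embed_urls(url, webpage), None)
    if embed_url is None:
        raise ExtractorError('No embedded Fox News video found', expected=True)
    return self.url_result(embed_url, FoxNewsIE.ie_key())
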
diff --git a/hypervideo_dl/extractor/foxsports.py b/hypervideo_dl/extractor/foxsports.py
index 2b2cb6c..f9d7fe5 100644
--- a/hypervideo_dl/extractor/foxsports.py
+++ b/hypervideo_dl/extractor/foxsports.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/fptplay.py b/hypervideo_dl/extractor/fptplay.py
index a34e90b..85613ba 100644
--- a/hypervideo_dl/extractor/fptplay.py
+++ b/hypervideo_dl/extractor/fptplay.py
@@ -1,18 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import time
import urllib.parse
from .common import InfoExtractor
from ..utils import (
+ clean_html,
join_nonempty,
+ strip_or_none,
)
class FptplayIE(InfoExtractor):
- _VALID_URL = r'https?://fptplay\.vn/(?P<type>xem-video)/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>[^/]+)?/?(?:[?#]|$)|)'
+ _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)'
_GEO_COUNTRIES = ['VN']
IE_NAME = 'fptplay'
IE_DESC = 'fptplay.vn'
@@ -22,7 +21,7 @@ class FptplayIE(InfoExtractor):
'info_dict': {
'id': '621a123016f369ebbde55945',
'ext': 'mp4',
- 'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. Cupid In Love',
+ 'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A',
'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
},
}, {
@@ -31,25 +30,41 @@ class FptplayIE(InfoExtractor):
'info_dict': {
'id': '61f3aa8a6b3b1d2e73c60eb5',
'ext': 'mp4',
- 'title': 'Má Tôi Là Đại Gia - 3',
+ 'title': 'Má Tôi Là Đại Gia - Tập 3',
'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
},
}, {
+ 'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
+ 'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9',
+ 'info_dict': {
+ 'id': '6222d9684ec7230fa6e627a2',
+ 'ext': 'mp4',
+ 'title': 'Lạp Tội Đồ Giám - Tập 2B',
+ 'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b',
+ },
+ }, {
'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
'only_matching': True,
}]
def _real_extract(self, url):
- type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode')
- webpage = self._download_webpage(url, video_id=video_id, fatal=False)
- info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id)
+ video_id, slug_episode = self._match_valid_url(url).group('id', 'episode')
+ webpage = self._download_webpage(url, video_id=video_id, fatal=False) or ''
+ title = self._search_regex(
+ r'(?s)<h4\s+class="mb-1 text-2xl text-white"[^>]*>(.+)</h4>', webpage, 'title', fatal=False)
+ real_episode = slug_episode if not title else self._search_regex(
+ r'<p.+title="(?P<episode>[^">]+)"\s+class="epi-title active"', webpage, 'episode', fatal=False)
+ title = strip_or_none(title) or self._html_search_meta(('og:title', 'twitter:title'), webpage)
+
+ info = self._download_json(
+ self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
- 'title': join_nonempty(
- self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '),
- 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+ 'title': join_nonempty(title, real_episode, delim=' - '),
+ 'description': (
+ clean_html(self._search_regex(r'<p\s+class="overflow-hidden"[^>]*>(.+)</p>', webpage, 'description'))
+ or self._html_search_meta(('og:description', 'twitter:description'), webpage)),
'formats': formats,
'subtitles': subtitles,
}
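
[editor's note] The fptplay hunk above tightens _VALID_URL (the unused "type" group is dropped and the episode slug must now be numeric) and converts the 1-based /tap-N slug into the 0-based index the API expects. A standalone sanity check of the new pattern, using only URLs already present as tests in this diff:

    import re

    _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)'

    for url in (
        'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
        'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
    ):
        mobj = re.match(_VALID_URL, url)
        video_id, slug_episode = mobj.group('id', 'episode')
        # The API index is 0-based while the slug is 1-based, hence the -1 shift
        print(video_id, slug_episode, int(slug_episode) - 1 if slug_episode else 0)
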
diff --git a/hypervideo_dl/extractor/franceculture.py b/hypervideo_dl/extractor/franceculture.py
deleted file mode 100644
index 9dc28d8..0000000
--- a/hypervideo_dl/extractor/franceculture.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- extract_attributes,
- int_or_none,
- traverse_obj,
- unified_strdate,
-)
-
-
-class FranceCultureIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- # playlist
- 'url': 'https://www.franceculture.fr/emissions/serie/hasta-dente',
- 'playlist_count': 12,
- 'info_dict': {
- 'id': 'hasta-dente',
- 'title': 'Hasta Dente',
- 'description': 'md5:57479af50648d14e9bb649e6b1f8f911',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20201024',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '3c1c2e55-41a0-11e5-9fe0-005056a87c89',
- 'ext': 'mp3',
- 'title': 'Jeudi, vous avez dit bizarre ?',
- 'description': 'md5:47cf1e00cc21c86b0210279996a812c6',
- 'duration': 604,
- 'upload_date': '20201024',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1603576680
- },
- },
- ],
- }, {
- 'url': 'https://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
- 'info_dict': {
- 'id': 'rendez-vous-au-pays-des-geeks',
- 'display_id': 'rendez-vous-au-pays-des-geeks',
- 'ext': 'mp3',
- 'title': 'Rendez-vous au pays des geeks',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20140301',
- 'vcodec': 'none',
- 'duration': 3569,
- },
- }, {
- # no thumbnail
- 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- info = {
- 'id': display_id,
- 'title': self._html_search_regex(
- r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
- webpage, 'title', default=self._og_search_title(webpage)),
- 'description': self._html_search_regex(
- r'(?s)<div[^>]+class="excerpt"[^>]*>(.*?)</div>', webpage, 'description', default=None),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader': self._html_search_regex(
- r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
- 'upload_date': unified_strdate(self._html_search_regex(
- r'(?s)class="teaser-text-date".*?(\d{2}/\d{2}/\d{4})', webpage, 'date', default=None)),
- }
-
- playlist_data = self._search_regex(
- r'''(?sx)
- <section[^>]+data-xiti-place="[^"]*?liste_episodes[^"?]*?"[^>]*>
- (.*?)
- </section>
- ''',
- webpage, 'playlist data', fatal=False, default=None)
-
- if playlist_data:
- entries = []
- for item, item_description in re.findall(
- r'(?s)(<button[^<]*class="[^"]*replay-button[^>]*>).*?<p[^>]*class="[^"]*teaser-text-chapo[^>]*>(.*?)</p>',
- playlist_data):
-
- item_attributes = extract_attributes(item)
- entries.append({
- 'id': item_attributes.get('data-emission-uuid'),
- 'url': item_attributes.get('data-url'),
- 'title': item_attributes.get('data-diffusion-title'),
- 'duration': int_or_none(traverse_obj(item_attributes, 'data-duration-seconds', 'data-duration-seconds')),
- 'description': item_description,
- 'timestamp': int_or_none(item_attributes.get('data-start-time')),
- 'thumbnail': info['thumbnail'],
- 'uploader': info['uploader'],
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- **info
- }
-
- video_data = extract_attributes(self._search_regex(
- r'''(?sx)
- (?:
- </h1>|
- <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
- ).*?
- (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
- ''',
- webpage, 'video data'))
- video_url = traverse_obj(video_data, 'data-url', 'data-asset-source')
- ext = determine_ext(video_url.lower())
-
- return {
- 'display_id': display_id,
- 'url': video_url,
- 'ext': ext,
- 'vcodec': 'none' if ext == 'mp3' else None,
- 'duration': int_or_none(video_data.get('data-duration')),
- **info
- }
diff --git a/hypervideo_dl/extractor/franceinter.py b/hypervideo_dl/extractor/franceinter.py
index ae822a5..779249b 100644
--- a/hypervideo_dl/extractor/franceinter.py
+++ b/hypervideo_dl/extractor/franceinter.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import month_by_name
diff --git a/hypervideo_dl/extractor/francetv.py b/hypervideo_dl/extractor/francetv.py
index 347a766..0523172 100644
--- a/hypervideo_dl/extractor/francetv.py
+++ b/hypervideo_dl/extractor/francetv.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -37,6 +32,7 @@ class FranceTVIE(InfoExtractor):
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1']
_TESTS = [{
# without catalog
@@ -195,8 +191,6 @@ class FranceTVIE(InfoExtractor):
} for sheet in spritesheets]
})
- self._sort_formats(formats)
-
if subtitle:
title += ' - %s' % subtitle
title = title.strip()
@@ -375,7 +369,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
webpage = self._download_webpage(url, display_id)
- dailymotion_urls = DailymotionIE._extract_urls(webpage)
+ dailymotion_urls = tuple(DailymotionIE._extract_embed_urls(url, webpage))
if dailymotion_urls:
return self.playlist_result([
self.url_result(dailymotion_url, DailymotionIE.ie_key())
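
[editor's note] Note the call-site change here: DailymotionIE._extract_urls(webpage) becomes tuple(DailymotionIE._extract_embed_urls(url, webpage)). The new classmethod is a generator, so its output must be materialized before the truthiness test; a bare generator object is always truthy. A minimal sketch of why the tuple() wrapper matters (the regex below is a made-up stand-in, not Dailymotion's real one):

    import re

    def extract_embed_urls(url, webpage):
        # A generator, like the new _extract_embed_urls() classmethod
        for mobj in re.finditer(r'<iframe[^>]+src="(?P<url>[^"]+)"', webpage):
            yield mobj.group('url')

    webpage = '<iframe src="https://www.dailymotion.com/embed/video/x0000"></iframe>'
    dailymotion_urls = tuple(extract_embed_urls(None, webpage))
    if dailymotion_urls:  # an unwrapped generator would always pass this test
        print(dailymotion_urls)
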
diff --git a/hypervideo_dl/extractor/freesound.py b/hypervideo_dl/extractor/freesound.py
index 138b6bc..8b5f227 100644
--- a/hypervideo_dl/extractor/freesound.py
+++ b/hypervideo_dl/extractor/freesound.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -65,7 +63,6 @@ class FreesoundIE(InfoExtractor):
'format_note': channels,
'quality': quality,
} for quality, format_url in enumerate(audio_urls)]
- self._sort_formats(formats)
return {
'id': audio_id,
diff --git a/hypervideo_dl/extractor/freespeech.py b/hypervideo_dl/extractor/freespeech.py
index ea9c3e3..aea5513 100644
--- a/hypervideo_dl/extractor/freespeech.py
+++ b/hypervideo_dl/extractor/freespeech.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .youtube import YoutubeIE
diff --git a/hypervideo_dl/extractor/freetv.py b/hypervideo_dl/extractor/freetv.py
new file mode 100644
index 0000000..757a10d
--- /dev/null
+++ b/hypervideo_dl/extractor/freetv.py
@@ -0,0 +1,139 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj, urlencode_postdata
+
+
+class FreeTvBaseIE(InfoExtractor):
+ def _get_api_response(self, content_id, resource_type, postdata):
+ return self._download_json(
+ 'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php',
+ content_id, data=urlencode_postdata(postdata),
+ note=f'Downloading {content_id} {resource_type} JSON')['data']
+
+
+class FreeTvMoviesIE(FreeTvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/peliculas/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/peliculas/atrapame-si-puedes/',
+ 'md5': 'dc62d5abf0514726640077cd1591aa92',
+ 'info_dict': {
+ 'id': '428021',
+ 'title': 'Atrápame Si Puedes',
+ 'description': 'md5:ca63bc00898aeb2f64ec87c6d3a5b982',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'url': 'https://www.freetv.com/peliculas/monstruoso/',
+ 'md5': '509c15c68de41cb708d1f92d071f20aa',
+ 'info_dict': {
+ 'id': '377652',
+ 'title': 'Monstruoso',
+ 'description': 'md5:333fc19ee327b457b980e54a911ea4a3',
+ 'ext': 'mp4',
+ }
+ }]
+
+ def _extract_video(self, content_id, action='olyott_video_play'):
+ api_response = self._get_api_response(content_id, 'video', {
+ 'action': action,
+ 'contentID': content_id,
+ })
+
+ video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(api_response, ('displayMeta', 'title')),
+ 'description': traverse_obj(api_response, ('displayMeta', 'desc')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ return self._extract_video(
+ self._search_regex((
+ r'class=["\'][^>]+postid-(?P<video_id>\d+)',
+ r'<link[^>]+freetv.com/\?p=(?P<video_id>\d+)',
+ r'<div[^>]+data-params=["\'][^>]+post_id=(?P<video_id>\d+)',
+ ), webpage, 'video id', group='video_id'))
+
+
+class FreeTvIE(FreeTvBaseIE):
+ IE_NAME = 'freetv:series'
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/series/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/series/el-detective-l/',
+ 'info_dict': {
+ 'id': 'el-detective-l',
+ 'title': 'El Detective L',
+ 'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be'
+ },
+ 'playlist_count': 24,
+ }, {
+ 'url': 'https://www.freetv.com/series/esmeraldas/',
+ 'info_dict': {
+ 'id': 'esmeraldas',
+ 'title': 'Esmeraldas',
+ 'description': 'md5:43d7ec45bd931d8268a4f5afaf4c77bf'
+ },
+ 'playlist_count': 62,
+ }, {
+ 'url': 'https://www.freetv.com/series/las-aventuras-de-leonardo/',
+ 'info_dict': {
+ 'id': 'las-aventuras-de-leonardo',
+ 'title': 'Las Aventuras de Leonardo',
+ 'description': 'md5:0c47130846c141120a382aca059288f6'
+ },
+ 'playlist_count': 13,
+ },
+ ]
+
+ def _extract_series_season(self, season_id, series_title):
+ episodes = self._get_api_response(season_id, 'series', {
+ 'contentID': season_id,
+ 'action': 'olyott_get_dynamic_series_content',
+ 'type': 'list',
+ 'perPage': '1000',
+ })['1']
+
+ for episode in episodes:
+ video_id = str(episode['contentID'])
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
+
+ yield {
+ 'id': video_id,
+ 'title': episode.get('fullTitle'),
+ 'description': episode.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': episode.get('thumbnail'),
+ 'series': series_title,
+ 'series_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seriesID')),
+ 'season_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seasonID')),
+ 'season_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'seasonNum'), expected_type=int_or_none),
+ 'episode_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'episodeNum'), expected_type=int_or_none),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'<h1[^>]+class=["\']synopis[^>]>(?P<title>[^<]+)', webpage, 'title', group='title', fatal=False)
+ description = self._html_search_regex(
+ r'<div[^>]+class=["\']+synopis content[^>]><p>(?P<description>[^<]+)',
+ webpage, 'description', group='description', fatal=False)
+
+ return self.playlist_result(
+ itertools.chain.from_iterable(
+ self._extract_series_season(season_id, title)
+ for season_id in re.findall(r'<option[^>]+value=["\'](\d+)["\']', webpage)),
+ display_id, title, description)
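
[editor's note] For reference, a rough stdlib-only stand-in for what FreeTvBaseIE._get_api_response does for the season listing. The endpoint and POST fields are taken from the new extractor above; the content ID and everything else (headers, error handling) are assumptions for illustration:

    import json
    import urllib.parse
    import urllib.request

    def get_api_response(content_id, postdata):
        # urlencode_postdata() is roughly urlencode(...).encode()
        data = urllib.parse.urlencode(postdata).encode()
        req = urllib.request.Request(
            'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php', data=data)
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)['data']

    # Season listing as in _extract_series_season(); the '1' key holds the episodes
    episodes = get_api_response('12345', {   # '12345' is a hypothetical season ID
        'contentID': '12345',
        'action': 'olyott_get_dynamic_series_content',
        'type': 'list',
        'perPage': '1000',
    })['1']
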
diff --git a/hypervideo_dl/extractor/freshlive.py b/hypervideo_dl/extractor/freshlive.py
deleted file mode 100644
index 72a8459..0000000
--- a/hypervideo_dl/extractor/freshlive.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- int_or_none,
- try_get,
- unified_timestamp,
-)
-
-
-class FreshLiveIE(InfoExtractor):
- _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://freshlive.tv/satotv/74712',
- 'md5': '9f0cf5516979c4454ce982df3d97f352',
- 'info_dict': {
- 'id': '74712',
- 'ext': 'mp4',
- 'title': 'テスト',
- 'description': 'テスト',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1511,
- 'timestamp': 1483619655,
- 'upload_date': '20170105',
- 'uploader': 'サトTV',
- 'uploader_id': 'satotv',
- 'view_count': int,
- 'comment_count': int,
- 'is_live': False,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- options = self._parse_json(
- self._search_regex(
- r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>',
- webpage, 'initial context'),
- video_id)
-
- info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id]
-
- title = info['title']
-
- if info.get('status') == 'upcoming':
- raise ExtractorError('Stream %s is upcoming' % video_id, expected=True)
-
- stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl']
-
- is_live = info.get('liveStreamUrl') is not None
-
- formats = self._extract_m3u8_formats(
- stream_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls')
-
- if is_live:
- title = self._live_title(title)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'description': info.get('description'),
- 'thumbnail': info.get('thumbnailUrl'),
- 'duration': int_or_none(info.get('airTime')),
- 'timestamp': unified_timestamp(info.get('createdAt')),
- 'uploader': try_get(
- info, lambda x: x['channel']['title'], compat_str),
- 'uploader_id': try_get(
- info, lambda x: x['channel']['code'], compat_str),
- 'uploader_url': try_get(
- info, lambda x: x['channel']['permalink'], compat_str),
- 'view_count': int_or_none(info.get('viewCount')),
- 'comment_count': int_or_none(info.get('commentCount')),
- 'tags': info.get('tags', []),
- 'is_live': is_live,
- }
diff --git a/hypervideo_dl/extractor/frontendmasters.py b/hypervideo_dl/extractor/frontendmasters.py
index fc67a84..3bae8ad 100644
--- a/hypervideo_dl/extractor/frontendmasters.py
+++ b/hypervideo_dl/extractor/frontendmasters.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -163,7 +160,6 @@ class FrontendMastersIE(FrontendMastersBaseIE):
'format_id': format_id,
})
formats.append(f)
- self._sort_formats(formats)
subtitles = {
'en': [{
diff --git a/hypervideo_dl/extractor/fujitv.py b/hypervideo_dl/extractor/fujitv.py
index 4fdfe12..668bb27 100644
--- a/hypervideo_dl/extractor/fujitv.py
+++ b/hypervideo_dl/extractor/fujitv.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
from ..utils import HEADRequest
from .common import InfoExtractor
@@ -19,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
'info_dict': {
'id': '5d40110076',
- 'ext': 'mp4',
+ 'ext': 'ts',
'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
'series': 'ちびまる子ちゃん',
'series_id': '5d40',
@@ -30,7 +28,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083',
'info_dict': {
'id': '5d40810083',
- 'ext': 'mp4',
+ 'ext': 'ts',
'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻',
'description': 'md5:3972d900b896adc8ab1849e310507efa',
'series': 'ちびまる子ちゃん',
@@ -47,19 +45,18 @@ class FujiTVFODPlus7IE(InfoExtractor):
if token:
json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False)
else:
- self.report_warning(f'The token cookie is needed to extract video metadata. {self._LOGIN_HINTS["cookies"]}')
+ self.report_warning(f'The token cookie is needed to extract video metadata. {self._login_hint("cookies")}')
formats, subtitles = [], {}
src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id)
for src in src_json['video_selector']:
if not src.get('url'):
continue
- fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4')
+ fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts')
for f in fmt:
f.update(dict(zip(('height', 'width'),
self._BITRATE_MAP.get(f.get('tbr'), ()))))
formats.extend(fmt)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats, ['tbr'])
return {
'id': video_id,
@@ -70,4 +67,5 @@ class FujiTVFODPlus7IE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
+ '_format_sort_fields': ('tbr', )
}
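
[editor's note] Two related changes in this fujitv hunk follow a pattern repeated throughout the update: the per-extractor self._sort_formats(formats, ['tbr']) call is removed, and the sort preference moves into the returned info dict as '_format_sort_fields'. The dict(zip(...)) line is also worth unpacking: it fills height/width from the bitrate map only when the tbr is known, and is a no-op otherwise. A small demonstration, with made-up _BITRATE_MAP values:

    _BITRATE_MAP = {
        8000: (1080, 1920),   # illustrative values, not FujiTV's real map
        4000: (720, 1280),
    }

    for f in ({'tbr': 8000}, {'tbr': 1234}):
        # zip() against an empty tuple yields nothing, so update({}) is a no-op
        f.update(dict(zip(('height', 'width'), _BITRATE_MAP.get(f['tbr'], ()))))
        print(f)
    # {'tbr': 8000, 'height': 1080, 'width': 1920}
    # {'tbr': 1234}
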
diff --git a/hypervideo_dl/extractor/funimation.py b/hypervideo_dl/extractor/funimation.py
index 6aa9bc9..18363c1 100644
--- a/hypervideo_dl/extractor/funimation.py
+++ b/hypervideo_dl/extractor/funimation.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
import re
import string
@@ -8,17 +5,18 @@ import string
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
+ ExtractorError,
determine_ext,
int_or_none,
join_nonempty,
js_to_json,
+ make_archive_id,
orderedSet,
qualities,
str_or_none,
traverse_obj,
try_get,
urlencode_postdata,
- ExtractorError,
)
@@ -245,11 +243,14 @@ class FunimationIE(FunimationBaseIE):
'language_preference': language_preference(lang.lower()),
})
formats.extend(current_formats)
+ if not formats and (requested_languages or requested_versions):
+ self.raise_no_formats(
+ 'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
self._remove_duplicate_formats(formats)
- self._sort_formats(formats, ('lang', 'source'))
return {
- 'id': initial_experience_id if only_initial_experience else episode_id,
+ 'id': episode_id,
+ '_old_archive_ids': [make_archive_id(self, initial_experience_id)],
'display_id': display_id,
'duration': duration,
'title': episode['episodeTitle'],
@@ -264,6 +265,7 @@ class FunimationIE(FunimationBaseIE):
'formats': formats,
'thumbnails': thumbnails,
'subtitles': subtitles,
+ '_format_sort_fields': ('lang', 'source'),
}
def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
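
[editor's note] The funimation change switches the canonical ID from the experience ID to the stable episode ID, and records the old form via make_archive_id() so that existing --download-archive files keep matching. A sketch of what that likely produces, assuming the usual '<lowercased ie key> <id>' archive-entry shape (the shape is not shown in this diff):

    def make_archive_id(ie_key, video_id):
        # assumed archive-entry shape
        return f'{ie_key.lower()} {video_id}'

    info = {
        'id': 'episode-id-123',   # hypothetical IDs
        '_old_archive_ids': [make_archive_id('Funimation', 'experience-456')],
    }
    print(info['_old_archive_ids'])   # ['funimation experience-456']
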
diff --git a/hypervideo_dl/extractor/funk.py b/hypervideo_dl/extractor/funk.py
index 2c5cfe8..539d719 100644
--- a/hypervideo_dl/extractor/funk.py
+++ b/hypervideo_dl/extractor/funk.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from .nexx import NexxIE
from ..utils import (
diff --git a/hypervideo_dl/extractor/fusion.py b/hypervideo_dl/extractor/fusion.py
index a3f44b8..689422f 100644
--- a/hypervideo_dl/extractor/fusion.py
+++ b/hypervideo_dl/extractor/fusion.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -72,7 +70,6 @@ class FusionIE(InfoExtractor):
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
})
if formats:
- self._sort_formats(formats)
info['formats'] = formats
else:
info.update({
diff --git a/hypervideo_dl/extractor/fuyintv.py b/hypervideo_dl/extractor/fuyintv.py
new file mode 100644
index 0000000..197901d
--- /dev/null
+++ b/hypervideo_dl/extractor/fuyintv.py
@@ -0,0 +1,30 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class FuyinTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fuyin\.tv/html/(?:\d+)/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.fuyin.tv/html/2733/44129.html',
+ 'info_dict': {
+ 'id': '44129',
+ 'ext': 'mp4',
+ 'title': '第1集',
+ 'description': 'md5:21a3d238dc8d49608e1308e85044b9c3',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ 'https://www.fuyin.tv/api/api/tv.movie/url',
+ video_id, query={'urlid': f'{video_id}'})
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(json_data, ('data', 'title')),
+ 'url': json_data['data']['url'],
+ 'ext': 'mp4',
+ 'description': self._html_search_meta('description', webpage),
+ }
diff --git a/hypervideo_dl/extractor/fxnetworks.py b/hypervideo_dl/extractor/fxnetworks.py
deleted file mode 100644
index 00e6742..0000000
--- a/hypervideo_dl/extractor/fxnetworks.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .adobepass import AdobePassIE
-from ..utils import (
- extract_attributes,
- int_or_none,
- parse_age_limit,
- smuggle_url,
- update_url_query,
-)
-
-
-class FXNetworksIE(AdobePassIE):
- _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://www.fxnetworks.com/video/1032565827847',
- 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
- 'info_dict': {
- 'id': 'dRzwHC_MMqIv',
- 'ext': 'mp4',
- 'title': 'First Look: Better Things - Season 2',
- 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
- 'age_limit': 14,
- 'uploader': 'NEWA-FNG-FX',
- 'upload_date': '20170825',
- 'timestamp': 1503686274,
- 'episode_number': 0,
- 'season_number': 2,
- 'series': 'Better Things',
- },
- 'add_ie': ['ThePlatform'],
- }, {
- 'url': 'http://www.simpsonsworld.com/video/716094019682',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- if 'The content you are trying to access is not available in your region.' in webpage:
- self.raise_geo_restricted()
- video_data = extract_attributes(self._search_regex(
- r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
- player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
- release_url = video_data['rel']
- title = video_data['data-title']
- rating = video_data.get('data-rating')
- query = {
- 'mbr': 'true',
- }
- if player_type == 'movies':
- query.update({
- 'manifest': 'm3u',
- })
- else:
- query.update({
- 'switch': 'http',
- })
- if video_data.get('data-req-auth') == '1':
- resource = self._get_mvpd_resource(
- video_data['data-channel'], title,
- video_data.get('data-guid'), rating)
- query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'title': title,
- 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
- 'series': video_data.get('data-show-title'),
- 'episode_number': int_or_none(video_data.get('data-episode')),
- 'season_number': int_or_none(video_data.get('data-season')),
- 'thumbnail': video_data.get('data-large-thumb'),
- 'age_limit': parse_age_limit(rating),
- 'ie_key': 'ThePlatform',
- }
diff --git a/hypervideo_dl/extractor/gab.py b/hypervideo_dl/extractor/gab.py
index 9ba0b1c..5016e2f 100644
--- a/hypervideo_dl/extractor/gab.py
+++ b/hypervideo_dl/extractor/gab.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -57,7 +54,6 @@ class GabTVIE(InfoExtractor):
else:
frmt['height'] = str_to_int(resolution.replace('p', ''))
formats.append(frmt)
- self._sort_formats(formats)
return {
'id': id,
@@ -123,8 +119,6 @@ class GabIE(InfoExtractor):
} for url, f in ((media.get('url'), metadata.get('original') or {}),
(media.get('source_mp4'), metadata.get('playable') or {})) if url]
- self._sort_formats(formats)
-
author = json_data.get('account') or {}
entries.append({
'id': f'{post_id}-{idx}',
diff --git a/hypervideo_dl/extractor/gaia.py b/hypervideo_dl/extractor/gaia.py
index 5b0195c..c84386f 100644
--- a/hypervideo_dl/extractor/gaia.py
+++ b/hypervideo_dl/extractor/gaia.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -92,7 +88,6 @@ class GaiaIE(InfoExtractor):
media_id, headers=headers)
formats = self._extract_m3u8_formats(
media['mediaUrls']['bcHLS'], media_id, 'mp4')
- self._sort_formats(formats)
subtitles = {}
text_tracks = media.get('textTracks', {})
diff --git a/hypervideo_dl/extractor/gameinformer.py b/hypervideo_dl/extractor/gameinformer.py
index f1b96c1..2664edb 100644
--- a/hypervideo_dl/extractor/gameinformer.py
+++ b/hypervideo_dl/extractor/gameinformer.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import (
diff --git a/hypervideo_dl/extractor/gamejolt.py b/hypervideo_dl/extractor/gamejolt.py
index a13e528..440b832 100644
--- a/hypervideo_dl/extractor/gamejolt.py
+++ b/hypervideo_dl/extractor/gamejolt.py
@@ -1,4 +1,3 @@
-# coding: utf-8
import itertools
import json
import math
diff --git a/hypervideo_dl/extractor/gamespot.py b/hypervideo_dl/extractor/gamespot.py
index 7a1beae..8dec252 100644
--- a/hypervideo_dl/extractor/gamespot.py
+++ b/hypervideo_dl/extractor/gamespot.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .once import OnceIE
from ..compat import compat_urllib_parse_unquote
@@ -67,8 +65,6 @@ class GameSpotIE(OnceIE):
formats.extend(self._extract_mpd_formats(
mpd_url, page_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
-
return {
'id': data_video.get('guid') or page_id,
'display_id': page_id,
diff --git a/hypervideo_dl/extractor/gamestar.py b/hypervideo_dl/extractor/gamestar.py
index e882fa6..e9966f5 100644
--- a/hypervideo_dl/extractor/gamestar.py
+++ b/hypervideo_dl/extractor/gamestar.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/gaskrank.py b/hypervideo_dl/extractor/gaskrank.py
index 03acd2a..e0bbdae 100644
--- a/hypervideo_dl/extractor/gaskrank.py
+++ b/hypervideo_dl/extractor/gaskrank.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
from ..utils import (
@@ -96,6 +93,5 @@ class GaskrankIE(InfoExtractor):
'view_count': view_count,
'average_rating': average_rating,
})
- self._sort_formats(entry['formats'])
return entry
diff --git a/hypervideo_dl/extractor/gazeta.py b/hypervideo_dl/extractor/gazeta.py
index 3671870..c6868a6 100644
--- a/hypervideo_dl/extractor/gazeta.py
+++ b/hypervideo_dl/extractor/gazeta.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/gdcvault.py b/hypervideo_dl/extractor/gdcvault.py
index c3ad6b4..2878bbd 100644
--- a/hypervideo_dl/extractor/gdcvault.py
+++ b/hypervideo_dl/extractor/gdcvault.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/gedidigital.py b/hypervideo_dl/extractor/gedidigital.py
index ec386c2..1878d63 100644
--- a/hypervideo_dl/extractor/gedidigital.py
+++ b/hypervideo_dl/extractor/gedidigital.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -14,7 +11,7 @@ from ..utils import (
class GediDigitalIE(InfoExtractor):
- _VALID_URL = r'''(?x)(?P<url>(?:https?:)//video\.
+ _VALID_URL = r'''(?x:(?P<base_url>(?:https?:)//video\.
(?:
(?:
(?:espresso\.)?repubblica
@@ -36,7 +33,13 @@ class GediDigitalIE(InfoExtractor):
|corrierealpi
|lasentinella
)\.gelocal
- )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*)'''
+ )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*))'''
+ _EMBED_REGEX = [rf'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])(?P<url>{_VALID_URL})\1''']
_TESTS = [{
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
'md5': '84658d7fb9e55a6e57ecc77b73137494',
@@ -112,22 +115,9 @@ class GediDigitalIE(InfoExtractor):
urls[i] = urljoin(base_url(e), url_basename(e))
return urls
- @staticmethod
- def _extract_urls(webpage):
- entries = [
- mobj.group('eurl')
- for mobj in re.finditer(r'''(?x)
- (?:
- data-frame-src=|
- <iframe[^\n]+src=
- )
- (["'])(?P<eurl>%s)\1''' % GediDigitalIE._VALID_URL, webpage)]
- return GediDigitalIE._sanitize_urls(entries)
-
- @staticmethod
- def _extract_url(webpage):
- urls = GediDigitalIE._extract_urls(webpage)
- return urls[0] if urls else None
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ return cls._sanitize_urls(tuple(super()._extract_embed_urls(url, webpage)))
@staticmethod
def _clean_formats(formats):
@@ -142,8 +132,7 @@ class GediDigitalIE(InfoExtractor):
formats[:] = clean_formats
def _real_extract(self, url):
- video_id = self._match_id(url)
- url = self._match_valid_url(url).group('url')
+ video_id, url = self._match_valid_url(url).group('id', 'base_url')
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
['twitter:title', 'og:title'], webpage, fatal=True)
@@ -197,7 +186,6 @@ class GediDigitalIE(InfoExtractor):
duration = int_or_none(v)
self._clean_formats(formats)
- self._sort_formats(formats)
return {
'id': video_id,
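
[editor's note] This gedidigital hunk shows the full migration pattern for embed discovery: the hand-written static _extract_urls()/_extract_url() pair is deleted, the regex moves into the declarative _EMBED_REGEX list, and only the extra URL-sanitizing step survives as a thin _extract_embed_urls() override on top of super(). A self-contained mirror of that shape, with an assumed base class standing in for InfoExtractor and a simplified regex and cleanup:

    import re

    class FakeBaseIE:
        _EMBED_REGEX = []

        @classmethod
        def _extract_embed_urls(cls, url, webpage):
            for pattern in cls._EMBED_REGEX:
                for mobj in re.finditer(pattern, webpage):
                    yield mobj.group('url')

    class FakeGediDigitalIE(FakeBaseIE):
        _EMBED_REGEX = [r'<iframe[^\n]+src=(["\'])(?P<url>https?://video\.lastampa\.it/[^"\']+)\1']

        @staticmethod
        def _sanitize_urls(urls):
            return [u.split('?')[0] for u in urls]   # stand-in for the real cleanup

        @classmethod
        def _extract_embed_urls(cls, url, webpage):
            return cls._sanitize_urls(tuple(super()._extract_embed_urls(url, webpage)))

    page = '<iframe src="https://video.lastampa.it/x/121559/121683?autoplay=1"></iframe>'
    print(FakeGediDigitalIE._extract_embed_urls(None, page))
    # ['https://video.lastampa.it/x/121559/121683']
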
diff --git a/hypervideo_dl/extractor/generic.py b/hypervideo_dl/extractor/generic.py
index 03e6eb2..f28a77e 100644
--- a/hypervideo_dl/extractor/generic.py
+++ b/hypervideo_dl/extractor/generic.py
@@ -1,162 +1,49 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
import os
import re
-import sys
+import types
+import urllib.parse
+import xml.etree.ElementTree
-from .common import InfoExtractor
+from .common import InfoExtractor # isort: split
+from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
-from ..compat import (
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
- compat_xml_parse_error,
-)
+from ..compat import compat_etree_fromstring
from ..utils import (
+ KNOWN_EXTENSIONS,
+ MEDIA_EXTENSIONS,
+ ExtractorError,
+ UnsupportedError,
determine_ext,
dict_get,
- ExtractorError,
- float_or_none,
- HEADRequest,
+ format_field,
int_or_none,
is_html,
js_to_json,
- KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext,
orderedSet,
parse_duration,
parse_resolution,
- sanitized_Request,
smuggle_url,
str_or_none,
+ traverse_obj,
+ try_call,
unescapeHTML,
unified_timestamp,
unsmuggle_url,
- UnsupportedError,
url_or_none,
+ variadic,
xpath_attr,
xpath_text,
xpath_with_ns,
)
-from .commonprotocols import RtmpIE
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .nexx import (
- NexxIE,
- NexxEmbedIE,
-)
-from .nbc import NBCSportsVPlayerIE
-from .ooyala import OoyalaIE
-from .rutv import RUTVIE
-from .tvc import TVCIE
-from .sportbox import SportBoxIE
-from .myvi import MyviIE
-from .condenast import CondeNastIE
-from .udn import UDNEmbedIE
-from .senategov import SenateISVPIE
-from .svt import SVTIE
-from .pornhub import PornHubIE
-from .xhamster import XHamsterEmbedIE
-from .tnaflix import TNAFlixNetworkEmbedIE
-from .drtuber import DrTuberIE
-from .redtube import RedTubeIE
-from .tube8 import Tube8IE
-from .mofosex import MofosexEmbedIE
-from .spankwire import SpankwireIE
-from .youporn import YouPornIE
-from .vimeo import (
- VimeoIE,
- VHXEmbedIE,
-)
-from .dailymotion import DailymotionIE
-from .dailymail import DailyMailIE
-from .onionstudios import OnionStudiosIE
-from .viewlift import ViewLiftEmbedIE
-from .mtv import MTVServicesEmbeddedIE
-from .pladform import PladformIE
-from .videomore import VideomoreIE
-from .webcaster import WebcasterFeedIE
-from .googledrive import GoogleDriveIE
-from .jwplatform import JWPlatformIE
-from .digiteka import DigitekaIE
-from .arkena import ArkenaIE
-from .instagram import InstagramIE
-from .threeqsdn import ThreeQSDNIE
-from .theplatform import ThePlatformIE
-from .kaltura import KalturaIE
-from .eagleplatform import EaglePlatformIE
-from .facebook import FacebookIE
-from .soundcloud import SoundcloudEmbedIE
-from .tunein import TuneInBaseIE
-from .vbox7 import Vbox7IE
-from .dbtv import DBTVIE
-from .piksel import PikselIE
-from .videa import VideaIE
-from .twentymin import TwentyMinutenIE
-from .ustream import UstreamIE
-from .arte import ArteTVEmbedIE
-from .videopress import VideoPressIE
-from .rutube import RutubeIE
-from .glomex import GlomexEmbedIE
-from .megatvcom import MegaTVComEmbedIE
-from .ant1newsgr import Ant1NewsGrEmbedIE
-from .limelight import LimelightBaseIE
-from .anvato import AnvatoIE
-from .washingtonpost import WashingtonPostIE
-from .wistia import WistiaIE
-from .mediaset import MediasetIE
-from .joj import JojIE
-from .megaphone import MegaphoneIE
-from .vzaar import VzaarIE
-from .channel9 import Channel9IE
-from .vshare import VShareIE
-from .mediasite import MediasiteIE
-from .springboardplatform import SpringboardPlatformIE
-from .ted import TedEmbedIE
-from .yapfiles import YapFilesIE
-from .vice import ViceIE
-from .xfileshare import XFileShareIE
-from .cloudflarestream import CloudflareStreamIE
-from .peertube import PeerTubeIE
-from .teachable import TeachableIE
-from .indavideo import IndavideoEmbedIE
-from .apa import APAIE
-from .foxnews import FoxNewsIE
-from .viqeo import ViqeoIE
-from .expressen import ExpressenIE
-from .zype import ZypeIE
-from .odnoklassniki import OdnoklassnikiIE
-from .vk import VKIE
-from .kinja import KinjaEmbedIE
-from .gedidigital import GediDigitalIE
-from .rcs import RCSEmbedsIE
-from .bitchute import BitChuteIE
-from .rumble import RumbleEmbedIE
-from .arcpublishing import ArcPublishingIE
-from .medialaan import MedialaanIE
-from .simplecast import SimplecastIE
-from .wimtv import WimTVIE
-from .tvopengr import TVOpenGrEmbedIE
-from .ertgr import ERTWebtvEmbedIE
-from .tvp import TVPEmbedIE
-from .blogger import BloggerIE
-from .mainstreaming import MainStreamingIE
-from .gfycat import GfycatIE
-from .panopto import PanoptoBaseIE
-from .ruutu import RuutuIE
class GenericIE(InfoExtractor):
IE_DESC = 'Generic downloader that works on some sites'
_VALID_URL = r'.*'
IE_NAME = 'generic'
- _NETRC_MACHINE = False # Supress username warning
+ _NETRC_MACHINE = False # Suppress username warning
_TESTS = [
# Direct link to a video
{
@@ -474,188 +361,6 @@ class GenericIE(InfoExtractor):
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
- {
- # embedded brightcove video
- # it also tests brightcove videos that need to set the 'Referer'
- # in the http requests
- 'add_ie': ['BrightcoveLegacy'],
- 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
- 'info_dict': {
- 'id': '2765128793001',
- 'ext': 'mp4',
- 'title': 'Le cours de bourse : l’analyse technique',
- 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
- 'uploader': 'BFM BUSINESS',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # embedded with itemprop embedURL and video id spelled as `idVideo`
- 'add_id': ['BrightcoveLegacy'],
- 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
- 'info_dict': {
- 'id': '5255628253001',
- 'ext': 'mp4',
- 'title': 'md5:37c519b1128915607601e75a87995fc0',
- 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
- 'uploader': 'BFM BUSINESS',
- 'uploader_id': '876450612001',
- 'timestamp': 1482255315,
- 'upload_date': '20161220',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # https://github.com/ytdl-org/youtube-dl/issues/2253
- 'url': 'http://bcove.me/i6nfkrc3',
- 'md5': '0ba9446db037002366bab3b3eb30c88c',
- 'info_dict': {
- 'id': '3101154703001',
- 'ext': 'mp4',
- 'title': 'Still no power',
- 'uploader': 'thestar.com',
- 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
- },
- 'add_ie': ['BrightcoveLegacy'],
- 'skip': 'video gone',
- },
- {
- 'url': 'http://www.championat.com/video/football/v/87/87499.html',
- 'md5': 'fb973ecf6e4a78a67453647444222983',
- 'info_dict': {
- 'id': '3414141473001',
- 'ext': 'mp4',
- 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
- 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
- 'uploader': 'Championat',
- },
- },
- {
- # https://github.com/ytdl-org/youtube-dl/issues/3541
- 'add_ie': ['BrightcoveLegacy'],
- 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
- 'info_dict': {
- 'id': '3866516442001',
- 'ext': 'mp4',
- 'title': 'Leer mij vrouwen kennen: Aflevering 1',
- 'description': 'Leer mij vrouwen kennen: Aflevering 1',
- 'uploader': 'SBS Broadcasting',
- },
- 'skip': 'Restricted to Netherlands',
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- },
- {
- # Brightcove video in <iframe>
- 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
- 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
- 'info_dict': {
- 'id': '5360463607001',
- 'ext': 'mp4',
- 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
- 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
- 'uploader': 'United Nations',
- 'uploader_id': '1362235914001',
- 'timestamp': 1489593889,
- 'upload_date': '20170315',
- },
- 'add_ie': ['BrightcoveLegacy'],
- },
- {
- # Brightcove with alternative playerID key
- 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
- 'info_dict': {
- 'id': 'nmeth.2062_SV1',
- 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '2228375078001',
- 'ext': 'mp4',
- 'title': 'nmeth.2062-sv1',
- 'description': 'nmeth.2062-sv1',
- 'timestamp': 1363357591,
- 'upload_date': '20130315',
- 'uploader': 'Nature Publishing Group',
- 'uploader_id': '1964492299001',
- },
- }],
- },
- {
- # Brightcove with UUID in videoPlayer
- 'url': 'http://www8.hp.com/cn/zh/home.html',
- 'info_dict': {
- 'id': '5255815316001',
- 'ext': 'mp4',
- 'title': 'Sprocket Video - China',
- 'description': 'Sprocket Video - China',
- 'uploader': 'HP-Video Gallery',
- 'timestamp': 1482263210,
- 'upload_date': '20161220',
- 'uploader_id': '1107601872001',
- },
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- 'skip': 'video rotates...weekly?',
- },
- {
- # Brightcove:new type [2].
- 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
- 'md5': '2b35148fcf48da41c9fb4591650784f3',
- 'info_dict': {
- 'id': '5348741021001',
- 'ext': 'mp4',
- 'upload_date': '20170306',
- 'uploader_id': '4191638492001',
- 'timestamp': 1488769918,
- 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
-
- },
- },
- {
- # Alternative brightcove <video> attributes
- 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
- 'info_dict': {
- 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
- 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
- },
- 'playlist': [{
- 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
- 'info_dict': {
- 'id': '5311302538001',
- 'ext': 'mp4',
- 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
- 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
- 'timestamp': 1486321708,
- 'upload_date': '20170205',
- 'uploader_id': '800000640001',
- },
- 'only_matching': True,
- }],
- },
- {
- # Brightcove with UUID in videoPlayer
- 'url': 'http://www8.hp.com/cn/zh/home.html',
- 'info_dict': {
- 'id': '5255815316001',
- 'ext': 'mp4',
- 'title': 'Sprocket Video - China',
- 'description': 'Sprocket Video - China',
- 'uploader': 'HP-Video Gallery',
- 'timestamp': 1482263210,
- 'upload_date': '20161220',
- 'uploader_id': '1107601872001',
- },
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- },
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
@@ -947,45 +652,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
- # YouTube <object> embed
- {
- 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
- 'md5': '516718101ec834f74318df76259fb3cc',
- 'info_dict': {
- 'id': 'msN87y-iEx0',
- 'ext': 'webm',
- 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
- 'upload_date': '20080526',
- 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
- 'uploader': 'Christopher Sykes',
- 'uploader_id': 'ChristopherJSykes',
- },
- 'add_ie': ['Youtube'],
- },
- # Camtasia studio
- {
- 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
- 'playlist': [{
- 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
- 'info_dict': {
- 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
- 'ext': 'flv',
- 'duration': 2235.90,
- }
- }, {
- 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
- 'info_dict': {
- 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
- 'ext': 'flv',
- 'duration': 2235.93,
- }
- }],
- 'info_dict': {
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
- }
- },
# Flowplayer
{
'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
@@ -998,20 +664,6 @@ class GenericIE(InfoExtractor):
'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
}
},
- # Multiple brightcove videos
- # https://github.com/ytdl-org/youtube-dl/issues/2283
- {
- 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
- 'info_dict': {
- 'id': 'always-never',
- 'title': 'Always / Never - The New Yorker',
- },
- 'playlist_count': 3,
- 'params': {
- 'extract_flat': False,
- 'skip_download': True,
- }
- },
# MLB embed
{
'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
@@ -1027,36 +679,6 @@ class GenericIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
},
- # Wistia embed
- {
- 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
- 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
- 'info_dict': {
- 'id': '6e2wtrbdaf',
- 'ext': 'mov',
- 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
- 'description': 'a Paywall Videos video from Remilon',
- 'duration': 644.072,
- 'uploader': 'study.com',
- 'timestamp': 1459678540,
- 'upload_date': '20160403',
- 'filesize': 24687186,
- },
- },
- {
- 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
- 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
- 'info_dict': {
- 'id': 'uxjb0lwrcz',
- 'ext': 'mp4',
- 'title': 'Conversation about Hexagonal Rails Part 1',
- 'description': 'a Martin Fowler video from ThoughtWorks',
- 'duration': 1715.0,
- 'uploader': 'thoughtworks.wistia.com',
- 'timestamp': 1401832161,
- 'upload_date': '20140603',
- },
- },
# Wistia standard embed (async)
{
'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
@@ -1071,7 +693,8 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': 'webpage 404 not found',
},
# Soundcloud embed
{
@@ -1255,18 +878,6 @@ class GenericIE(InfoExtractor):
}
},
{
- # JWPlatform iframe
- 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
- 'info_dict': {
- 'id': 'AG26UQXM',
- 'ext': 'mp4',
- 'upload_date': '20160719',
- 'timestamp': 468923808,
- 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
- },
- 'add_ie': [JWPlatformIE.ie_key()],
- },
- {
# Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
'info_dict': {
@@ -1545,21 +1156,6 @@ class GenericIE(InfoExtractor):
},
'expected_warnings': ['Failed to parse JSON Expecting value'],
},
- # Brightcove URL in single quotes
- {
- 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
- 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
- 'info_dict': {
- 'id': '4255764656001',
- 'ext': 'mp4',
- 'title': 'SN Presents: Russell Martin, World Citizen',
- 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
- 'uploader': 'Rogers Sportsnet',
- 'uploader_id': '1704050871',
- 'upload_date': '20150525',
- 'timestamp': 1432570283,
- },
- },
# Kinja embed
{
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
@@ -1595,52 +1191,6 @@ class GenericIE(InfoExtractor):
'duration': 248.667,
},
},
- # BrightcoveInPageEmbed embed
- {
- 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
- 'info_dict': {
- 'id': '4238694884001',
- 'ext': 'flv',
- 'title': 'Tabletop: Dread, Last Thoughts',
- 'description': 'Tabletop: Dread, Last Thoughts',
- 'duration': 51690,
- },
- },
- # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
- # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
- {
- 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
- 'info_dict': {
- 'id': '4785848093001',
- 'ext': 'mp4',
- 'title': 'The Cardinal Pell Interview',
- 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
- 'uploader': 'GlobeCast Australia - GlobeStream',
- 'uploader_id': '2733773828001',
- 'upload_date': '20160304',
- 'timestamp': 1457083087,
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- },
- {
- # Brightcove embed with whitespace around attribute names
- 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
- 'info_dict': {
- 'id': '3167554373001',
- 'ext': 'mp4',
- 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
- 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
- 'uploader_id': '1079349493',
- 'upload_date': '20140207',
- 'timestamp': 1391810548,
- },
- 'params': {
- 'skip_download': True,
- },
- },
# Another form of arte.tv embed
{
'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
@@ -1691,7 +1241,7 @@ class GenericIE(InfoExtractor):
'timestamp': 1464107587,
'uploader': 'TheAtlantic',
},
- 'add_ie': ['BrightcoveLegacy'],
+ 'skip': 'Private Youtube video',
},
# Facebook <iframe> embed
{
@@ -1800,7 +1350,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [ArkenaIE.ie_key()],
+ 'add_ie': ['Arkena'],
},
{
'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
@@ -1812,7 +1362,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [Vbox7IE.ie_key()],
+ 'add_ie': ['Vbox7'],
},
{
# DBTV embeds
@@ -1844,7 +1394,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [TwentyMinutenIE.ie_key()],
+ 'add_ie': ['TwentyMinuten'],
},
{
# VideoPress embed
@@ -1859,7 +1409,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [VideoPressIE.ie_key()],
+ 'add_ie': ['VideoPress'],
},
{
# Rutube embed
@@ -1876,7 +1426,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [RutubeIE.ie_key()],
+ 'add_ie': ['Rutube'],
},
{
# glomex:embed
@@ -1948,7 +1498,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'Integrated Senate Video Player',
},
- 'add_ie': [SenateISVPIE.ie_key()],
+ 'add_ie': ['SenateISVP'],
},
{
# Limelight embeds (1 channel embed + 4 media embeds)
@@ -1995,7 +1545,7 @@ class GenericIE(InfoExtractor):
'uploader': 'The Washington Post',
'upload_date': '20160211',
},
- 'add_ie': [WashingtonPostIE.ie_key()],
+ 'add_ie': ['WashingtonPost'],
},
{
# Mediaset embed
@@ -2008,7 +1558,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [MediasetIE.ie_key()],
+ 'add_ie': ['Mediaset'],
},
{
# JOJ.sk embeds
@@ -2018,7 +1568,7 @@ class GenericIE(InfoExtractor):
'title': 'Slovenskom sa prehnala vlna silných búrok',
},
'playlist_mincount': 5,
- 'add_ie': [JojIE.ie_key()],
+ 'add_ie': ['Joj'],
},
{
# AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
@@ -2084,7 +1634,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [SpringboardPlatformIE.ie_key()],
+ 'add_ie': ['SpringboardPlatform'],
},
{
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
@@ -2093,7 +1643,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'Котята',
},
- 'add_ie': [YapFilesIE.ie_key()],
+ 'add_ie': ['YapFiles'],
'params': {
'skip_download': True,
},
@@ -2106,7 +1656,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717',
},
- 'add_ie': [CloudflareStreamIE.ie_key()],
+ 'add_ie': ['CloudflareStream'],
'params': {
'skip_download': True,
},
@@ -2133,7 +1683,7 @@ class GenericIE(InfoExtractor):
'uploader': 'StreetKitchen',
'uploader_id': '546363',
},
- 'add_ie': [IndavideoEmbedIE.ie_key()],
+ 'add_ie': ['IndavideoEmbed'],
'params': {
'skip_download': True,
},
@@ -2174,22 +1724,6 @@ class GenericIE(InfoExtractor):
},
'playlist_count': 6,
},
- {
- # Squarespace video embed, 2019-08-28
- 'url': 'http://ootboxford.com',
- 'info_dict': {
- 'id': 'Tc7b_JGdZfw',
- 'title': 'Out of the Blue, at Childish Things 10',
- 'ext': 'mp4',
- 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
- 'uploader_id': 'helendouglashouse',
- 'uploader': 'Helen & Douglas House',
- 'upload_date': '20140328',
- },
- 'params': {
- 'skip_download': True,
- },
- },
# {
# # Zype embed
# 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
@@ -2508,10 +2042,10 @@ class GenericIE(InfoExtractor):
# Panopto embeds
'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
'info_dict': {
- 'title': 'Insert a quiz into a Panopto video',
- 'id': 'insert-a-quiz-into-a-panopto-video'
+ 'ext': 'mp4',
+ 'id': '0bd3f16c-824a-436a-8486-ac5900693aef',
+ 'title': 'Quizzes in Panopto',
},
- 'playlist_count': 1
},
{
# Ruutu embed
@@ -2530,114 +2064,178 @@ class GenericIE(InfoExtractor):
'upload_date': '20220308',
},
},
+ {
+ # Multiple Ruutu embeds
+ 'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html',
+ 'info_dict': {
+ 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä',
+ 'id': 'art-2000008762560'
+ },
+ 'playlist_count': 3
+ },
+ {
+ # Ruutu embed in hs.fi with a single video
+ 'url': 'https://www.hs.fi/kotimaa/art-2000008793421.html',
+ 'md5': 'f8964e65d8fada6e8a562389bf366bb4',
+ 'info_dict': {
+ 'id': '4081841',
+ 'ext': 'mp4',
+ 'title': 'Puolustusvoimat siirsi panssariajoneuvoja harjoituksiin Niinisaloon 2.5.2022',
+ 'thumbnail': r're:^https?://.+\.jpg$',
+ 'duration': 138,
+ 'age_limit': 0,
+ 'upload_date': '20220504',
+ },
+ },
+ {
+ # Webpage contains double BOM
+ 'url': 'https://www.filmarkivet.se/movies/paris-d-moll/',
+ 'md5': 'df02cadc719dcc63d43288366f037754',
+ 'info_dict': {
+ 'id': 'paris-d-moll',
+ 'ext': 'mp4',
+ 'upload_date': '20220518',
+ 'title': 'Paris d-moll',
+ 'description': 'md5:319e37ea5542293db37e1e13072fe330',
+ 'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
+ 'timestamp': 1652833414,
+ 'age_limit': 0,
+ }
+ },
+ {
+ 'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
+ 'md5': '198bde8bed23d0b23c70725c83c9b6d9',
+ 'info_dict': {
+ 'id': '53602801',
+ 'ext': 'mpga',
+ 'title': 'Interstellar',
+ 'description': 'Listen now | Episode One',
+ 'thumbnail': 'md5:c30d9c83f738e16d8551d7219d321538',
+ 'uploader': 'Molly Movie Club',
+ 'uploader_id': '839621',
+ },
+ },
+ {
+ 'url': 'https://www.blockedandreported.org/p/episode-117-lets-talk-about-depp?s=r',
+ 'md5': 'c0cc44ee7415daeed13c26e5b56d6aa0',
+ 'info_dict': {
+ 'id': '57962052',
+ 'ext': 'mpga',
+ 'title': 'md5:855b2756f0ee10f6723fa00b16266f8d',
+ 'description': 'md5:fe512a5e94136ad260c80bde00ea4eef',
+ 'thumbnail': 'md5:2218f27dfe517bb5ac16c47d0aebac59',
+ 'uploader': 'Blocked and Reported',
+ 'uploader_id': '500230',
+ },
+ },
+ {
+ 'url': 'https://www.skimag.com/video/ski-people-1980/',
+ 'md5': '022a7e31c70620ebec18deeab376ee03',
+ 'info_dict': {
+ 'id': 'YTmgRiNU',
+ 'ext': 'mp4',
+ 'title': '1980 Ski People',
+ 'timestamp': 1610407738,
+ 'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
+ 'duration': 5688.0,
+ 'upload_date': '20210111',
+ }
+ },
+ {
+ 'note': 'JSON LD with multiple @type',
+ 'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html',
+ 'md5': 'c7949f34f57273013fb7ccb1156393db',
+ 'info_dict': {
+ 'id': 'ipy2AcGL',
+ 'ext': 'mp4',
+ 'description': 'md5:6a9d644bab0dc2dc06849c2505d8383d',
+ 'thumbnail': r're:https://media\.nu\.nl/m/.+\.jpg',
+ 'title': 'Hoe een bladvlo dit verwoestende Japanse onkruid moet vernietigen',
+ 'timestamp': 1586577474,
+ 'upload_date': '20200411',
+ 'age_limit': 0,
+ 'duration': 111.0,
+ }
+ },
+ {
+ 'note': 'JSON LD with unexpected data type',
+ 'url': 'https://www.autoweek.nl/autotests/artikel/porsche-911-gt3-rs-rij-impressie-2/',
+ 'info_dict': {
+ 'id': 'porsche-911-gt3-rs-rij-impressie-2',
+ 'ext': 'mp4',
+ 'title': 'Test: Porsche 911 GT3 RS',
+ 'description': 'Je ziet het niet, maar het is er wel. Downforce, hebben we het dan over. En in de nieuwe Porsche 911 GT3 RS is er zelfs heel veel downforce.',
+ 'timestamp': 1664920902,
+ 'upload_date': '20221004',
+ 'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$',
+ 'age_limit': 0,
+ 'direct': True,
+ }
+ }
]
def report_following_redirect(self, new_url):
"""Report information extraction."""
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
- def report_detected(self, name):
- self._downloader.write_debug(f'Identified a {name}')
+ def report_detected(self, name, num=1, note=None):
+ if num > 1:
+ name += 's'
+ elif not num:
+ return
+ else:
+ num = 'a'
- def _extract_rss(self, url, video_id, doc):
- playlist_title = doc.find('./channel/title').text
- playlist_desc_el = doc.find('./channel/description')
- playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+ self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
+ def _fragment_query(self, url):
+ if self._configuration_arg('fragment_query'):
+ query_string = urllib.parse.urlparse(url).query
+ if query_string:
+ return {'extra_param_to_segment_url': query_string}
+ return {}
+
+ def _extract_rss(self, url, video_id, doc):
NS_MAP = {
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
}
entries = []
for it in doc.findall('./channel/item'):
- next_url = None
- enclosure_nodes = it.findall('./enclosure')
- for e in enclosure_nodes:
- next_url = e.attrib.get('url')
- if next_url:
- break
-
- if not next_url:
- next_url = xpath_text(it, 'link', fatal=False)
-
+ next_url = next(
+ (e.attrib.get('url') for e in it.findall('./enclosure')),
+ xpath_text(it, 'link', fatal=False))
if not next_url:
continue
- if it.find('guid').text is not None:
- next_url = smuggle_url(next_url, {'force_videoid': it.find('guid').text})
+ guid = try_call(lambda: it.find('guid').text)
+ if guid:
+ next_url = smuggle_url(next_url, {'force_videoid': guid})
def itunes(key):
- return xpath_text(
- it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
- default=None)
-
- duration = itunes('duration')
- explicit = (itunes('explicit') or '').lower()
- if explicit in ('true', 'yes'):
- age_limit = 18
- elif explicit in ('false', 'no'):
- age_limit = 0
- else:
- age_limit = None
+ return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)
entries.append({
'_type': 'url_transparent',
'url': next_url,
- 'title': it.find('title').text,
+ 'title': try_call(lambda: it.find('title').text),
'description': xpath_text(it, 'description', default=None),
- 'timestamp': unified_timestamp(
- xpath_text(it, 'pubDate', default=None)),
- 'duration': int_or_none(duration) or parse_duration(duration),
+ 'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
+ 'duration': parse_duration(itunes('duration')),
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
'episode': itunes('title'),
'episode_number': int_or_none(itunes('episode')),
'season_number': int_or_none(itunes('season')),
- 'age_limit': age_limit,
+ 'age_limit': {'true': 18, 'yes': 18, 'false': 0, 'no': 0}.get((itunes('explicit') or '').lower()),
})
return {
'_type': 'playlist',
'id': url,
- 'title': playlist_title,
- 'description': playlist_desc,
- 'entries': entries,
- }
-
- def _extract_camtasia(self, url, video_id, webpage):
- """ Returns None if no camtasia video can be found. """
-
- camtasia_cfg = self._search_regex(
- r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
- webpage, 'camtasia configuration file', default=None)
- if camtasia_cfg is None:
- return None
-
- title = self._html_search_meta('DC.title', webpage, fatal=True)
-
- camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
- camtasia_cfg = self._download_xml(
- camtasia_url, video_id,
- note='Downloading camtasia configuration',
- errnote='Failed to download camtasia configuration')
- fileset_node = camtasia_cfg.find('./playlist/array/fileset')
-
- entries = []
- for n in fileset_node.getchildren():
- url_n = n.find('./uri')
- if url_n is None:
- continue
-
- entries.append({
- 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
- 'title': '%s - %s' % (title, n.tag),
- 'url': compat_urlparse.urljoin(url, url_n.text),
- 'duration': float_or_none(n.find('./duration').text),
- })
-
- return {
- '_type': 'playlist',
+ 'title': try_call(lambda: doc.find('./channel/title').text),
+ 'description': try_call(lambda: doc.find('./channel/description').text),
'entries': entries,
- 'title': title,
}
def _kvs_getrealurl(self, video_url, license_code):
@@ -2651,7 +2249,7 @@ class GenericIE(InfoExtractor):
for o in range(len(newmagic) - 1, -1, -1):
new = ''
- l = (o + sum([int(n) for n in license[o:]])) % 32
+ l = (o + sum(int(n) for n in license[o:])) % 32
for i in range(0, len(newmagic)):
if i == o:
@@ -2682,7 +2280,7 @@ class GenericIE(InfoExtractor):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
- parsed_url = compat_urlparse.urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
if not parsed_url.scheme:
default_search = self.get_param('default_search')
if default_search is None:
@@ -2713,59 +2311,59 @@ class GenericIE(InfoExtractor):
default_search += ':'
return self.url_result(default_search + url)
- url, smuggled_data = unsmuggle_url(url)
+ original_url = url
+ url, smuggled_data = unsmuggle_url(url, {})
force_videoid = None
- is_intentional = smuggled_data and smuggled_data.get('to_generic')
- if smuggled_data and 'force_videoid' in smuggled_data:
+ is_intentional = smuggled_data.get('to_generic')
+ if 'force_videoid' in smuggled_data:
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
video_id = self._generic_id(url)
- self.to_screen('%s: Requesting header' % video_id)
-
- head_req = HEADRequest(url)
- head_response = self._request_webpage(
- head_req, video_id,
- note=False, errnote='Could not send HEAD request to %s' % url,
- fatal=False)
-
- if head_response is not False:
- # Check for redirect
- new_url = head_response.geturl()
- if url != new_url:
- self.report_following_redirect(new_url)
- if force_videoid:
- new_url = smuggle_url(
- new_url, {'force_videoid': force_videoid})
- return self.url_result(new_url)
-
- full_response = None
- if head_response is False:
- request = sanitized_Request(url)
- request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
- head_response = full_response
+ # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
+ # making it impossible to download only a chunk of the file (yet we need only 512kB to
+ # test whether it's HTML or not). With hypervideo's default Accept-Encoding, that would
+ # always result in downloading the whole file, which is not desirable.
+ # Therefore, for the extraction pass, we have to override Accept-Encoding to any in order
+ # to accept raw bytes and be able to download only a chunk.
+ # It would probably be better to solve this by checking Content-Type for
+ # application/octet-stream after a HEAD request, but it is unclear whether we can rely on that.
+ full_response = self._request_webpage(url, video_id, headers={
+ 'Accept-Encoding': '*',
+ **smuggled_data.get('http_headers', {})
+ })
+ new_url = full_response.geturl()
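+ # A bare http -> https upgrade is treated as the same URL; any other
+ # redirect is reported and re-dispatched through url_result()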
+ if new_url == urllib.parse.urlparse(url)._replace(scheme='https').geturl():
+ url = new_url
+ elif url != new_url:
+ self.report_following_redirect(new_url)
+ if force_videoid:
+ new_url = smuggle_url(new_url, {'force_videoid': force_videoid})
+ return self.url_result(new_url)
info_dict = {
'id': video_id,
'title': self._generic_title(url),
- 'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
+ 'timestamp': unified_timestamp(full_response.headers.get('Last-Modified'))
}
# Check for direct link to a video
- content_type = head_response.headers.get('Content-Type', '').lower()
+ content_type = full_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
- format_id = compat_str(m.group('format_id'))
+ headers = smuggled_data.get('http_headers', {})
+ format_id = str(m.group('format_id'))
subtitles = {}
if format_id.endswith('mpegurl'):
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
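+ # When the 'fragment_query' extractor-arg is set, _fragment_query() carries the
+ # manifest URL's query string over to fragment requests (via extra_param_to_segment_url)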
+ info_dict.update(self._fragment_query(url))
elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
- formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id)
+ formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
+ info_dict.update(self._fragment_query(url))
elif format_id == 'f4m':
- formats = self._extract_f4m_formats(url, video_id)
+ formats = self._extract_f4m_formats(url, video_id, headers=headers)
else:
formats = [{
'format_id': format_id,
@@ -2773,28 +2371,16 @@ class GenericIE(InfoExtractor):
'vcodec': 'none' if m.group('type') == 'audio' else None
}]
info_dict['direct'] = True
- self._sort_formats(formats)
- info_dict['formats'] = formats
- info_dict['subtitles'] = subtitles
+ info_dict.update({
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'http_headers': headers,
+ })
return info_dict
if not self.get_param('test', False) and not is_intentional:
force = self.get_param('force_generic_extractor', False)
- self.report_warning(
- '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
-
- if not full_response:
- request = sanitized_Request(url)
- # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
- # making it impossible to download only chunk of the file (yet we need only 512kB to
- # test whether it's HTML or not). According to hypervideo default Accept-Encoding
- # that will always result in downloading the whole file that is not desirable.
- # Therefore for extraction pass we have to override Accept-Encoding to any in order
- # to accept raw bytes and being able to download only a chunk.
- # It may probably better to solve this by checking Content-Type for application/octet-stream
- # after HEAD request finishes, but not sure if we can rely on this.
- request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
+ self.report_warning('%s generic information extractor' % ('Forcing' if force else 'Falling back on'))
first_bytes = full_response.read(512)
@@ -2802,7 +2388,7 @@ class GenericIE(InfoExtractor):
if first_bytes.startswith(b'#EXTM3U'):
self.report_detected('M3U playlist')
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
- self._sort_formats(info_dict['formats'])
+ info_dict.update(self._fragment_query(url))
return info_dict
# Maybe it's a direct link to a video?
@@ -2828,7 +2414,7 @@ class GenericIE(InfoExtractor):
try:
try:
doc = compat_etree_fromstring(webpage)
- except compat_xml_parse_error:
+ except xml.etree.ElementTree.ParseError:
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
self.report_detected('RSS feed')
@@ -2836,12 +2422,10 @@ class GenericIE(InfoExtractor):
elif doc.tag == 'SmoothStreamingMedia':
info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
self.report_detected('ISM manifest')
- self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self.report_detected('SMIL file')
- self._sort_formats(smil['formats'])
return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
self.report_detected('XSPF playlist')
@@ -2855,947 +2439,83 @@ class GenericIE(InfoExtractor):
doc,
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
+ info_dict.update(self._fragment_query(url))
self.report_detected('DASH manifest')
- self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
self.report_detected('F4M manifest')
- self._sort_formats(info_dict['formats'])
return info_dict
- except compat_xml_parse_error:
+ except xml.etree.ElementTree.ParseError:
pass
- # Is it a Camtasia project?
- camtasia_res = self._extract_camtasia(url, video_id, webpage)
- if camtasia_res is not None:
- self.report_detected('Camtasia video')
- return camtasia_res
+ info_dict.update({
+ # it's tempting to parse this further, but you would
+ # have to take into account all the variations like
+ # Video Title - Site Name
+ # Site Name | Video Title
+ # Video Title - Tagline | Site Name
+ # and so on and so forth; it's just not practical
+ 'title': self._generic_title('', webpage, default='video'),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'age_limit': self._rta_search(webpage),
+ })
+
+ self._downloader.write_debug('Looking for embeds')
+ embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
+ if len(embeds) == 1:
+ return {**info_dict, **embeds[0]}
+ elif embeds:
+ return self.playlist_result(embeds, **info_dict)
+ raise UnsupportedError(url)
+
+ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
+ """Returns an iterator of video entries"""
+ info_dict = types.MappingProxyType(info_dict) # Prevents accidental mutation
+ video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
+ url, smuggled_data = unsmuggle_url(url, {})
+ actual_url = urlh.geturl() if urlh else url
# Sometimes the embedded video player is hidden behind percent encoding
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
# Unescaping the whole page allows handling those cases in a generic way
# FIXME: unescaping the whole page may break URLs, so this is commented out for now.
# There should probably be a second run of the generic extractor on the unescaped webpage.
- # webpage = compat_urllib_parse_unquote(webpage)
-
+ # webpage = urllib.parse.unquote(webpage)
- # Unescape squarespace embeds to be detected by generic extractor,
- # see https://github.com/ytdl-org/youtube-dl/issues/21294
- webpage = re.sub(
- r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
- lambda x: unescapeHTML(x.group(0)), webpage)
-
- # it's tempting to parse this further, but you would
- # have to take into account all the variations like
- # Video Title - Site Name
- # Site Name | Video Title
- # Video Title - Tagline | Site Name
- # and so on and so forth; it's just not practical
- video_title = (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'video title', default='video'))
-
- # Try to detect age limit automatically
- age_limit = self._rta_search(webpage)
- # And then there are the jokers who advertise that they use RTA,
- # but actually don't.
- AGE_LIMIT_MARKERS = [
- r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
- ]
- if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
- age_limit = 18
-
- # video uploader is domain name
- video_uploader = self._search_regex(
- r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
-
- video_description = self._og_search_description(webpage, default=None)
- video_thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- info_dict.update({
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'age_limit': age_limit,
- })
-
- self._downloader.write_debug('Looking for video embeds')
-
- # Look for Brightcove Legacy Studio embeds
- bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
- if bc_urls:
- entries = [{
- '_type': 'url',
- 'url': smuggle_url(bc_url, {'Referer': url}),
- 'ie_key': 'BrightcoveLegacy'
- } for bc_url in bc_urls]
-
- return {
- '_type': 'playlist',
- 'title': video_title,
- 'id': video_id,
- 'entries': entries,
- }
-
- # Look for Brightcove New Studio embeds
- bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
- if bc_urls:
- return self.playlist_from_matches(
- bc_urls, video_id, video_title,
- getter=lambda x: smuggle_url(x, {'referrer': url}),
- ie='BrightcoveNew')
-
- # Look for Nexx embeds
- nexx_urls = NexxIE._extract_urls(webpage)
- if nexx_urls:
- return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
-
- # Look for Nexx iFrame embeds
- nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
- if nexx_embed_urls:
- return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
-
- # Look for ThePlatform embeds
- tp_urls = ThePlatformIE._extract_urls(webpage)
- if tp_urls:
- return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
-
- arc_urls = ArcPublishingIE._extract_urls(webpage)
- if arc_urls:
- return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
-
- mychannels_urls = MedialaanIE._extract_urls(webpage)
- if mychannels_urls:
- return self.playlist_from_matches(
- mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
-
- # Look for embedded rtl.nl player
- matches = re.findall(
- r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
- webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
-
- vimeo_urls = VimeoIE._extract_urls(url, webpage)
- if vimeo_urls:
- return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
-
- vhx_url = VHXEmbedIE._extract_url(webpage)
- if vhx_url:
- return self.url_result(vhx_url, VHXEmbedIE.ie_key())
-
- # Invidious Instances
- # https://github.com/hypervideo/hypervideo/issues/195
- # https://github.com/iv-org/invidious/pull/1730
- youtube_url = self._search_regex(
- r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
- webpage, 'youtube link', default=None)
- if youtube_url:
- return self.url_result(youtube_url, YoutubeIE.ie_key())
-
- # Look for YouTube embeds
- youtube_urls = YoutubeIE._extract_urls(webpage)
- if youtube_urls:
- return self.playlist_from_matches(
- youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
-
- matches = DailymotionIE._extract_urls(webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title)
-
- # Look for embedded Dailymotion playlist player (#3822)
- m = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
- if m:
- playlists = re.findall(
- r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
- if playlists:
- return self.playlist_from_matches(
- playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
-
- # Look for DailyMail embeds
- dailymail_urls = DailyMailIE._extract_urls(webpage)
- if dailymail_urls:
- return self.playlist_from_matches(
- dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
-
- # Look for Teachable embeds, must be before Wistia
- teachable_url = TeachableIE._extract_url(webpage, url)
- if teachable_url:
- return self.url_result(teachable_url)
-
- # Look for embedded Wistia player
- wistia_urls = WistiaIE._extract_urls(webpage)
- if wistia_urls:
- playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
- for entry in playlist['entries']:
- entry.update({
- '_type': 'url_transparent',
- 'uploader': video_uploader,
- })
- return playlist
-
- # Look for SVT player
- svt_url = SVTIE._extract_url(webpage)
- if svt_url:
- return self.url_result(svt_url, 'SVT')
-
- # Look for Bandcamp pages with custom domain
- mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
- if mobj is not None:
- burl = unescapeHTML(mobj.group(1))
- # Don't set the extractor because it can be a track url or an album
- return self.url_result(burl)
-
- # Look for embedded Vevo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded Viddler player
- mobj = re.search(
- r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NYTimes player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Libsyn player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Ooyala videos
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
- or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
- or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
- if mobj is not None:
- embed_token = self._search_regex(
- r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
- webpage, 'ooyala embed token', default=None)
- return OoyalaIE._build_url_result(smuggle_url(
- mobj.group('ec'), {
- 'domain': url,
- 'embed_token': embed_token,
- }))
-
- # Look for multiple Ooyala embeds on SBN network websites
- mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
- if mobj is not None:
- embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
- if embeds:
- return self.playlist_from_matches(
- embeds, video_id, video_title,
- getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
-
- # Look for Aparat videos
- mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Aparat')
-
- # Look for MPORA videos
- mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Mpora')
-
- # Look for embedded Facebook player
- facebook_urls = FacebookIE._extract_urls(webpage)
- if facebook_urls:
- return self.playlist_from_matches(facebook_urls, video_id, video_title)
-
- # Look for embedded VK player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'VK')
- # Look for embedded Odnoklassniki player
- odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
- if odnoklassniki_url:
- return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
-
- # Look for sibnet embedded player
- sibnet_urls = VKIE._extract_sibnet_urls(webpage)
- if sibnet_urls:
- return self.playlist_from_matches(sibnet_urls, video_id, video_title)
-
- # Look for embedded ivi player
- mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Ivi')
-
- # Look for embedded Huffington Post player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'HuffPost')
-
- # Look for embed.ly
- mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
- mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
- if mobj is not None:
- return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
-
- # Look for funnyordie embed
- matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
- if matches:
- return self.playlist_from_matches(
- matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
-
- # Look for Simplecast embeds
- simplecast_urls = SimplecastIE._extract_urls(webpage)
- if simplecast_urls:
- return self.playlist_from_matches(
- simplecast_urls, video_id, video_title)
-
- # Look for BBC iPlayer embed
- matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
-
- # Look for embedded RUTV player
- rutv_url = RUTVIE._extract_url(webpage)
- if rutv_url:
- return self.url_result(rutv_url, 'RUTV')
-
- # Look for embedded TVC player
- tvc_url = TVCIE._extract_url(webpage)
- if tvc_url:
- return self.url_result(tvc_url, 'TVC')
-
- # Look for embedded SportBox player
- sportbox_urls = SportBoxIE._extract_urls(webpage)
- if sportbox_urls:
- return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
-
- # Look for embedded XHamster player
- xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
- if xhamster_urls:
- return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
-
- # Look for embedded TNAFlixNetwork player
- tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
- if tnaflix_urls:
- return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
-
- # Look for embedded PornHub player
- pornhub_urls = PornHubIE._extract_urls(webpage)
- if pornhub_urls:
- return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
-
- # Look for embedded DrTuber player
- drtuber_urls = DrTuberIE._extract_urls(webpage)
- if drtuber_urls:
- return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
-
- # Look for embedded RedTube player
- redtube_urls = RedTubeIE._extract_urls(webpage)
- if redtube_urls:
- return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
-
- # Look for embedded Tube8 player
- tube8_urls = Tube8IE._extract_urls(webpage)
- if tube8_urls:
- return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
-
- # Look for embedded Mofosex player
- mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
- if mofosex_urls:
- return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
-
- # Look for embedded Spankwire player
- spankwire_urls = SpankwireIE._extract_urls(webpage)
- if spankwire_urls:
- return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
-
- # Look for embedded YouPorn player
- youporn_urls = YouPornIE._extract_urls(webpage)
- if youporn_urls:
- return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
-
- # Look for embedded Tvigle player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Tvigle')
-
- # Look for embedded TED player
- ted_urls = TedEmbedIE._extract_urls(webpage)
- if ted_urls:
- return self.playlist_from_matches(ted_urls, video_id, video_title, ie=TedEmbedIE.ie_key())
-
- # Look for embedded Ustream videos
- ustream_url = UstreamIE._extract_url(webpage)
- if ustream_url:
- return self.url_result(ustream_url, UstreamIE.ie_key())
-
- # Look for embedded arte.tv player
- arte_urls = ArteTVEmbedIE._extract_urls(webpage)
- if arte_urls:
- return self.playlist_from_matches(arte_urls, video_id, video_title)
-
- # Look for embedded francetv player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded Myvi.ru player
- myvi_url = MyviIE._extract_url(webpage)
- if myvi_url:
- return self.url_result(myvi_url)
-
- # Look for embedded soundcloud player
- soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
- if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
-
- # Look for tunein player
- tunein_urls = TuneInBaseIE._extract_urls(webpage)
- if tunein_urls:
- return self.playlist_from_matches(tunein_urls, video_id, video_title)
-
- # Look for embedded mtvservices player
- mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
- if mtvservices_url:
- return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
-
- # Look for embedded yahoo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Yahoo')
-
- # Look for embedded sbs.com.au player
- mobj = re.search(
- r'''(?x)
- (?:
- <meta\s+property="og:video"\s+content=|
- <iframe[^>]+?src=
- )
- (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'SBS')
-
- # Look for embedded Cinchcast player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Cinchcast')
-
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
- webpage)
- if not mobj:
- mobj = re.search(
- r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'MLB')
-
- mobj = re.search(
- r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
- webpage)
- if mobj is not None:
- return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
-
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Livestream')
-
- # Look for Zapiks embed
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Zapiks')
-
- # Look for Kaltura embeds
- kaltura_urls = KalturaIE._extract_urls(webpage)
- if kaltura_urls:
- return self.playlist_from_matches(
- kaltura_urls, video_id, video_title,
- getter=lambda x: smuggle_url(x, {'source_url': url}),
- ie=KalturaIE.ie_key())
-
- # Look for EaglePlatform embeds
- eagleplatform_url = EaglePlatformIE._extract_url(webpage)
- if eagleplatform_url:
- return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
-
- # Look for ClipYou (uses EaglePlatform) embeds
- mobj = re.search(
- r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
- if mobj is not None:
- return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
-
- # Look for Pladform embeds
- pladform_url = PladformIE._extract_url(webpage)
- if pladform_url:
- return self.url_result(pladform_url)
-
- # Look for Videomore embeds
- videomore_url = VideomoreIE._extract_url(webpage)
- if videomore_url:
- return self.url_result(videomore_url)
-
- # Look for Webcaster embeds
- webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
- if webcaster_url:
- return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
-
- # Look for Playwire embeds
- mobj = re.search(
- r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Crooks and Liars embeds
- mobj = re.search(
- r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NBC Sports VPlayer embeds
- nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
- if nbc_sports_url:
- return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
-
- # Look for NBC News embeds
- nbc_news_embed_url = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
- if nbc_news_embed_url:
- return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
-
- # Look for Google Drive embeds
- google_drive_url = GoogleDriveIE._extract_url(webpage)
- if google_drive_url:
- return self.url_result(google_drive_url, 'GoogleDrive')
-
- # Look for UDN embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
- if mobj is not None:
- return self.url_result(
- compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
-
- # Look for Senate ISVP iframe
- senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
- if senate_isvp_url:
- return self.url_result(senate_isvp_url, 'SenateISVP')
-
- # Look for Kinja embeds
- kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
- if kinja_embed_urls:
- return self.playlist_from_matches(
- kinja_embed_urls, video_id, video_title)
-
- # Look for OnionStudios embeds
- onionstudios_url = OnionStudiosIE._extract_url(webpage)
- if onionstudios_url:
- return self.url_result(onionstudios_url)
-
- # Look for Blogger embeds
- blogger_urls = BloggerIE._extract_urls(webpage)
- if blogger_urls:
- return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key())
-
- # Look for ViewLift embeds
- viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
- if viewlift_url:
- return self.url_result(viewlift_url)
-
- # Look for JWPlatform embeds
- jwplatform_urls = JWPlatformIE._extract_urls(webpage)
- if jwplatform_urls:
- return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
-
- # Look for Digiteka embeds
- digiteka_url = DigitekaIE._extract_url(webpage)
- if digiteka_url:
- return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
-
- # Look for Arkena embeds
- arkena_url = ArkenaIE._extract_url(webpage)
- if arkena_url:
- return self.url_result(arkena_url, ArkenaIE.ie_key())
-
- # Look for Piksel embeds
- piksel_url = PikselIE._extract_url(webpage)
- if piksel_url:
- return self.url_result(piksel_url, PikselIE.ie_key())
-
- # Look for Limelight embeds
- limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
- if limelight_urls:
- return self.playlist_result(
- limelight_urls, video_id, video_title, video_description)
-
- # Look for Anvato embeds
- anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
- if anvato_urls:
- return self.playlist_result(
- anvato_urls, video_id, video_title, video_description)
-
- # Look for AdobeTVVideo embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))),
- 'AdobeTVVideo')
-
- # Look for Vine embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
-
- # Look for VODPlatform embeds
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
-
- # Look for Mangomolo embeds
- mobj = re.search(
- r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
- (?:
- admin\.mangomolo\.com/analytics/index\.php/customers/embed|
- player\.mangomolo\.com/v1
- )/
- (?:
- video\?.*?\bid=(?P<video_id>\d+)|
- (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
- ).+?)\1''', webpage)
- if mobj is not None:
- info = {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
- video_id = mobj.group('video_id')
- if video_id:
- info.update({
- 'ie_key': 'MangomoloVideo',
- 'id': video_id,
- })
- else:
- info.update({
- 'ie_key': 'MangomoloLive',
- 'id': mobj.group('channel_id'),
- })
- return info
-
- # Look for Instagram embeds
- instagram_embed_url = InstagramIE._extract_embed_url(webpage)
- if instagram_embed_url is not None:
- return self.url_result(
- self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
-
- # Look for 3Q SDN embeds
- threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
- if threeqsdn_url:
- return {
- '_type': 'url_transparent',
- 'ie_key': ThreeQSDNIE.ie_key(),
- 'url': self._proto_relative_url(threeqsdn_url),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
-
- # Look for VBOX7 embeds
- vbox7_url = Vbox7IE._extract_url(webpage)
- if vbox7_url:
- return self.url_result(vbox7_url, Vbox7IE.ie_key())
-
- # Look for DBTV embeds
- dbtv_urls = DBTVIE._extract_urls(webpage)
- if dbtv_urls:
- return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
-
- # Look for Videa embeds
- videa_urls = VideaIE._extract_urls(webpage)
- if videa_urls:
- return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
-
- # Look for 20 minuten embeds
- twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
- if twentymin_urls:
- return self.playlist_from_matches(
- twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
-
- # Look for VideoPress embeds
- videopress_urls = VideoPressIE._extract_urls(webpage)
- if videopress_urls:
- return self.playlist_from_matches(
- videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
-
- # Look for Rutube embeds
- rutube_urls = RutubeIE._extract_urls(webpage)
- if rutube_urls:
- return self.playlist_from_matches(
- rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
-
- # Look for Glomex embeds
- glomex_urls = list(GlomexEmbedIE._extract_urls(webpage, url))
- if glomex_urls:
- return self.playlist_from_matches(
- glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key())
-
- # Look for megatv.com embeds
- megatvcom_urls = list(MegaTVComEmbedIE._extract_urls(webpage))
- if megatvcom_urls:
- return self.playlist_from_matches(
- megatvcom_urls, video_id, video_title, ie=MegaTVComEmbedIE.ie_key())
-
- # Look for ant1news.gr embeds
- ant1newsgr_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
- if ant1newsgr_urls:
- return self.playlist_from_matches(
- ant1newsgr_urls, video_id, video_title, ie=Ant1NewsGrEmbedIE.ie_key())
-
- # Look for WashingtonPost embeds
- wapo_urls = WashingtonPostIE._extract_urls(webpage)
- if wapo_urls:
- return self.playlist_from_matches(
- wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
-
- # Look for Mediaset embeds
- mediaset_urls = MediasetIE._extract_urls(self, webpage)
- if mediaset_urls:
- return self.playlist_from_matches(
- mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
-
- # Look for JOJ.sk embeds
- joj_urls = JojIE._extract_urls(webpage)
- if joj_urls:
- return self.playlist_from_matches(
- joj_urls, video_id, video_title, ie=JojIE.ie_key())
-
- # Look for megaphone.fm embeds
- mpfn_urls = MegaphoneIE._extract_urls(webpage)
- if mpfn_urls:
- return self.playlist_from_matches(
- mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
-
- # Look for vzaar embeds
- vzaar_urls = VzaarIE._extract_urls(webpage)
- if vzaar_urls:
- return self.playlist_from_matches(
- vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
-
- channel9_urls = Channel9IE._extract_urls(webpage)
- if channel9_urls:
- return self.playlist_from_matches(
- channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
-
- vshare_urls = VShareIE._extract_urls(webpage)
- if vshare_urls:
- return self.playlist_from_matches(
- vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
-
- # Look for Mediasite embeds
- mediasite_urls = MediasiteIE._extract_urls(webpage)
- if mediasite_urls:
- entries = [
- self.url_result(smuggle_url(
- compat_urlparse.urljoin(url, mediasite_url),
- {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
- for mediasite_url in mediasite_urls]
- return self.playlist_result(entries, video_id, video_title)
-
- springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
- if springboardplatform_urls:
- return self.playlist_from_matches(
- springboardplatform_urls, video_id, video_title,
- ie=SpringboardPlatformIE.ie_key())
-
- yapfiles_urls = YapFilesIE._extract_urls(webpage)
- if yapfiles_urls:
- return self.playlist_from_matches(
- yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
-
- vice_urls = ViceIE._extract_urls(webpage)
- if vice_urls:
- return self.playlist_from_matches(
- vice_urls, video_id, video_title, ie=ViceIE.ie_key())
-
- xfileshare_urls = XFileShareIE._extract_urls(webpage)
- if xfileshare_urls:
- return self.playlist_from_matches(
- xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
-
- cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
- if cloudflarestream_urls:
- return self.playlist_from_matches(
- cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
-
- peertube_urls = PeerTubeIE._extract_urls(webpage, url)
- if peertube_urls:
- return self.playlist_from_matches(
- peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
-
- indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
- if indavideo_urls:
- return self.playlist_from_matches(
- indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
-
- apa_urls = APAIE._extract_urls(webpage)
- if apa_urls:
- return self.playlist_from_matches(
- apa_urls, video_id, video_title, ie=APAIE.ie_key())
-
- foxnews_urls = FoxNewsIE._extract_urls(webpage)
- if foxnews_urls:
- return self.playlist_from_matches(
- foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
-
- sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
- r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
- webpage)]
- if sharevideos_urls:
- return self.playlist_from_matches(
- sharevideos_urls, video_id, video_title)
-
- viqeo_urls = ViqeoIE._extract_urls(webpage)
- if viqeo_urls:
- return self.playlist_from_matches(
- viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
-
- expressen_urls = ExpressenIE._extract_urls(webpage)
- if expressen_urls:
- return self.playlist_from_matches(
- expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
-
- zype_urls = ZypeIE._extract_urls(webpage)
- if zype_urls:
- return self.playlist_from_matches(
- zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
-
- gedi_urls = GediDigitalIE._extract_urls(webpage)
- if gedi_urls:
- return self.playlist_from_matches(
- gedi_urls, video_id, video_title, ie=GediDigitalIE.ie_key())
-
- # Look for RCS media group embeds
- rcs_urls = RCSEmbedsIE._extract_urls(webpage)
- if rcs_urls:
- return self.playlist_from_matches(
- rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
-
- wimtv_urls = WimTVIE._extract_urls(webpage)
- if wimtv_urls:
- return self.playlist_from_matches(
- wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key())
-
- bitchute_urls = BitChuteIE._extract_urls(webpage)
- if bitchute_urls:
- return self.playlist_from_matches(
- bitchute_urls, video_id, video_title, ie=BitChuteIE.ie_key())
-
- rumble_urls = RumbleEmbedIE._extract_urls(webpage)
- if len(rumble_urls) == 1:
- return self.url_result(rumble_urls[0], RumbleEmbedIE.ie_key())
- if rumble_urls:
- return self.playlist_from_matches(
- rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
-
- # Look for (tvopen|ethnos).gr embeds
- tvopengr_urls = list(TVOpenGrEmbedIE._extract_urls(webpage))
- if tvopengr_urls:
- return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())
-
- # Look for ert.gr webtv embeds
- ertwebtv_urls = list(ERTWebtvEmbedIE._extract_urls(webpage))
- if len(ertwebtv_urls) == 1:
- return self.url_result(self._proto_relative_url(ertwebtv_urls[0]), video_title=video_title, url_transparent=True)
- elif ertwebtv_urls:
- return self.playlist_from_matches(ertwebtv_urls, video_id, video_title, ie=ERTWebtvEmbedIE.ie_key())
-
- tvp_urls = TVPEmbedIE._extract_urls(webpage)
- if tvp_urls:
- return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())
-
- # Look for MainStreaming embeds
- mainstreaming_urls = MainStreamingIE._extract_urls(webpage)
- if mainstreaming_urls:
- return self.playlist_from_matches(mainstreaming_urls, video_id, video_title, ie=MainStreamingIE.ie_key())
-
- # Look for Gfycat Embeds
- gfycat_urls = GfycatIE._extract_urls(webpage)
- if gfycat_urls:
- return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())
-
- panopto_urls = PanoptoBaseIE._extract_urls(webpage)
- if panopto_urls:
- return self.playlist_from_matches(panopto_urls, video_id, video_title)
-
- # Look for Ruutu embeds
- ruutu_url = RuutuIE._extract_url(webpage)
- if ruutu_url:
- return self.url_result(ruutu_url, RuutuIE)
-
- # Look for HTML5 media
- entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
- if entries:
- self.report_detected('HTML5 media')
- if len(entries) == 1:
- entries[0].update({
- 'id': video_id,
- 'title': video_title,
- })
- else:
- for num, entry in enumerate(entries, start=1):
- entry.update({
- 'id': '%s-%s' % (video_id, num),
- 'title': '%s (%d)' % (video_title, num),
- })
- for entry in entries:
- self._sort_formats(entry['formats'])
- return self.playlist_result(entries, video_id, video_title)
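+ # Every registered extractor gets a chance to find its own embeds in the
+ # webpage; an extractor that raises StopExtraction claims the page exclusively,
+ # discarding embeds found by other extractors. 'block_ies' in smuggled data
+ # lets callers exclude specific extractors from this pass.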
+ embeds = []
+ for ie in self._downloader._ies.values():
+ if ie.ie_key() in smuggled_data.get('block_ies', []):
+ continue
+ gen = ie.extract_from_webpage(self._downloader, url, webpage)
+ current_embeds = []
+ try:
+ while True:
+ current_embeds.append(next(gen))
+ except self.StopExtraction:
+ self.report_detected(f'{ie.IE_NAME} exclusive embed', len(current_embeds),
+ embeds and 'discarding other embeds')
+ return current_embeds
+ except StopIteration:
+ self.report_detected(f'{ie.IE_NAME} embed', len(current_embeds))
+ embeds.extend(current_embeds)
+
+ if embeds:
+ return embeds
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
if jwplayer_data:
if isinstance(jwplayer_data.get('playlist'), str):
self.report_detected('JW Player playlist')
- return {
- **info_dict,
- '_type': 'url',
- 'ie_key': JWPlatformIE.ie_key(),
- 'url': jwplayer_data['playlist'],
- }
+ return [self.url_result(jwplayer_data['playlist'], 'JWPlatform')]
try:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url)
- self.report_detected('JW Player data')
- return merge_dicts(info, info_dict)
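+ # Only accept the JW Player data if parsing actually produced formats,
+ # either at the top level or within playlist entries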
+ if traverse_obj(info, 'formats', ('entries', ..., 'formats')):
+ self.report_detected('JW Player data')
+ return [info]
except ExtractorError:
# See https://github.com/ytdl-org/youtube-dl/pull/16735
pass
@@ -3806,24 +2526,21 @@ class GenericIE(InfoExtractor):
webpage)
if mobj is not None:
varname = mobj.group(1)
- sources = self._parse_json(
- mobj.group(2), video_id, transform_source=js_to_json,
- fatal=False) or []
- if not isinstance(sources, list):
- sources = [sources]
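+ # variadic() accepts either a single source dict or a list of them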
+ sources = variadic(self._parse_json(
+ mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
formats = []
subtitles = {}
for source in sources:
src = source.get('src')
- if not src or not isinstance(src, compat_str):
+ if not src or not isinstance(src, str):
continue
- src = compat_urlparse.urljoin(url, src)
+ src = urllib.parse.urljoin(url, src)
src_type = source.get('type')
- if isinstance(src_type, compat_str):
+ if isinstance(src_type, str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
- return self.url_result(src, YoutubeIE.ie_key())
+ return [self.url_result(src, YoutubeIE.ie_key())]
if src_type == 'application/dash+xml' or ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
src, video_id, mpd_id='dash', fatal=False)
@@ -3835,13 +2552,16 @@ class GenericIE(InfoExtractor):
m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
- else:
+ for fmt in formats:
+ fmt.update(self._fragment_query(src))
+
+ if not formats:
formats.append({
'url': src,
'ext': (mimetype2ext(src_type)
or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
'http_headers': {
- 'Referer': full_response.geturl(),
+ 'Referer': actual_url,
},
})
# https://docs.videojs.com/player#addRemoteTextTrack
@@ -3853,39 +2573,36 @@ class GenericIE(InfoExtractor):
if not src:
continue
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
- 'url': compat_urlparse.urljoin(url, src),
+ 'url': urllib.parse.urljoin(url, src),
'name': sub.get('label'),
'http_headers': {
- 'Referer': full_response.geturl(),
+ 'Referer': actual_url,
},
})
if formats or subtitles:
self.report_detected('video.js embed')
- self._sort_formats(formats)
- info_dict['formats'] = formats
- info_dict['subtitles'] = subtitles
- return info_dict
+ return [{'formats': formats, 'subtitles': subtitles}]
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(webpage, video_id, default={})
if json_ld.get('url') not in (url, None):
self.report_detected('JSON LD')
- if determine_ext(json_ld['url']) == 'm3u8':
- json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
- json_ld['url'], video_id, 'mp4')
- json_ld.pop('url')
- self._sort_formats(json_ld['formats'])
- else:
- json_ld['_type'] = 'url_transparent'
- json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
- return merge_dicts(json_ld, info_dict)
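+ # Direct media links are returned as-is; manifest (or unknown) URLs are
+ # re-dispatched via url_transparent so the appropriate extractor can handle them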
+ is_direct = json_ld.get('ext') not in (None, *MEDIA_EXTENSIONS.manifests)
+ return [merge_dicts({
+ '_type': 'video' if is_direct else 'url_transparent',
+ 'url': smuggle_url(json_ld['url'], {
+ 'force_videoid': video_id,
+ 'to_generic': True,
+ 'http_headers': {'Referer': url},
+ }),
+ }, json_ld)]
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
if RtmpIE.suitable(vurl):
return True
- vpath = compat_urlparse.urlparse(vurl).path
+ vpath = urllib.parse.urlparse(vurl).path
vext = determine_ext(vpath, None)
return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
@@ -3947,15 +2664,13 @@ class GenericIE(InfoExtractor):
if not formats[-1].get('height'):
formats[-1]['quality'] = 1
- self._sort_formats(formats)
-
- return {
+ return [{
'id': flashvars['video_id'],
'display_id': display_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
- }
+ }]
if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
@@ -3994,7 +2709,7 @@ class GenericIE(InfoExtractor):
self.report_detected('Twitter card')
if not found:
# We look for Open Graph info:
- # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
- # We have to match any number of spaces between elements; some sites try to align them, e.g. statigr.am
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
@@ -4009,20 +2724,14 @@ class GenericIE(InfoExtractor):
webpage)
if not found:
# Look also in Refresh HTTP header
- refresh_header = head_response.headers.get('Refresh')
+ refresh_header = urlh and urlh.headers.get('Refresh')
if refresh_header:
- # In python 2 response HTTP headers are bytestrings
- if sys.version_info < (3, 0) and isinstance(refresh_header, str):
- refresh_header = refresh_header.decode('iso-8859-1')
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
- new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
if new_url != url:
self.report_following_redirect(new_url)
- return {
- '_type': 'url',
- 'url': new_url,
- }
+ return [self.url_result(new_url)]
else:
found = None
@@ -4033,34 +2742,35 @@ class GenericIE(InfoExtractor):
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
if embed_url and embed_url != url:
self.report_detected('twitter:player iframe')
- return self.url_result(embed_url)
+ return [self.url_result(embed_url)]
if not found:
- raise UnsupportedError(url)
+ return []
+
+ domain_name = self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader', default=None)
entries = []
for video_url in orderedSet(found):
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+ video_url = urllib.parse.urljoin(url, video_url)
+ video_id = urllib.parse.unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):
entries.append(self.url_result(video_url, 'Youtube'))
continue
- # here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
headers = {
- 'referer': full_response.geturl()
+ 'referer': actual_url
}
entry_info_dict = {
'id': video_id,
- 'uploader': video_uploader,
- 'title': video_title,
- 'age_limit': age_limit,
+ 'uploader': domain_name,
+ 'title': info_dict['title'],
+ 'age_limit': info_dict['age_limit'],
'http_headers': headers,
}
@@ -4077,11 +2787,13 @@ class GenericIE(InfoExtractor):
if ext == 'smil':
entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
elif ext == 'xspf':
- return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+ return [self._extract_xspf_playlist(video_url, video_id)]
elif ext == 'm3u8':
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
+ entry_info_dict.update(self._fragment_query(video_url))
elif ext == 'mpd':
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
+ entry_info_dict.update(self._fragment_query(video_url))
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
@@ -4102,19 +2814,11 @@ class GenericIE(InfoExtractor):
else:
entry_info_dict['url'] = video_url
- if entry_info_dict.get('formats'):
- self._sort_formats(entry_info_dict['formats'])
-
entries.append(entry_info_dict)
- if len(entries) == 1:
- return entries[0]
- else:
+ if len(entries) > 1:
for num, e in enumerate(entries, start=1):
# 'url' results don't have a title
if e.get('title') is not None:
e['title'] = '%s (%d)' % (e['title'], num)
- return {
- '_type': 'playlist',
- 'entries': entries,
- }
+ return entries
diff --git a/hypervideo_dl/extractor/genericembeds.py b/hypervideo_dl/extractor/genericembeds.py
new file mode 100644
index 0000000..9b4f14d
--- /dev/null
+++ b/hypervideo_dl/extractor/genericembeds.py
@@ -0,0 +1,114 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import make_archive_id, unescapeHTML
+
+
+class HTML5MediaEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ IE_NAME = 'html5'
+ _WEBPAGE_TESTS = [
+ {
+ 'url': 'https://html.com/media/',
+ 'info_dict': {
+ 'title': 'HTML5 Media',
+ 'description': 'md5:933b2d02ceffe7a7a0f3c8326d91cc2a',
+ },
+ 'playlist_count': 2
+ }
+ ]
+
+ def _extract_from_webpage(self, url, webpage):
+ video_id, title = self._generic_id(url), self._generic_title(url, webpage)
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') or []
+ for num, entry in enumerate(entries, start=1):
+ entry.update({
+ 'id': f'{video_id}-{num}',
+ 'title': f'{title} ({num})',
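+ # keep archive ids compatible with the old 'generic' extractor so
+ # existing download-archive entries still match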
+ '_old_archive_ids': [
+ make_archive_id('generic', f'{video_id}-{num}' if len(entries) > 1 else video_id),
+ ],
+ })
+ yield entry
+
+
+class QuotedHTMLIE(InfoExtractor):
+ """For common cases of quoted/escaped html parts in the webpage"""
+ _VALID_URL = False
+ IE_NAME = 'generic:quoted-html'
+ IE_DESC = False # Do not list
+ _WEBPAGE_TESTS = [{
+ # 2 YouTube embeds in data-html
+ 'url': 'https://24tv.ua/bronetransporteri-ozbroyenni-zsu-shho-vidomo-pro-bronovik-wolfhound_n2167966',
+ 'info_dict': {
+ 'id': 'bronetransporteri-ozbroyenni-zsu-shho-vidomo-pro-bronovik-wolfhound_n2167966',
+ 'title': 'Броньовик Wolfhound: гігант, який допомагає ЗСУ знищувати окупантів на фронті',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'timestamp': float,
+ 'upload_date': str,
+ 'description': 'md5:6816e1e5a65304bd7898e4c7eb1b26f7',
+ 'age_limit': 0,
+ },
+ 'playlist_count': 2
+ }, {
+ # Generic iframe embed of TV24UAPlayerIE within data-html
+ 'url': 'https://24tv.ua/harkivyani-zgaduyut-misto-do-viyni-shhemlive-video_n1887584',
+ 'info_dict': {
+ 'id': '1887584',
+ 'ext': 'mp4',
+ 'title': 'Харків\'яни згадують місто до війни: щемливе відео',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ # YouTube embeds on Squarespace (data-html): https://github.com/ytdl-org/youtube-dl/issues/21294
+ 'url': 'https://www.harvardballetcompany.org/past-productions',
+ 'info_dict': {
+ 'id': 'past-productions',
+ 'title': 'Productions — Harvard Ballet Company',
+ 'age_limit': 0,
+ 'description': 'Past Productions',
+ },
+ 'playlist_mincount': 26
+ }, {
+ # Squarespace video embed, 2019-08-28, data-html
+ 'url': 'http://ootboxford.com',
+ 'info_dict': {
+ 'id': 'Tc7b_JGdZfw',
+ 'title': 'Out of the Blue, at Childish Things 10',
+ 'ext': 'mp4',
+ 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
+ 'uploader_id': 'helendouglashouse',
+ 'uploader': 'Helen & Douglas House',
+ 'upload_date': '20140328',
+ 'availability': 'public',
+ 'view_count': int,
+ 'channel': 'Helen & Douglas House',
+ 'comment_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/helendouglashouse',
+ 'duration': 253,
+ 'channel_url': 'https://www.youtube.com/channel/UCTChGezrZVmlYlpMlkmulPA',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'channel_follower_count': int,
+ 'channel_id': 'UCTChGezrZVmlYlpMlkmulPA',
+ 'tags': 'count:6',
+ 'categories': ['Nonprofits & Activism'],
+ 'like_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/Tc7b_JGdZfw/hqdefault.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ combined = ''
+ for _, html in re.findall(r'(?s)\bdata-html=(["\'])((?:(?!\1).)+)\1', webpage):
+ # unescapeHTML can handle &quot; etc., unquote can handle percent encoding
+ unquoted_html = unescapeHTML(urllib.parse.unquote(html))
+ if unquoted_html != html:
+ combined += unquoted_html
+ if combined:
+ yield from self._extract_generic_embeds(url, combined)
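
The double decode in QuotedHTMLIE is the whole trick: data-html payloads may be HTML-entity-escaped, percent-encoded, or both, and an attribute is only treated as embedded markup if decoding actually changed it. A minimal stand-alone sketch of that idea, using html.unescape as a stand-in for hypervideo_dl.utils.unescapeHTML:

    import re
    import urllib.parse
    from html import unescape  # stand-in for hypervideo_dl.utils.unescapeHTML

    def extract_quoted_html(webpage):
        combined = ''
        for _, html in re.findall(r'(?s)\bdata-html=(["\'])((?:(?!\1).)+)\1', webpage):
            # percent-decode first, then resolve entities, mirroring the order above
            unquoted = unescape(urllib.parse.unquote(html))
            if unquoted != html:  # keep only attributes that were really encoded
                combined += unquoted
        return combined

    page = '<div data-html="&lt;iframe src=&quot;https%3A//example.com/embed&quot;&gt;&lt;/iframe&gt;"></div>'
    print(extract_quoted_html(page))
    # -> <iframe src="https://example.com/embed"></iframe>
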
diff --git a/hypervideo_dl/extractor/genius.py b/hypervideo_dl/extractor/genius.py
new file mode 100644
index 0000000..62f5a28
--- /dev/null
+++ b/hypervideo_dl/extractor/genius.py
@@ -0,0 +1,127 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ smuggle_url,
+ str_or_none,
+ traverse_obj,
+ unescapeHTML,
+)
+
+
+class GeniusIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)'
+ _TESTS = [{
+ 'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
+ 'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
+ 'info_dict': {
+ 'id': '6313303597112',
+ 'ext': 'mp4',
+ 'title': 'Vince Staples Breaks Down The Meaning Of “When Sparks Fly”',
+ 'description': 'md5:bc15e00342c537c0039d414423ae5752',
+ 'tags': 'count:1',
+ 'uploader_id': '4863540648001',
+ 'duration': 388.416,
+ 'upload_date': '20221005',
+ 'timestamp': 1664982341,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://genius.com/videos/Breaking-down-drakes-certified-lover-boy-kanye-beef-way-2-sexy-cudi',
+ 'md5': 'b8ed87a5efd1473bd027c20a969d4060',
+ 'info_dict': {
+ 'id': '6271792014001',
+ 'ext': 'mp4',
+ 'title': 'md5:c6355f7fa8a70bc86492a3963919fc15',
+ 'description': 'md5:1774638c31548b31b037c09e9b821393',
+ 'tags': 'count:3',
+ 'uploader_id': '4863540648001',
+ 'duration': 2685.099,
+ 'upload_date': '20210909',
+ 'timestamp': 1631209167,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ metadata = self._search_json(
+ r'<meta content="', webpage, 'metadata', display_id, transform_source=unescapeHTML)
+ video_id = traverse_obj(
+ metadata, ('video', 'provider_id'),
+ ('dfp_kv', lambda _, x: x['name'] == 'brightcove_video_id', 'values', 0), get_all=False)
+ if not video_id:
+ raise ExtractorError('Brightcove video id not found in webpage')
+
+ config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
+ account_id = config.get('brightcove_account_id', '4863540648001')
+ player_id = traverse_obj(
+ config, 'brightcove_standard_web_player_id', 'brightcove_standard_no_autoplay_web_player_id',
+ 'brightcove_modal_web_player_id', 'brightcove_song_story_web_player_id', default='S1ZcmcOC1x')
+
+ return self.url_result(
+ smuggle_url(
+ f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
+ {'referrer': url}), 'BrightcoveNew', video_id)
+
+
+class GeniusLyricsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics[?/#]?'
+ _TESTS = [{
+ 'url': 'https://genius.com/Lil-baby-heyy-lyrics',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '8454545',
+ 'title': 'Heyy',
+ 'description': 'Heyy by Lil Baby',
+ },
+ }, {
+ 'url': 'https://genius.com/Outkast-two-dope-boyz-in-a-cadillac-lyrics',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '36239',
+ 'title': 'Two Dope Boyz (In a Cadillac)',
+ 'description': 'Two Dope Boyz (In a Cadillac) by OutKast',
+ },
+ }, {
+ 'url': 'https://genius.com/Playboi-carti-rip-lyrics',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '3710582',
+ 'title': 'R.I.P.',
+ 'description': 'R.I.P. by Playboi Carti',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_string = self._search_json(
+ r'window\.__PRELOADED_STATE__\s*=\s*JSON\.parse\(', webpage, 'json string',
+ display_id, transform_source=js_to_json, contains_pattern=r'\'{(?s:.+)}\'')
+ song_info = self._parse_json(json_string, display_id)
+ song_id = str_or_none(traverse_obj(song_info, ('songPage', 'song')))
+ if not song_id:
+ raise ExtractorError('Song id not found in webpage')
+
+ title = traverse_obj(
+ song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Title', 'value'),
+ get_all=False, default='untitled')
+ artist = traverse_obj(
+ song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Primary Artist', 'value'),
+ get_all=False, default='unknown artist')
+ media = traverse_obj(
+ song_info, ('entities', 'songs', song_id, 'media'), expected_type=list, default=[])
+
+ entries = []
+ for m in media:
+ if m.get('type') in ('video', 'audio') and m.get('url'):
+ if m.get('provider') == 'spotify':
+ self.to_screen(f'{song_id}: Skipping Spotify audio embed')
+ else:
+ entries.append(self.url_result(m['url']))
+
+ return self.playlist_result(entries, song_id, title, f'{title} by {artist}')
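
The traverse_obj calls in GeniusLyricsIE use predicate path segments: the lambda receives (index, value) pairs and the first matching entry's 'value' is returned. For readers unfamiliar with the helper, a plain-Python equivalent of the Title/Primary Artist lookups (the sample data is illustrative, not real API output):

    song_info = {
        'songPage': {
            'trackingData': [
                {'key': 'Title', 'value': 'Heyy'},
                {'key': 'Primary Artist', 'value': 'Lil Baby'},
            ],
        },
    }

    def first_tracking_value(info, wanted_key, default=None):
        # equivalent of traverse_obj(info, ('songPage', 'trackingData',
        #     lambda _, x: x['key'] == wanted_key, 'value'), get_all=False)
        for item in info.get('songPage', {}).get('trackingData', []):
            if item.get('key') == wanted_key:
                return item.get('value')
        return default

    print(first_tracking_value(song_info, 'Title', 'untitled'))          # Heyy
    print(first_tracking_value(song_info, 'Primary Artist', 'unknown'))  # Lil Baby
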
diff --git a/hypervideo_dl/extractor/gettr.py b/hypervideo_dl/extractor/gettr.py
index 327a4d0..7795dc5 100644
--- a/hypervideo_dl/extractor/gettr.py
+++ b/hypervideo_dl/extractor/gettr.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
bool_or_none,
@@ -124,8 +121,6 @@ class GettrIE(GettrBaseIE):
'height': int_or_none(post_data.get('vid_hgt')),
})
- self._sort_formats(formats)
-
return {
'id': post_id,
'title': title,
@@ -195,8 +190,6 @@ class GettrStreamingIE(GettrBaseIE):
'url': urljoin(self._MEDIA_BASE_URL, thumbnail),
} for thumbnail in try_get(video_info, lambda x: x['postData']['imgs'], list) or []]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': try_get(video_info, lambda x: x['postData']['ttl'], str),
diff --git a/hypervideo_dl/extractor/gfycat.py b/hypervideo_dl/extractor/gfycat.py
index 2ad03e2..edc2e56 100644
--- a/hypervideo_dl/extractor/gfycat.py
+++ b/hypervideo_dl/extractor/gfycat.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -13,7 +8,8 @@ from ..utils import (
class GfycatIE(InfoExtractor):
- _VALID_URL = r'(?i)https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
+ _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
+ _EMBED_REGEX = [rf'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
@@ -85,14 +81,6 @@ class GfycatIE(InfoExtractor):
'only_matching': True
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>%s)' % GfycatIE._VALID_URL,
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -139,7 +127,6 @@ class GfycatIE(InfoExtractor):
'filesize': filesize,
'quality': quality(format_id),
})
- self._sort_formats(formats)
return {
'id': video_id,
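
The gfycat change is the recurring migration in this commit: per-extractor _extract_urls(webpage) static methods give way to a declarative class-level _EMBED_REGEX that the framework scans itself. A hedged sketch of that convention (the base-class plumbing is simplified here; only the contract matters):

    import re

    class ExampleIE:
        _VALID_URL = r'https?://(?:www\.)?example\.com/(?P<id>\w+)'
        _EMBED_REGEX = [rf'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']

        @classmethod
        def _extract_embed_urls(cls, url, webpage):
            # roughly what InfoExtractor does with _EMBED_REGEX by default
            for pattern in cls._EMBED_REGEX:
                for mobj in re.finditer(pattern, webpage):
                    yield mobj.group('url')

    page = '<iframe src="https://example.com/abc123"></iframe>'
    print(list(ExampleIE._extract_embed_urls(None, page)))
    # -> ['https://example.com/abc123']
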
diff --git a/hypervideo_dl/extractor/giantbomb.py b/hypervideo_dl/extractor/giantbomb.py
index 1920923..1125723 100644
--- a/hypervideo_dl/extractor/giantbomb.py
+++ b/hypervideo_dl/extractor/giantbomb.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -76,8 +74,6 @@ class GiantBombIE(InfoExtractor):
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
- self._sort_formats(formats)
-
return {
'id': video_id,
'display_id': display_id,
diff --git a/hypervideo_dl/extractor/giga.py b/hypervideo_dl/extractor/giga.py
index 5a9992a..b59c129 100644
--- a/hypervideo_dl/extractor/giga.py
+++ b/hypervideo_dl/extractor/giga.py
@@ -1,16 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
-from ..utils import (
- qualities,
- compat_str,
- parse_duration,
- parse_iso8601,
- str_to_int,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
class GigaIE(InfoExtractor):
@@ -67,7 +59,6 @@ class GigaIE(InfoExtractor):
'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
'quality': quality(fmt['quality']),
})
- self._sort_formats(formats)
title = self._html_search_meta(
'title', webpage, 'title', fatal=True)
diff --git a/hypervideo_dl/extractor/gigya.py b/hypervideo_dl/extractor/gigya.py
index 4121784..c5bc86b 100644
--- a/hypervideo_dl/extractor/gigya.py
+++ b/hypervideo_dl/extractor/gigya.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
diff --git a/hypervideo_dl/extractor/glide.py b/hypervideo_dl/extractor/glide.py
index 12af859..d114f34 100644
--- a/hypervideo_dl/extractor/glide.py
+++ b/hypervideo_dl/extractor/glide.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -23,7 +20,7 @@ class GlideIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- title = self._html_extract_title(webpage, default=None) or self._og_search_title(webpage)
+ title = self._generic_title('', webpage)
video_url = self._proto_relative_url(self._search_regex(
r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'video URL', default=None,
diff --git a/hypervideo_dl/extractor/globo.py b/hypervideo_dl/extractor/globo.py
index f6aaae1..a7be2cb 100644
--- a/hypervideo_dl/extractor/globo.py
+++ b/hypervideo_dl/extractor/globo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import hashlib
import json
@@ -142,7 +139,6 @@ class GloboIE(InfoExtractor):
fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
formats.extend(fmts)
- self._sort_formats(formats)
for resource in video['resources']:
if resource.get('type') == 'subtitle':
@@ -181,12 +177,12 @@ class GloboArticleIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
_VIDEOID_REGEXES = [
- r'\bdata-video-id=["\'](\d{7,})',
- r'\bdata-player-videosids=["\'](\d{7,})',
+ r'\bdata-video-id=["\'](\d{7,})["\']',
+ r'\bdata-player-videosids=["\'](\d{7,})["\']',
r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
- r'\bdata-id=["\'](\d{7,})',
- r'<div[^>]+\bid=["\'](\d{7,})',
- r'<bs-player[^>]+\bvideoid=["\'](\d{8,})',
+ r'\bdata-id=["\'](\d{7,})["\']',
+ r'<div[^>]+\bid=["\'](\d{7,})["\']',
+ r'<bs-player[^>]+\bvideoid=["\'](\d{8,})["\']',
]
_TESTS = [{
@@ -222,6 +218,14 @@ class GloboArticleIE(InfoExtractor):
'description': 'md5:2d089d036c4c9675117d3a56f8c61739',
},
'playlist_count': 1,
+ }, {
+ 'url': 'https://redeglobo.globo.com/rpc/meuparana/noticia/a-producao-de-chocolates-no-parana.ghtml',
+ 'info_dict': {
+ 'id': 'a-producao-de-chocolates-no-parana',
+ 'title': 'A produção de chocolates no Paraná',
+ 'description': 'md5:f2e3daf00ffd1dc0e9a8a6c7cfb0a89e',
+ },
+ 'playlist_count': 2,
}]
@classmethod
@@ -237,6 +241,6 @@ class GloboArticleIE(InfoExtractor):
entries = [
self.url_result('globo:%s' % video_id, GloboIE.ie_key())
for video_id in orderedSet(video_ids)]
- title = self._og_search_title(webpage)
+ title = self._og_search_title(webpage).strip()
description = self._html_search_meta('description', webpage)
return self.playlist_result(entries, display_id, title, description)
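
The extra ["\'] appended to each _VIDEOID_REGEXES pattern anchors the id against the closing quote, so attribute values that merely start with digits no longer match. A quick before/after check:

    import re

    html = '<div data-video-id="1234567-clip">'
    loose = re.search(r'\bdata-video-id=["\'](\d{7,})', html)
    strict = re.search(r'\bdata-video-id=["\'](\d{7,})["\']', html)
    print(loose.group(1))  # 1234567 -- silently accepts a malformed, non-numeric id
    print(strict)          # None -- the whole attribute value must be digits
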
diff --git a/hypervideo_dl/extractor/glomex.py b/hypervideo_dl/extractor/glomex.py
index d9ef433..22aac0d 100644
--- a/hypervideo_dl/extractor/glomex.py
+++ b/hypervideo_dl/extractor/glomex.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import urllib.parse
@@ -85,7 +82,6 @@ class GlomexBaseIE(InfoExtractor):
if video.get('language'):
for fmt in formats:
fmt['language'] = video['language']
- self._sort_formats(formats)
images = (video.get('images') or []) + [video.get('image') or {}]
thumbnails = [{
@@ -177,7 +173,7 @@ class GlomexEmbedIE(GlomexBaseIE):
return cls._smuggle_origin_url(f'https:{cls._BASE_PLAYER_URL}?{query_string}', origin_url)
@classmethod
- def _extract_urls(cls, webpage, origin_url):
+ def _extract_embed_urls(cls, url, webpage):
# https://docs.glomex.com/publisher/video-player-integration/javascript-api/
quot_re = r'["\']'
@@ -186,9 +182,9 @@ class GlomexEmbedIE(GlomexBaseIE):
(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
)(?P=q)'''
for mobj in re.finditer(regex, webpage):
- url = unescapeHTML(mobj.group('url'))
- if cls.suitable(url):
- yield cls._smuggle_origin_url(url, origin_url)
+ embed_url = unescapeHTML(mobj.group('url'))
+ if cls.suitable(embed_url):
+ yield cls._smuggle_origin_url(embed_url, url)
regex = fr'''(?x)
<glomex-player [^>]+?>|
@@ -196,7 +192,7 @@ class GlomexEmbedIE(GlomexBaseIE):
for mobj in re.finditer(regex, webpage):
attrs = extract_attributes(mobj.group(0))
if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
- yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], origin_url)
+ yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], url)
# naive parsing of inline scripts for hard-coded integration parameters
regex = fr'''(?x)
@@ -209,7 +205,7 @@ class GlomexEmbedIE(GlomexBaseIE):
continue
playlist_id = re.search(regex % 'playlistId', script)
if playlist_id:
- yield cls.build_player_url(playlist_id, integration_id, origin_url)
+ yield cls.build_player_url(playlist_id, integration_id, url)
def _real_extract(self, url):
url, origin_url = self._unsmuggle_origin_url(url)
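
The renamed parameters matter because _extract_embed_urls now receives the embedding page's URL directly and smuggles it into each player URL for _real_extract to unsmuggle later. A simplified stand-in for that round trip (the real helpers are _smuggle_origin_url/_unsmuggle_origin_url on GlomexBaseIE; the fragment encoding below is only illustrative):

    import json
    import urllib.parse

    def smuggle_origin_url(url, origin_url):
        fragment = urllib.parse.quote(json.dumps({'origin': origin_url}))
        return f'{url}#__smuggle={fragment}'

    def unsmuggle_origin_url(url):
        base, _, fragment = url.partition('#__smuggle=')
        if not fragment:
            return base, None
        return base, json.loads(urllib.parse.unquote(fragment))['origin']

    player = smuggle_origin_url(
        'https://player.glomex.com/integration/1/iframe-player.html?playlistId=v-abc',
        'https://example.com/article')
    print(unsmuggle_origin_url(player))
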
diff --git a/hypervideo_dl/extractor/go.py b/hypervideo_dl/extractor/go.py
index f92e166..b075a02 100644
--- a/hypervideo_dl/extractor/go.py
+++ b/hypervideo_dl/extractor/go.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .adobepass import AdobePassIE
@@ -14,6 +11,8 @@ from ..utils import (
try_get,
urlencode_postdata,
ExtractorError,
+ unified_timestamp,
+ traverse_obj,
)
@@ -73,7 +72,7 @@ class GoIE(AdobePassIE):
},
'skip': 'This content is no longer available.',
}, {
- 'url': 'http://watchdisneyxd.go.com/doraemon',
+ 'url': 'https://disneynow.com/shows/big-hero-6-the-series',
'info_dict': {
'title': 'Doraemon',
'id': 'SH55574025',
@@ -83,10 +82,19 @@ class GoIE(AdobePassIE):
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
'info_dict': {
'id': 'VDKA3609139',
- 'ext': 'mp4',
'title': 'This Guilty Blood',
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
'age_limit': 14,
+ 'episode': 'Episode 1',
+ 'upload_date': '20170102',
+ 'season': 'Season 2',
+ 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abcf/Shadowhunters/video/201/ae5f75608d86bf88aa4f9f4aa76ab1b7/579x325-Q100_ae5f75608d86bf88aa4f9f4aa76ab1b7.jpg',
+ 'duration': 2544,
+ 'season_number': 2,
+ 'series': 'Shadowhunters',
+ 'episode_number': 1,
+ 'timestamp': 1483387200,
+ 'ext': 'mp4'
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
@@ -94,13 +102,22 @@ class GoIE(AdobePassIE):
'skip_download': True,
},
}, {
- 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
+ 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-04/12-the-knock',
'info_dict': {
- 'id': 'VDKA13435179',
- 'ext': 'mp4',
- 'title': 'The Bet',
- 'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
+ 'id': 'VDKA26050359',
+ 'title': 'The Knock',
+ 'description': 'md5:0c2947e3ada4c31f28296db7db14aa64',
'age_limit': 14,
+ 'ext': 'mp4',
+ 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abc/TheRookie/video/412/daf830d06e83b11eaf5c0a299d993ae3/1556x876-Q75_daf830d06e83b11eaf5c0a299d993ae3.jpg',
+ 'episode': 'Episode 12',
+ 'season_number': 4,
+ 'season': 'Season 4',
+ 'timestamp': 1642975200,
+ 'episode_number': 12,
+ 'upload_date': '20220123',
+ 'series': 'The Rookie',
+ 'duration': 2572,
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
@@ -111,24 +128,18 @@ class GoIE(AdobePassIE):
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
'info_dict': {
'id': 'VDKA12782841',
- 'ext': 'mp4',
'title': 'First Look: Better Things - Season 2',
'description': 'md5:fa73584a95761c605d9d54904e35b407',
- },
- 'params': {
- 'geo_bypass_ip_block': '3.244.239.0/24',
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
- 'info_dict': {
- 'id': 'VDKA22600213',
'ext': 'mp4',
- 'title': 'Pilot',
- 'description': 'md5:74306df917cfc199d76d061d66bebdb4',
+ 'age_limit': 14,
+ 'upload_date': '20170825',
+ 'duration': 161,
+ 'series': 'Better Things',
+ 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/fx/BetterThings/video/12782841/b6b05e58264121cc2c98811318e6d507/1556x876-Q75_b6b05e58264121cc2c98811318e6d507.jpg',
+ 'timestamp': 1503661074,
},
'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
},
@@ -282,7 +293,6 @@ class GoIE(AdobePassIE):
'height': height,
})
formats.append(f)
- self._sort_formats(formats)
for cc in video_data.get('closedcaption', {}).get('src', []):
cc_url = cc.get('value')
@@ -319,4 +329,5 @@ class GoIE(AdobePassIE):
'thumbnails': thumbnails,
'formats': formats,
'subtitles': subtitles,
+ 'timestamp': unified_timestamp(traverse_obj(video_data, ('airdates', 'airdate', 0))),
}
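
The new 'timestamp' field reads the first airdate out of video_data and feeds it to unified_timestamp. A rough plain-Python equivalent of that lookup, using strict ISO-8601 parsing in place of the more permissive helper (the data shape is an assumption based on the traversal path above):

    from datetime import datetime

    video_data = {'airdates': {'airdate': ['2022-01-23T22:00:00+00:00']}}  # illustrative

    def first_airdate_timestamp(data):
        try:
            airdate = data['airdates']['airdate'][0]
        except (KeyError, IndexError, TypeError):
            return None  # traverse_obj-style: a missing path yields None
        return int(datetime.fromisoformat(airdate).timestamp())

    print(first_airdate_timestamp(video_data))  # 1642975200, i.e. upload_date 20220123
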
diff --git a/hypervideo_dl/extractor/godtube.py b/hypervideo_dl/extractor/godtube.py
index 96e68b4..6975401 100644
--- a/hypervideo_dl/extractor/godtube.py
+++ b/hypervideo_dl/extractor/godtube.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
diff --git a/hypervideo_dl/extractor/gofile.py b/hypervideo_dl/extractor/gofile.py
index 62d778c..ddbce2e 100644
--- a/hypervideo_dl/extractor/gofile.py
+++ b/hypervideo_dl/extractor/gofile.py
@@ -1,4 +1,5 @@
-# coding: utf-8
+import hashlib
+
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -19,22 +20,34 @@ class GofileIE(InfoExtractor):
'id': 'de571ac1-5edc-42e2-8ec2-bdac83ad4a31',
'filesize': 928116,
'ext': 'mp4',
- 'title': 'nuuh'
+ 'title': 'nuuh',
+ 'release_timestamp': 1638338704,
+ 'release_date': '20211201',
}
}]
- }, { # URL to test mixed file types
- 'url': 'https://gofile.io/d/avt34h',
+ }, {
+ 'url': 'https://gofile.io/d/is8lKr',
'info_dict': {
- 'id': 'avt34h',
- },
- 'playlist_mincount': 1,
- }, { # URL to test no video/audio error
- 'url': 'https://gofile.io/d/aB03lZ',
- 'info_dict': {
- 'id': 'aB03lZ',
+ 'id': 'TMjXd9',
+ 'ext': 'mp4',
},
'playlist_count': 0,
'skip': 'No video/audio found at provided URL.',
+ }, {
+ 'url': 'https://gofile.io/d/TMjXd9',
+ 'info_dict': {
+ 'id': 'TMjXd9',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'https://gofile.io/d/gqOtRf',
+ 'info_dict': {
+ 'id': 'gqOtRf',
+ },
+ 'playlist_mincount': 1,
+ 'params': {
+ 'videopassword': 'password',
+ },
}]
_TOKEN = None
@@ -50,12 +63,22 @@ class GofileIE(InfoExtractor):
self._set_cookie('gofile.io', 'accountToken', self._TOKEN)
def _entries(self, file_id):
+ query_params = {
+ 'contentId': file_id,
+ 'token': self._TOKEN,
+ 'websiteToken': 12345,
+ }
+ password = self.get_param('videopassword')
+ if password:
+ query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest()
files = self._download_json(
- f'https://api.gofile.io/getContent?contentId={file_id}&token={self._TOKEN}&websiteToken=websiteToken&cache=true',
- 'Gofile', note='Getting filelist')
+ 'https://api.gofile.io/getContent', file_id, note='Getting filelist', query=query_params)
status = files['status']
- if status != 'ok':
+ if status == 'error-passwordRequired':
+ raise ExtractorError(
+ 'This video is protected by a password, use the --video-password option', expected=True)
+ elif status != 'ok':
raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True)
found_files = False
@@ -65,7 +88,7 @@ class GofileIE(InfoExtractor):
continue
found_files = True
- file_url = file.get('directLink')
+ file_url = file.get('link')
if file_url:
yield {
'id': file['id'],
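
Two behavioural changes in the gofile hunk are worth spelling out: the request is now built with a proper query= dict instead of string interpolation, and --video-password is SHA-256-hashed before being sent. A minimal sketch of the query construction:

    import hashlib

    def gofile_query(file_id, token, password=None):
        query = {'contentId': file_id, 'token': token, 'websiteToken': 12345}
        if password:
            # the API expects the hex digest, never the plaintext password
            query['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest()
        return query

    print(gofile_query('gqOtRf', 'dummy-token', password='password'))
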
diff --git a/hypervideo_dl/extractor/golem.py b/hypervideo_dl/extractor/golem.py
index 47a068e..c33d950 100644
--- a/hypervideo_dl/extractor/golem.py
+++ b/hypervideo_dl/extractor/golem.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -54,7 +51,6 @@ class GolemIE(InfoExtractor):
'filesize': self._int(e.findtext('filesize'), 'filesize'),
'ext': determine_ext(e.findtext('./filename')),
})
- self._sort_formats(formats)
info['formats'] = formats
thumbnails = []
diff --git a/hypervideo_dl/extractor/goodgame.py b/hypervideo_dl/extractor/goodgame.py
new file mode 100644
index 0000000..c17ad56
--- /dev/null
+++ b/hypervideo_dl/extractor/goodgame.py
@@ -0,0 +1,57 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class GoodGameIE(InfoExtractor):
+ IE_NAME = 'goodgame:stream'
+ _VALID_URL = r'https?://goodgame\.ru/channel/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://goodgame.ru/channel/Pomi/#autoplay',
+ 'info_dict': {
+ 'id': 'pomi',
+ 'ext': 'mp4',
+ 'title': r're:Reynor vs Special \(1/2,bo3\) Wardi Spring EU \- playoff \(финальный день\) \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'channel_id': '1644',
+ 'channel': 'Pomi',
+ 'channel_url': 'https://goodgame.ru/channel/Pomi/',
+ 'description': 'md5:4a87b775ee7b2b57bdccebe285bbe171',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'live_status': 'is_live',
+ 'view_count': int,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'May not be online',
+ }]
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+ response = self._download_json(f'https://api2.goodgame.ru/v2/streams/{channel_name}', channel_name)
+ player_id = response['channel']['gg_player_src']
+
+ formats, subtitles = [], {}
+ if response.get('status') == 'Live':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://hls.goodgame.ru/manifest/{player_id}_master.m3u8',
+ channel_name, 'mp4', live=True)
+ else:
+ self.raise_no_formats('User is offline', expected=True, video_id=channel_name)
+
+ return {
+ 'id': player_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(response, ('channel', 'title')),
+ 'channel': channel_name,
+ 'channel_id': str_or_none(traverse_obj(response, ('channel', 'id'))),
+ 'channel_url': response.get('url'),
+ 'description': clean_html(traverse_obj(response, ('channel', 'description'))),
+ 'thumbnail': traverse_obj(response, ('channel', 'thumb')),
+ 'is_live': bool(formats),
+ 'view_count': int_or_none(response.get('viewers')),
+ 'age_limit': 18 if traverse_obj(response, ('channel', 'adult')) else None,
+ }
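
The control flow of the new GoodGame extractor is compact enough to restate: the channel API names a player id, and only a 'Live' status yields an HLS master playlist. A stand-alone sketch with a canned API response (not real API output):

    def goodgame_manifest(channel, response):
        player_id = response['channel']['gg_player_src']
        if response.get('status') != 'Live':
            # the extractor calls raise_no_formats('User is offline') here
            return None
        return f'https://hls.goodgame.ru/manifest/{player_id}_master.m3u8'

    sample = {'status': 'Live', 'channel': {'gg_player_src': 'pomi'}}
    print(goodgame_manifest('Pomi', sample))
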
diff --git a/hypervideo_dl/extractor/googledrive.py b/hypervideo_dl/extractor/googledrive.py
index 7b5bf28..e027ea7 100644
--- a/hypervideo_dl/extractor/googledrive.py
+++ b/hypervideo_dl/extractor/googledrive.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -79,13 +77,13 @@ class GoogleDriveIE(InfoExtractor):
_caption_formats_ext = []
_captions_xml = None
- @staticmethod
- def _extract_url(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
mobj = re.search(
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
webpage)
if mobj:
- return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+ yield 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _download_subtitles_xml(self, video_id, subtitles_id, hl):
if self._captions_xml:
@@ -255,8 +253,6 @@ class GoogleDriveIE(InfoExtractor):
if not formats and reason:
self.raise_no_formats(reason, expected=True)
- self._sort_formats(formats)
-
hl = get_value('hl')
subtitles_id = None
ttsurl = get_value('ttsurl')
@@ -266,7 +262,7 @@ class GoogleDriveIE(InfoExtractor):
subtitles_id = ttsurl.encode('utf-8').decode(
'unicode_escape').split('=')[-1]
- self._downloader.cookiejar.clear(domain='.google.com', path='/', name='NID')
+ self.cookiejar.clear(domain='.google.com', path='/', name='NID')
return {
'id': video_id,
@@ -278,3 +274,59 @@ class GoogleDriveIE(InfoExtractor):
'automatic_captions': self.extract_automatic_captions(
video_id, subtitles_id, hl),
}
+
+
+class GoogleDriveFolderIE(InfoExtractor):
+ IE_NAME = 'GoogleDrive:Folder'
+ _VALID_URL = r'https?://(?:docs|drive)\.google\.com/drive/folders/(?P<id>[\w-]{28,})'
+ _TESTS = [{
+ 'url': 'https://drive.google.com/drive/folders/1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'info_dict': {
+ 'id': '1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'title': 'Forrest'
+ },
+ 'playlist_count': 3,
+ }]
+ _BOUNDARY = '=====vc17a3rwnndj====='
+ _REQUEST = "/drive/v2beta/files?openDrive=true&reason=102&syncType=0&errorRecovery=false&q=trashed%20%3D%20false%20and%20'{folder_id}'%20in%20parents&fields=kind%2CnextPageToken%2Citems(kind%2CmodifiedDate%2CmodifiedByMeDate%2ClastViewedByMeDate%2CfileSize%2Cowners(kind%2CpermissionId%2Cid)%2ClastModifyingUser(kind%2CpermissionId%2Cid)%2ChasThumbnail%2CthumbnailVersion%2Ctitle%2Cid%2CresourceKey%2Cshared%2CsharedWithMeDate%2CuserPermission(role)%2CexplicitlyTrashed%2CmimeType%2CquotaBytesUsed%2Ccopyable%2CfileExtension%2CsharingUser(kind%2CpermissionId%2Cid)%2Cspaces%2Cversion%2CteamDriveId%2ChasAugmentedPermissions%2CcreatedDate%2CtrashingUser(kind%2CpermissionId%2Cid)%2CtrashedDate%2Cparents(id)%2CshortcutDetails(targetId%2CtargetMimeType%2CtargetLookupStatus)%2Ccapabilities(canCopy%2CcanDownload%2CcanEdit%2CcanAddChildren%2CcanDelete%2CcanRemoveChildren%2CcanShare%2CcanTrash%2CcanRename%2CcanReadTeamDrive%2CcanMoveTeamDriveItem)%2Clabels(starred%2Ctrashed%2Crestricted%2Cviewed))%2CincompleteSearch&appDataFilter=NO_APP_DATA&spaces=drive&pageToken={page_token}&maxResults=50&supportsTeamDrives=true&includeItemsFromAllDrives=true&corpora=default&orderBy=folder%2Ctitle_natural%20asc&retryCount=0&key={key} HTTP/1.1"
+ _DATA = f'''--{_BOUNDARY}
+content-type: application/http
+content-transfer-encoding: binary
+
+GET %s
+
+--{_BOUNDARY}
+'''
+
+ def _call_api(self, folder_id, key, data, **kwargs):
+ response = self._download_webpage(
+ 'https://clients6.google.com/batch/drive/v2beta',
+ folder_id, data=data.encode('utf-8'),
+ headers={
+ 'Content-Type': 'text/plain;charset=UTF-8;',
+ 'Origin': 'https://drive.google.com',
+ }, query={
+ '$ct': f'multipart/mixed; boundary="{self._BOUNDARY}"',
+ 'key': key
+ }, **kwargs)
+ return self._search_json('', response, 'api response', folder_id, **kwargs) or {}
+
+ def _get_folder_items(self, folder_id, key):
+ page_token = ''
+ while page_token is not None:
+ request = self._REQUEST.format(folder_id=folder_id, page_token=page_token, key=key)
+ page = self._call_api(folder_id, key, self._DATA % request)
+ yield from page['items']
+ page_token = page.get('nextPageToken')
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, folder_id)
+ key = self._search_regex(r'"(\w{39})"', webpage, 'key')
+
+ folder_info = self._call_api(folder_id, key, self._DATA % f'/drive/v2beta/files/{folder_id} HTTP/1.1', fatal=False)
+
+ return self.playlist_from_matches(
+ self._get_folder_items(folder_id, key), folder_id, folder_info.get('title'),
+ ie=GoogleDriveIE, getter=lambda item: f'https://drive.google.com/file/d/{item["id"]}')
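
GoogleDriveFolderIE's pagination is the classic nextPageToken loop: start with an empty token, request a page, and stop once the API omits the token. The generator contract, stripped of the multipart batch-request plumbing:

    def get_folder_items(fetch_page):
        page_token = ''
        while page_token is not None:
            page = fetch_page(page_token)
            yield from page['items']
            page_token = page.get('nextPageToken')

    # canned two-page listing standing in for the clients6.google.com batch API
    pages = {
        '': {'items': ['id-1', 'id-2'], 'nextPageToken': 'p2'},
        'p2': {'items': ['id-3']},
    }
    print(list(get_folder_items(lambda token: pages[token])))  # ['id-1', 'id-2', 'id-3']
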
diff --git a/hypervideo_dl/extractor/googlepodcasts.py b/hypervideo_dl/extractor/googlepodcasts.py
index 25631e2..8b2351b 100644
--- a/hypervideo_dl/extractor/googlepodcasts.py
+++ b/hypervideo_dl/extractor/googlepodcasts.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/googlesearch.py b/hypervideo_dl/extractor/googlesearch.py
index 4b8b1bc..67ca0e5 100644
--- a/hypervideo_dl/extractor/googlesearch.py
+++ b/hypervideo_dl/extractor/googlesearch.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import itertools
import re
diff --git a/hypervideo_dl/extractor/goplay.py b/hypervideo_dl/extractor/goplay.py
new file mode 100644
index 0000000..2882b49
--- /dev/null
+++ b/hypervideo_dl/extractor/goplay.py
@@ -0,0 +1,394 @@
+import base64
+import binascii
+import datetime
+import hashlib
+import hmac
+import json
+import os
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unescapeHTML,
+)
+
+
+class GoPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)'
+
+ _NETRC_MACHINE = 'goplay'
+
+ _TESTS = [{
+ 'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
+ 'info_dict': {
+ 'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
+ 'ext': 'mp4',
+ 'title': 'S3 - Aflevering 2',
+ 'series': 'De Container Cup',
+ 'season': 'Season 3',
+ 'season_number': 3,
+ 'episode': 'Episode 2',
+ 'episode_number': 2,
+ },
+ 'skip': 'This video is only available for registered users'
+ }, {
+ 'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
+ 'info_dict': {
+ 'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
+ 'ext': 'mp4',
+ 'title': 'A Family for the Holidays',
+ },
+ 'skip': 'This video is only available for registered users'
+ }]
+
+ _id_token = None
+
+ def _perform_login(self, username, password):
+ self.report_login()
+ aws = AwsIdp(ie=self, pool_id='eu-west-1_dViSsKM5Y', client_id='6s1h851s8uplco5h6mqh1jac8m')
+ self._id_token, _ = aws.authenticate(username=username, password=password)
+
+ def _real_initialize(self):
+ if not self._id_token:
+ raise self.raise_login_required(method='password')
+
+ def _real_extract(self, url):
+ url, display_id = self._match_valid_url(url).group(0, 'display_id')
+ webpage = self._download_webpage(url, display_id)
+ video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
+ video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
+
+ movie = video_data.get('movie')
+ if movie:
+ video_id = movie['videoUuid']
+ info_dict = {
+ 'title': movie.get('title')
+ }
+ else:
+ episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
+ video_id = episode['videoUuid']
+ info_dict = {
+ 'title': episode.get('episodeTitle'),
+ 'series': traverse_obj(episode, ('program', 'title')),
+ 'season_number': episode.get('seasonNumber'),
+ 'episode_number': episode.get('episodeNumber'),
+ }
+
+ api = self._download_json(
+ f'https://api.viervijfzes.be/content/{video_id}',
+ video_id, headers={'Authorization': self._id_token})
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ api['video']['S'], video_id, ext='mp4', m3u8_id='HLS')
+
+ info_dict.update({
+ 'id': video_id,
+ 'formats': formats,
+ })
+
+ return info_dict
+
+
+# Taken from https://github.com/add-ons/plugin.video.viervijfzes/blob/master/resources/lib/viervijfzes/auth_awsidp.py
+# Released into Public domain by https://github.com/michaelarnauts
+
+class InvalidLoginException(ExtractorError):
+ """ The login credentials are invalid """
+
+
+class AuthenticationException(ExtractorError):
+ """ Something went wrong while logging in """
+
+
+class AwsIdp:
+ """ AWS Identity Provider """
+
+ def __init__(self, ie, pool_id, client_id):
+ """
+ :param InfoExtractor ie: The extractor that instantiated this class.
+ :param str pool_id: The AWS user pool to connect to (format: <region>_<poolid>).
+ E.g.: eu-west-1_aLkOfYN3T
+ :param str client_id: The client application ID (the ID of the application connecting)
+ """
+
+ self.ie = ie
+
+ self.pool_id = pool_id
+ if "_" not in self.pool_id:
+ raise ValueError("Invalid pool_id format. Should be <region>_<poolid>.")
+
+ self.client_id = client_id
+ self.region = self.pool_id.split("_")[0]
+ self.url = "https://cognito-idp.%s.amazonaws.com/" % (self.region,)
+
+ # Initialize the values
+ # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L22
+ self.n_hex = 'FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1' + \
+ '29024E088A67CC74020BBEA63B139B22514A08798E3404DD' + \
+ 'EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245' + \
+ 'E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED' + \
+ 'EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D' + \
+ 'C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F' + \
+ '83655D23DCA3AD961C62F356208552BB9ED529077096966D' + \
+ '670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B' + \
+ 'E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9' + \
+ 'DE2BCBF6955817183995497CEA956AE515D2261898FA0510' + \
+ '15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64' + \
+ 'ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7' + \
+ 'ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B' + \
+ 'F12FFA06D98A0864D87602733EC86A64521F2B18177B200C' + \
+ 'BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31' + \
+ '43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF'
+
+ # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L49
+ self.g_hex = '2'
+ self.info_bits = bytearray('Caldera Derived Key', 'utf-8')
+
+ self.big_n = self.__hex_to_long(self.n_hex)
+ self.g = self.__hex_to_long(self.g_hex)
+ self.k = self.__hex_to_long(self.__hex_hash('00' + self.n_hex + '0' + self.g_hex))
+ self.small_a_value = self.__generate_random_small_a()
+ self.large_a_value = self.__calculate_a()
+
+ def authenticate(self, username, password):
+ """ Authenticate with a username and password. """
+ # Step 1: First initiate an authentication request
+ auth_data_dict = self.__get_authentication_request(username)
+ auth_data = json.dumps(auth_data_dict).encode("utf-8")
+ auth_headers = {
+ "X-Amz-Target": "AWSCognitoIdentityProviderService.InitiateAuth",
+ "Accept-Encoding": "identity",
+ "Content-Type": "application/x-amz-json-1.1"
+ }
+ auth_response_json = self.ie._download_json(
+ self.url, None, data=auth_data, headers=auth_headers,
+ note='Authenticating username', errnote='Invalid username')
+ challenge_parameters = auth_response_json.get("ChallengeParameters")
+
+ if auth_response_json.get("ChallengeName") != "PASSWORD_VERIFIER":
+ raise AuthenticationException(auth_response_json["message"])
+
+ # Step 2: Respond to the Challenge with a valid ChallengeResponse
+ challenge_request = self.__get_challenge_response_request(challenge_parameters, password)
+ challenge_data = json.dumps(challenge_request).encode("utf-8")
+ challenge_headers = {
+ "X-Amz-Target": "AWSCognitoIdentityProviderService.RespondToAuthChallenge",
+ "Content-Type": "application/x-amz-json-1.1"
+ }
+ auth_response_json = self.ie._download_json(
+ self.url, None, data=challenge_data, headers=challenge_headers,
+ note='Authenticating password', errnote='Invalid password')
+
+ if 'message' in auth_response_json:
+ raise InvalidLoginException(auth_response_json['message'])
+ return (
+ auth_response_json['AuthenticationResult']['IdToken'],
+ auth_response_json['AuthenticationResult']['RefreshToken']
+ )
+
+ def __get_authentication_request(self, username):
+ """
+
+ :param str username: The username to use
+
+ :return: A full Authorization request.
+ :rtype: dict
+ """
+ auth_request = {
+ "AuthParameters": {
+ "USERNAME": username,
+ "SRP_A": self.__long_to_hex(self.large_a_value)
+ },
+ "AuthFlow": "USER_SRP_AUTH",
+ "ClientId": self.client_id
+ }
+ return auth_request
+
+ def __get_challenge_response_request(self, challenge_parameters, password):
+ """ Create a Challenge Response Request object.
+
+ :param dict[str,str|int] challenge_parameters: The parameters for the challenge.
+ :param str password: The password.
+
+ :return: A valid and full request data object to use as a response for a challenge.
+ :rtype: dict
+ """
+ user_id = challenge_parameters["USERNAME"]
+ user_id_for_srp = challenge_parameters["USER_ID_FOR_SRP"]
+ srp_b = challenge_parameters["SRP_B"]
+ salt = challenge_parameters["SALT"]
+ secret_block = challenge_parameters["SECRET_BLOCK"]
+
+ timestamp = self.__get_current_timestamp()
+
+ # Get a HKDF key for the password, SrpB and the Salt
+ hkdf = self.__get_hkdf_key_for_password(
+ user_id_for_srp,
+ password,
+ self.__hex_to_long(srp_b),
+ salt
+ )
+ secret_block_bytes = base64.standard_b64decode(secret_block)
+
+ # the message is a combo of the pool_id, provided SRP userId, the Secret and Timestamp
+ msg = \
+ bytearray(self.pool_id.split('_')[1], 'utf-8') + \
+ bytearray(user_id_for_srp, 'utf-8') + \
+ bytearray(secret_block_bytes) + \
+ bytearray(timestamp, 'utf-8')
+ hmac_obj = hmac.new(hkdf, msg, digestmod=hashlib.sha256)
+ signature_string = base64.standard_b64encode(hmac_obj.digest()).decode('utf-8')
+ challenge_request = {
+ "ChallengeResponses": {
+ "USERNAME": user_id,
+ "TIMESTAMP": timestamp,
+ "PASSWORD_CLAIM_SECRET_BLOCK": secret_block,
+ "PASSWORD_CLAIM_SIGNATURE": signature_string
+ },
+ "ChallengeName": "PASSWORD_VERIFIER",
+ "ClientId": self.client_id
+ }
+ return challenge_request
+
+ def __get_hkdf_key_for_password(self, username, password, server_b_value, salt):
+ """ Calculates the final hkdf based on computed S value, and computed U value and the key.
+
+ :param str username: Username.
+ :param str password: Password.
+ :param int server_b_value: Server B value.
+ :param int salt: Generated salt.
+
+ :return Computed HKDF value.
+ :rtype: object
+ """
+
+ u_value = self.__calculate_u(self.large_a_value, server_b_value)
+ if u_value == 0:
+ raise ValueError('U cannot be zero.')
+ username_password = '%s%s:%s' % (self.pool_id.split('_')[1], username, password)
+ username_password_hash = self.__hash_sha256(username_password.encode('utf-8'))
+
+ x_value = self.__hex_to_long(self.__hex_hash(self.__pad_hex(salt) + username_password_hash))
+ g_mod_pow_xn = pow(self.g, x_value, self.big_n)
+ int_value2 = server_b_value - self.k * g_mod_pow_xn
+ s_value = pow(int_value2, self.small_a_value + u_value * x_value, self.big_n)
+ hkdf = self.__compute_hkdf(
+ bytearray.fromhex(self.__pad_hex(s_value)),
+ bytearray.fromhex(self.__pad_hex(self.__long_to_hex(u_value)))
+ )
+ return hkdf
+
+ def __compute_hkdf(self, ikm, salt):
+ """ Standard hkdf algorithm
+
+ :param {Buffer} ikm Input key material.
+ :param {Buffer} salt Salt value.
+ :return {Buffer} Strong key material.
+ """
+
+ prk = hmac.new(salt, ikm, hashlib.sha256).digest()
+ info_bits_update = self.info_bits + bytearray(chr(1), 'utf-8')
+ hmac_hash = hmac.new(prk, info_bits_update, hashlib.sha256).digest()
+ return hmac_hash[:16]
+
+ def __calculate_u(self, big_a, big_b):
+ """ Calculate the client's value U which is the hash of A and B
+
+ :param int big_a: Large A value.
+ :param int big_b: Server B value.
+
+ :return Computed U value.
+ :rtype: int
+ """
+
+ u_hex_hash = self.__hex_hash(self.__pad_hex(big_a) + self.__pad_hex(big_b))
+ return self.__hex_to_long(u_hex_hash)
+
+ def __generate_random_small_a(self):
+ """ Helper function to generate a random big integer
+
+ :return a random value.
+ :rtype: int
+ """
+ random_long_int = self.__get_random(128)
+ return random_long_int % self.big_n
+
+ def __calculate_a(self):
+ """ Calculate the client's public value A = g^a%N with the generated random number a
+
+ :return Computed large A.
+ :rtype: int
+ """
+
+ big_a = pow(self.g, self.small_a_value, self.big_n)
+ # safety check
+ if (big_a % self.big_n) == 0:
+ raise ValueError('Safety check for A failed')
+ return big_a
+
+ @staticmethod
+ def __long_to_hex(long_num):
+ return '%x' % long_num
+
+ @staticmethod
+ def __hex_to_long(hex_string):
+ return int(hex_string, 16)
+
+ @staticmethod
+ def __hex_hash(hex_string):
+ return AwsIdp.__hash_sha256(bytearray.fromhex(hex_string))
+
+ @staticmethod
+ def __hash_sha256(buf):
+ """AuthenticationHelper.hash"""
+ digest = hashlib.sha256(buf).hexdigest()
+ return (64 - len(digest)) * '0' + digest
+
+ @staticmethod
+ def __pad_hex(long_int):
+ """ Converts a Long integer (or hex string) to hex format padded with zeroes for hashing
+
+ :param int|str long_int: Number or string to pad.
+
+ :return Padded hex string.
+ :rtype: str
+ """
+
+ if not isinstance(long_int, str):
+ hash_str = AwsIdp.__long_to_hex(long_int)
+ else:
+ hash_str = long_int
+ if len(hash_str) % 2 == 1:
+ hash_str = '0%s' % hash_str
+ elif hash_str[0] in '89ABCDEFabcdef':
+ hash_str = '00%s' % hash_str
+ return hash_str
+
+ @staticmethod
+ def __get_random(nbytes):
+ random_hex = binascii.hexlify(os.urandom(nbytes))
+ return AwsIdp.__hex_to_long(random_hex)
+
+ @staticmethod
+ def __get_current_timestamp():
+ """ Creates a timestamp with the correct English format.
+
+ :return: timestamp in format 'Sun Jan 27 19:00:04 UTC 2019'
+ :rtype: str
+ """
+
+ # We need US only data, so we cannot just do a strftime:
+ # Sun Jan 27 19:00:04 UTC 2019
+ months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+ days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+
+ time_now = datetime.datetime.utcnow()
+ format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
+ time_string = datetime.datetime.utcnow().strftime(format_string)
+ return time_string
+
+ def __str__(self):
+ return "AWS IDP Client for:\nRegion: %s\nPoolId: %s\nAppId: %s" % (
+ self.region, self.pool_id.split("_")[1], self.client_id
+ )
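
One subtle detail in AwsIdp: __get_current_timestamp hand-rolls the day and month names because Cognito requires the English strings even on non-English hosts, where a bare strftime('%a %b ...') would localize them. An equivalent, locale-independent sketch:

    import datetime

    def cognito_timestamp(now=None):
        months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
        now = now or datetime.datetime.utcnow()
        return now.strftime(f'{days[now.weekday()]} {months[now.month]} {now.day} %H:%M:%S UTC %Y')

    print(cognito_timestamp(datetime.datetime(2019, 1, 27, 19, 0, 4)))
    # -> Sun Jan 27 19:00:04 UTC 2019
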
diff --git a/hypervideo_dl/extractor/gopro.py b/hypervideo_dl/extractor/gopro.py
index 10cc1ae..ae96537 100644
--- a/hypervideo_dl/extractor/gopro.py
+++ b/hypervideo_dl/extractor/gopro.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -81,8 +78,6 @@ class GoProIE(InfoExtractor):
'height': int_or_none(fmt.get('height')),
})
- self._sort_formats(formats)
-
title = str_or_none(
try_get(metadata, lambda x: x['collection']['title'])
or self._html_search_meta(['og:title', 'twitter:title'], webpage)
diff --git a/hypervideo_dl/extractor/goshgay.py b/hypervideo_dl/extractor/goshgay.py
index 377981d..9a1f32b 100644
--- a/hypervideo_dl/extractor/goshgay.py
+++ b/hypervideo_dl/extractor/goshgay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
diff --git a/hypervideo_dl/extractor/gotostage.py b/hypervideo_dl/extractor/gotostage.py
index 6aa9610..112293b 100644
--- a/hypervideo_dl/extractor/gotostage.py
+++ b/hypervideo_dl/extractor/gotostage.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/gputechconf.py b/hypervideo_dl/extractor/gputechconf.py
index 73dc62c..2d13bf4 100644
--- a/hypervideo_dl/extractor/gputechconf.py
+++ b/hypervideo_dl/extractor/gputechconf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/gronkh.py b/hypervideo_dl/extractor/gronkh.py
index c9f1dd2..b9370e3 100644
--- a/hypervideo_dl/extractor/gronkh.py
+++ b/hypervideo_dl/extractor/gronkh.py
@@ -1,20 +1,34 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import functools
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+ OnDemandPagedList,
+ traverse_obj,
+ unified_strdate,
+)
class GronkhIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/(?:watch/)?stream/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/(?:watch/)?streams?/(?P<id>\d+)'
_TESTS = [{
+ 'url': 'https://gronkh.tv/streams/657',
+ 'info_dict': {
+ 'id': '657',
+ 'ext': 'mp4',
+ 'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1',
+ 'view_count': int,
+ 'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
+ 'upload_date': '20221111'
+ },
+ 'params': {'skip_download': True}
+ }, {
'url': 'https://gronkh.tv/stream/536',
'info_dict': {
'id': '536',
'ext': 'mp4',
'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv',
- 'view_count': 19491,
+ 'view_count': int,
'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
'upload_date': '20211001'
},
@@ -34,7 +48,6 @@ class GronkhIE(InfoExtractor):
'url': data_json['vtt_url'],
'ext': 'vtt',
})
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title'),
@@ -44,3 +57,54 @@ class GronkhIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class GronkhFeedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gronkh\.tv(?:/feed)?/?(?:#|$)'
+ IE_NAME = 'gronkh:feed'
+
+ _TESTS = [{
+ 'url': 'https://gronkh.tv/feed',
+ 'info_dict': {
+ 'id': 'feed',
+ },
+ 'playlist_count': 16,
+ }, {
+ 'url': 'https://gronkh.tv',
+ 'only_matching': True,
+ }]
+
+ def _entries(self):
+ for type_ in ('recent', 'views'):
+ info = self._download_json(
+ f'https://api.gronkh.tv/v1/video/discovery/{type_}', 'feed', note=f'Downloading {type_} API JSON')
+ for item in traverse_obj(info, ('discovery', ...)) or []:
+ yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item.get('title'))
+
+ def _real_extract(self, url):
+ return self.playlist_result(self._entries(), 'feed')
+
+
+class GronkhVodsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/vods/streams/?(?:#|$)'
+ IE_NAME = 'gronkh:vods'
+
+ _TESTS = [{
+ 'url': 'https://gronkh.tv/vods/streams',
+ 'info_dict': {
+ 'id': 'vods',
+ },
+ 'playlist_mincount': 150,
+ }]
+ _PER_PAGE = 25
+
+ def _fetch_page(self, page):
+ items = traverse_obj(self._download_json(
+ 'https://api.gronkh.tv/v1/search', 'vods', query={'offset': self._PER_PAGE * page, 'first': self._PER_PAGE},
+ note=f'Downloading stream video page {page + 1}'), ('results', 'videos', ...))
+ for item in items or []:
+ yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item['episode'], item.get('title'))
+
+ def _real_extract(self, url):
+ entries = OnDemandPagedList(functools.partial(self._fetch_page), self._PER_PAGE)
+ return self.playlist_result(entries, 'vods')
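
GronkhVodsIE leans on OnDemandPagedList so that only the pages a caller actually consumes are fetched; _fetch_page translates a page index into the 'offset'/'first' query. The laziness contract in miniature (plain Python, not the real helper):

    def fetch_page(page, per_page=25):
        offset = per_page * page  # same arithmetic as the query parameters above
        print(f'fetching page {page + 1}')
        return [f'stream-{offset + i}' for i in range(per_page)]

    def take(n, per_page=25):
        # a paged list only touches the pages needed to satisfy the request
        items, page = [], 0
        while len(items) < n:
            items.extend(fetch_page(page, per_page))
            page += 1
        return items[:n]

    print(take(3))  # fetches page 1 only, then stops
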
diff --git a/hypervideo_dl/extractor/groupon.py b/hypervideo_dl/extractor/groupon.py
index a6da909..362d3ff 100644
--- a/hypervideo_dl/extractor/groupon.py
+++ b/hypervideo_dl/extractor/groupon.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/harpodeon.py b/hypervideo_dl/extractor/harpodeon.py
new file mode 100644
index 0000000..0aa4733
--- /dev/null
+++ b/hypervideo_dl/extractor/harpodeon.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class HarpodeonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?harpodeon\.com/(?:video|preview)/\w+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.harpodeon.com/video/The_Smoking_Out_of_Bella_Butts/268068288',
+ 'md5': '727371564a6a9ebccef2073535b5b6bd',
+ 'skip': 'Free video could become unavailable',
+ 'info_dict': {
+ 'id': '268068288',
+ 'ext': 'mp4',
+ 'title': 'The Smoking Out of Bella Butts',
+ 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77',
+ 'creator': 'Vitagraph Company of America',
+ 'release_date': '19150101'
+ }
+ }, {
+ 'url': 'https://www.harpodeon.com/preview/The_Smoking_Out_of_Bella_Butts/268068288',
+ 'md5': '6dfea5412845f690c7331be703f884db',
+ 'info_dict': {
+ 'id': '268068288',
+ 'ext': 'mp4',
+ 'title': 'The Smoking Out of Bella Butts',
+ 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77',
+ 'creator': 'Vitagraph Company of America',
+ 'release_date': '19150101'
+ }
+ }, {
+ 'url': 'https://www.harpodeon.com/preview/Behind_the_Screen/421838710',
+ 'md5': '7979df9ca04637282cb7d172ab3a9c3b',
+ 'info_dict': {
+ 'id': '421838710',
+ 'ext': 'mp4',
+ 'title': 'Behind the Screen',
+ 'description': 'md5:008972a3dc51fba3965ee517d2ba9155',
+ 'creator': 'Lone Star Corporation',
+ 'release_date': '19160101'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title, creator, release_year = self._search_regex(
+ r'''(?x)
+ <div[^>]+videoInfo[^<]*<h2[^>]*>(?P<title>[^>]+)</h2>
+ (?:\s*<p[^>]*>\((?P<creator>.+),\s*)?(?P<release_year>\d{4})?''',
+ webpage, 'title', group=('title', 'creator', 'release_year'),
+ fatal=False) or (None, None, None)
+
+ hp_base = self._html_search_regex(r'hpBase\(\s*["\']([^"\']+)', webpage, 'hp_base')
+
+ hp_inject_video, hp_resolution = self._search_regex(
+ r'''(?x)
+ hpInjectVideo\([\'\"](?P<hp_inject_video>\w+)[\'\"],
+ [\'\"](?P<hp_resolution>\d+)[\'\"]''',
+ webpage, 'hp_inject_video', group=['hp_inject_video', 'hp_resolution'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': f'{hp_base}{hp_inject_video}_{hp_resolution}.mp4',
+ 'http_headers': {'Referer': url},
+ 'description': self._html_search_meta('description', webpage, fatal=False),
+ 'creator': creator,
+ 'release_date': unified_strdate(f'{release_year}0101')
+ }
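
The new Harpodeon extractor ends in pure string assembly: hpBase supplies the CDN prefix and hpInjectVideo the clip id and resolution. A condensed reconstruction against a made-up page snippet (the domain and values are illustrative):

    import re

    webpage = "hpBase('https://cdn.example.org/'); hpInjectVideo('abc123', '480');"

    hp_base = re.search(r'hpBase\(\s*["\']([^"\']+)', webpage).group(1)
    video, resolution = re.search(
        r'hpInjectVideo\([\'"](\w+)[\'"],\s*[\'"](\d+)[\'"]', webpage).groups()
    print(f'{hp_base}{video}_{resolution}.mp4')
    # -> https://cdn.example.org/abc123_480.mp4
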
diff --git a/hypervideo_dl/extractor/hbo.py b/hypervideo_dl/extractor/hbo.py
index 68df748..530bdb7 100644
--- a/hypervideo_dl/extractor/hbo.py
+++ b/hypervideo_dl/extractor/hbo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -115,7 +112,6 @@ class HBOBaseIE(InfoExtractor):
'width': format_info.get('width'),
'height': format_info.get('height'),
})
- self._sort_formats(formats)
thumbnails = []
card_sizes = xpath_element(video_data, 'titleCardSizes')
diff --git a/hypervideo_dl/extractor/hearthisat.py b/hypervideo_dl/extractor/hearthisat.py
index a3d6a05..d1a400d 100644
--- a/hypervideo_dl/extractor/hearthisat.py
+++ b/hypervideo_dl/extractor/hearthisat.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -85,7 +81,6 @@ class HearThisAtIE(InfoExtractor):
'acodec': ext,
'quality': 2, # Usually better quality
})
- self._sort_formats(formats)
return {
'id': track_id,
diff --git a/hypervideo_dl/extractor/heise.py b/hypervideo_dl/extractor/heise.py
index cbe564a..27d737c 100644
--- a/hypervideo_dl/extractor/heise.py
+++ b/hypervideo_dl/extractor/heise.py
@@ -1,13 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import urllib.parse
from .common import InfoExtractor
from .kaltura import KalturaIE
from .youtube import YoutubeIE
from ..utils import (
+ NO_DEFAULT,
determine_ext,
int_or_none,
- NO_DEFAULT,
parse_iso8601,
smuggle_url,
xpath_text,
@@ -26,6 +25,9 @@ class HeiseIE(InfoExtractor):
'timestamp': 1512734959,
'upload_date': '20171208',
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'duration': 2845,
+ 'view_count': int,
},
'params': {
'skip_download': True,
@@ -37,11 +39,27 @@ class HeiseIE(InfoExtractor):
'info_dict': {
'id': '6kmWbXleKW4',
'ext': 'mp4',
- 'title': 'NEU IM SEPTEMBER | Netflix',
- 'description': 'md5:2131f3c7525e540d5fd841de938bd452',
+ 'title': 'Neu im September 2017 | Netflix',
+ 'description': 'md5:d6852d1f96bb80760608eed3b907437c',
'upload_date': '20170830',
'uploader': 'Netflix Deutschland, Österreich und Schweiz',
'uploader_id': 'netflixdach',
+ 'categories': ['Entertainment'],
+ 'tags': 'count:27',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'comment_count': int,
+ 'channel_id': 'UCZqgRlLcvO3Fnx_npQJygcQ',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/6kmWbXleKW4/maxresdefault.webp',
+ 'uploader_url': 'http://www.youtube.com/user/netflixdach',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCZqgRlLcvO3Fnx_npQJygcQ',
+ 'view_count': int,
+ 'channel': 'Netflix Deutschland, Österreich und Schweiz',
+ 'channel_follower_count': int,
+ 'like_count': int,
+ 'duration': 67,
},
'params': {
'skip_download': True,
@@ -55,11 +73,15 @@ class HeiseIE(InfoExtractor):
'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
'timestamp': 1512470717,
'upload_date': '20171205',
+ 'duration': 786,
+ 'view_count': int,
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
},
'params': {
'skip_download': True,
},
}, {
+ # FIXME: Video m3u8 fails to download; issue with Kaltura extractor
'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
'info_dict': {
'id': '1_59mk80sf',
@@ -73,6 +95,18 @@ class HeiseIE(InfoExtractor):
'skip_download': True,
},
}, {
+ # videout
+ 'url': 'https://www.heise.de/ct/artikel/c-t-uplink-3-8-Anonyme-SIM-Karten-G-Sync-Monitore-Citizenfour-2440327.html',
+ 'info_dict': {
+ 'id': '2440327',
+ 'ext': 'mp4',
+ 'title': 'c\'t uplink 3.8: Anonyme SIM-Karten, G-Sync-Monitore, Citizenfour',
+ 'thumbnail': 'http://www.heise.de/imagine/yxM2qmol0xV3iFB7qFb70dGvXjc/gallery/',
+ 'description': 'md5:fa164d8c8707dff124a9626d39205f5d',
+ 'timestamp': 1414825200,
+ 'upload_date': '20141101',
+ }
+ }, {
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
'only_matching': True,
}, {
@@ -124,26 +158,28 @@ class HeiseIE(InfoExtractor):
if kaltura_id:
return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
- yt_urls = YoutubeIE._extract_urls(webpage)
+ yt_urls = tuple(YoutubeIE._extract_embed_urls(url, webpage))
if yt_urls:
return self.playlist_from_matches(
yt_urls, video_id, title, ie=YoutubeIE.ie_key())
title = extract_title()
-
- container_id = self._search_regex(
- r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
- webpage, 'container ID')
-
- sequenz_id = self._search_regex(
- r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
- webpage, 'sequenz ID')
-
- doc = self._download_xml(
- 'http://www.heise.de/videout/feed', video_id, query={
+ api_params = urllib.parse.parse_qs(
+ self._search_regex(r'/videout/feed\.json\?([^\']+)', webpage, 'feed params', default=None) or '')
+ if not api_params or 'container' not in api_params or 'sequenz' not in api_params:
+ container_id = self._search_regex(
+ r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
+ webpage, 'container ID')
+
+ sequenz_id = self._search_regex(
+ r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
+ webpage, 'sequenz ID')
+ api_params = {
'container': container_id,
'sequenz': sequenz_id,
- })
+ }
+ doc = self._download_xml(
+ 'http://www.heise.de/videout/feed', video_id, query=api_params)
formats = []
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
@@ -158,7 +194,6 @@ class HeiseIE(InfoExtractor):
'format_id': '%s_%s' % (ext, label),
'height': height,
})
- self._sort_formats(formats)
return {
'id': video_id,
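
The heise refactor first tries to lift container/sequenz straight out of the embedded /videout/feed.json query string and only falls back to scraping the data attributes. Note that parse_qs maps each key to a list, which is why the guard checks key membership rather than single values:

    import urllib.parse

    api_params = urllib.parse.parse_qs('container=3959893&sequenz=1234')
    print(api_params)  # {'container': ['3959893'], 'sequenz': ['1234']}
    print('container' in api_params and 'sequenz' in api_params)  # True

    # an empty or missing query string degrades cleanly to the fallback path
    print(urllib.parse.parse_qs(''))  # {}
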
diff --git a/hypervideo_dl/extractor/hellporno.py b/hypervideo_dl/extractor/hellporno.py
index 92d32cd..fa32b27 100644
--- a/hypervideo_dl/extractor/hellporno.py
+++ b/hypervideo_dl/extractor/hellporno.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -41,7 +39,6 @@ class HellPornoIE(InfoExtractor):
title = remove_end(self._html_extract_title(webpage), ' - Hell Porno')
info = self._parse_html5_media_entries(url, webpage, display_id)[0]
- self._sort_formats(info['formats'])
video_id = self._search_regex(
(r'chs_object\s*=\s*["\'](\d+)',
diff --git a/hypervideo_dl/extractor/helsinki.py b/hypervideo_dl/extractor/helsinki.py
index 575fb33..e518cae 100644
--- a/hypervideo_dl/extractor/helsinki.py
+++ b/hypervideo_dl/extractor/helsinki.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import js_to_json
@@ -33,7 +29,6 @@ class HelsinkiIE(InfoExtractor):
'url': s['file'],
'ext': 'mp4',
} for s in params['sources']]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/hentaistigma.py b/hypervideo_dl/extractor/hentaistigma.py
index 86a93de..ca5ffc2 100644
--- a/hypervideo_dl/extractor/hentaistigma.py
+++ b/hypervideo_dl/extractor/hentaistigma.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/hgtv.py b/hypervideo_dl/extractor/hgtv.py
index a4f3325..c40017d 100644
--- a/hypervideo_dl/extractor/hgtv.py
+++ b/hypervideo_dl/extractor/hgtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/hidive.py b/hypervideo_dl/extractor/hidive.py
index 46d7d62..3a53f2c 100644
--- a/hypervideo_dl/extractor/hidive.py
+++ b/hypervideo_dl/extractor/hidive.py
@@ -1,4 +1,3 @@
-# coding: utf-8
import re
from .common import InfoExtractor
@@ -39,7 +38,9 @@ class HiDiveIE(InfoExtractor):
webpage = self._download_webpage(self._LOGIN_URL, None)
form = self._search_regex(
r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
- webpage, 'login form')
+ webpage, 'login form', default=None)
+ if not form: # logged in
+ return
data = self._hidden_inputs(form)
data.update({
'Email': username,
@@ -102,7 +103,6 @@ class HiDiveIE(InfoExtractor):
f['language'] = audio
f['format_note'] = f'{version}, {extra}'
formats.extend(frmt)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/historicfilms.py b/hypervideo_dl/extractor/historicfilms.py
index 56343e9..c428fee 100644
--- a/hypervideo_dl/extractor/historicfilms.py
+++ b/hypervideo_dl/extractor/historicfilms.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import parse_duration
diff --git a/hypervideo_dl/extractor/hitbox.py b/hypervideo_dl/extractor/hitbox.py
index 0470d0a..f0c6898 100644
--- a/hypervideo_dl/extractor/hitbox.py
+++ b/hypervideo_dl/extractor/hitbox.py
@@ -1,16 +1,13 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- parse_iso8601,
+ determine_ext,
float_or_none,
int_or_none,
- compat_str,
- determine_ext,
+ parse_iso8601,
)
@@ -121,7 +118,6 @@ class HitboxIE(InfoExtractor):
'tbr': bitrate,
'format_note': label,
})
- self._sort_formats(formats)
metadata = self._extract_metadata(
'https://www.smashcast.tv/api/media/video', video_id)
@@ -130,7 +126,7 @@ class HitboxIE(InfoExtractor):
return metadata
-class HitboxLiveIE(HitboxIE):
+class HitboxLiveIE(HitboxIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'hitbox:live'
_VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
_TESTS = [{
@@ -203,7 +199,6 @@ class HitboxLiveIE(HitboxIE):
'page_url': url,
'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
})
- self._sort_formats(formats)
metadata = self._extract_metadata(
'https://www.smashcast.tv/api/media/live', video_id)
diff --git a/hypervideo_dl/extractor/hitrecord.py b/hypervideo_dl/extractor/hitrecord.py
index fd5dc29..902af44 100644
--- a/hypervideo_dl/extractor/hitrecord.py
+++ b/hypervideo_dl/extractor/hitrecord.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/hketv.py b/hypervideo_dl/extractor/hketv.py
index 1f3502b..1087956 100644
--- a/hypervideo_dl/extractor/hketv.py
+++ b/hypervideo_dl/extractor/hketv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -140,7 +137,6 @@ class HKETVIE(InfoExtractor):
'width': w,
'height': h,
})
- self._sort_formats(formats)
subtitles = {}
tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
diff --git a/hypervideo_dl/extractor/holodex.py b/hypervideo_dl/extractor/holodex.py
new file mode 100644
index 0000000..a2b73ec
--- /dev/null
+++ b/hypervideo_dl/extractor/holodex.py
@@ -0,0 +1,100 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import traverse_obj
+
+
+class HolodexIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.|staging\.)?holodex\.net/(?:
+ api/v2/playlist/(?P<playlist>\d+)|
+ watch/(?P<id>[\w-]{11})(?:\?(?:[^#]+&)?playlist=(?P<playlist2>\d+))?
+ )'''
+ _TESTS = [{
+ 'url': 'https://holodex.net/watch/9kQ2GtvDV3s',
+ 'md5': 'be5ffce2f0feae8ba4c01553abc0f175',
+ 'info_dict': {
+ 'ext': 'mp4',
+ 'id': '9kQ2GtvDV3s',
+ 'title': '【おちゃめ機能】ホロライブが吹っ切れた【24人で歌ってみた】',
+ 'channel_id': 'UCJFZiqLMntJufDCHc6bQixg',
+ 'playable_in_embed': True,
+ 'tags': 'count:43',
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'description': 'md5:040e866c09dc4ab899b36479f4b7c7a2',
+ 'channel_url': 'https://www.youtube.com/channel/UCJFZiqLMntJufDCHc6bQixg',
+ 'upload_date': '20200406',
+ 'uploader_url': 'http://www.youtube.com/channel/UCJFZiqLMntJufDCHc6bQixg',
+ 'view_count': int,
+ 'channel': 'hololive ホロライブ - VTuber Group',
+ 'categories': ['Music'],
+ 'uploader': 'hololive ホロライブ - VTuber Group',
+ 'channel_follower_count': int,
+ 'uploader_id': 'UCJFZiqLMntJufDCHc6bQixg',
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/9kQ2GtvDV3s/maxresdefault.webp',
+ 'duration': 263,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'https://holodex.net/api/v2/playlist/239',
+ 'info_dict': {
+ 'id': '239',
+ 'title': 'Songs/Videos that made fall into the rabbit hole (from my google activity history)',
+ },
+ 'playlist_count': 14,
+ }, {
+ 'url': 'https://holodex.net/watch/_m2mQyaofjI?foo=bar&playlist=69',
+ 'info_dict': {
+ 'id': '69',
+ 'title': '拿著金斧頭的藍髮大姊姊'
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://holodex.net/watch/_m2mQyaofjI?playlist=69',
+ 'info_dict': {
+ 'id': '_m2mQyaofjI',
+ 'ext': 'mp4',
+ 'playable_in_embed': True,
+ 'like_count': int,
+ 'uploader': 'Ernst / エンスト',
+ 'duration': 11,
+ 'uploader_url': 'http://www.youtube.com/channel/UCqSX4PPZY0cyetqKVY_wRVA',
+ 'categories': ['Entertainment'],
+ 'title': '【星街すいせい】星街向你獻上晚安',
+ 'upload_date': '20210705',
+ 'description': 'md5:8b8ffb157bae77f2d109021a0b577d4a',
+ 'channel': 'Ernst / エンスト',
+ 'channel_id': 'UCqSX4PPZY0cyetqKVY_wRVA',
+ 'channel_follower_count': int,
+ 'view_count': int,
+ 'tags': [],
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCqSX4PPZY0cyetqKVY_wRVA',
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/_m2mQyaofjI/maxresdefault.webp',
+ 'age_limit': 0,
+ 'uploader_id': 'UCqSX4PPZY0cyetqKVY_wRVA',
+ 'comment_count': int,
+ },
+ 'params': {'noplaylist': True},
+ }, {
+ 'url': 'https://staging.holodex.net/api/v2/playlist/125',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://staging.holodex.net/watch/rJJTJA_T_b0?playlist=25',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://staging.holodex.net/watch/s1ifBeukThg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, playlist_id, pl_id2 = self._match_valid_url(url).group('id', 'playlist', 'playlist2')
+ playlist_id = playlist_id or pl_id2
+
+ if not self._yes_playlist(playlist_id, video_id):
+ return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE)
+
+ data = self._download_json(f'https://holodex.net/api/v2/playlist/{playlist_id}', playlist_id)
+ return self.playlist_from_matches(
+ traverse_obj(data, ('videos', ..., 'id')), playlist_id, data.get('name'), ie=YoutubeIE)
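
The playlist branch above uses traverse_obj to collect every video id from the
API response. A rough standalone equivalent of
traverse_obj(data, ('videos', ..., 'id')), assuming the response shape implied
by the code (the '...' key branches over all list items):

    data = {'videos': [{'id': '9kQ2GtvDV3s'}, {'id': '_m2mQyaofjI'}]}  # assumed shape
    video_ids = [v['id'] for v in data.get('videos') or [] if v.get('id')]
    # -> ['9kQ2GtvDV3s', '_m2mQyaofjI'], as fed to playlist_from_matches
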
diff --git a/hypervideo_dl/extractor/hornbunny.py b/hypervideo_dl/extractor/hornbunny.py
deleted file mode 100644
index c458a95..0000000
--- a/hypervideo_dl/extractor/hornbunny.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
-)
-
-
-class HornBunnyIE(InfoExtractor):
- _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
- _TEST = {
- 'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
- 'md5': 'e20fd862d1894b67564c96f180f43924',
- 'info_dict': {
- 'id': '5227',
- 'ext': 'mp4',
- 'title': 'panty slut jerk off instruction',
- 'duration': 550,
- 'age_limit': 18,
- 'view_count': int,
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
- info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
-
- duration = parse_duration(self._search_regex(
- r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
- webpage, 'duration', fatal=False))
- view_count = int_or_none(self._search_regex(
- r'<strong>Views:</strong>\s*(\d+)</div>',
- webpage, 'view count', fatal=False))
-
- info_dict.update({
- 'id': video_id,
- 'title': title,
- 'duration': duration,
- 'view_count': view_count,
- 'age_limit': 18,
- })
-
- return info_dict
diff --git a/hypervideo_dl/extractor/hotnewhiphop.py b/hypervideo_dl/extractor/hotnewhiphop.py
index 4703e18..f8570cb 100644
--- a/hypervideo_dl/extractor/hotnewhiphop.py
+++ b/hypervideo_dl/extractor/hotnewhiphop.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
diff --git a/hypervideo_dl/extractor/hotstar.py b/hypervideo_dl/extractor/hotstar.py
index d55a79b..61eec7b 100644
--- a/hypervideo_dl/extractor/hotstar.py
+++ b/hypervideo_dl/extractor/hotstar.py
@@ -1,31 +1,33 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import hmac
+import json
import re
import time
import uuid
-import json
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str
-)
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
+ join_nonempty,
str_or_none,
- try_get,
+ traverse_obj,
url_or_none,
)
class HotStarBaseIE(InfoExtractor):
+ _BASE_URL = 'https://www.hotstar.com'
+ _API_URL = 'https://api.hotstar.com'
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
+ def _call_api_v1(self, path, *args, **kwargs):
+ return self._download_json(
+ f'{self._API_URL}/o/v1/{path}', *args, **kwargs,
+ headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'})
+
def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
st = int_or_none(st) or int(time.time())
exp = st + 6000
@@ -36,7 +38,7 @@ class HotStarBaseIE(InfoExtractor):
token = cookies.get('userUP').value
else:
token = self._download_json(
- 'https://api.hotstar.com/um/v3/users',
+ f'{self._API_URL}/um/v3/users',
video_id, note='Downloading token',
data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'),
headers={
@@ -46,58 +48,48 @@ class HotStarBaseIE(InfoExtractor):
})['user_identity']
response = self._download_json(
- 'https://api.hotstar.com/' + path, video_id, headers={
+ f'{self._API_URL}/{path}', video_id, query=query,
+ headers={
'hotstarauth': auth,
'x-hs-appversion': '6.72.2',
'x-hs-platform': 'web',
'x-hs-usertoken': token,
- }, query=query)
+ })
if response['message'] != "Playback URL's fetched successfully":
raise ExtractorError(
response['message'], expected=True)
return response['data']
- def _call_api(self, path, video_id, query_name='contentId'):
- return self._download_json('https://api.hotstar.com/' + path, video_id=video_id, query={
- query_name: video_id,
- 'tas': 10000,
- }, headers={
- 'x-country-code': 'IN',
- 'x-platform-code': 'PCTV',
- })
-
def _call_api_v2(self, path, video_id, st=None, cookies=None):
return self._call_api_impl(
- '%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={
+ f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={
'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265',
'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()),
'os-name': 'Windows',
'os-version': '10',
})
+ def _playlist_entries(self, path, item_id, root=None, **kwargs):
+ results = self._call_api_v1(path, item_id, **kwargs)['body']['results']
+ for video in traverse_obj(results, (('assets', None), 'items', ...)):
+ if video.get('contentId'):
+ yield self.url_result(
+ HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId'])
+
class HotStarIE(HotStarBaseIE):
IE_NAME = 'hotstar'
_VALID_URL = r'''(?x)
- (?:
- hotstar\:|
- https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
- )
- (?:
- (?P<type>movies|sports|episode|(?P<tv>tv))
- (?:
- \:|
- /[^/?#]+/
- (?(tv)
- (?:[^/?#]+/){2}|
- (?:[^/?#]+/)*
- )
- )|
- [^/?#]+/
- )?
- (?P<id>\d{10})
- '''
+ https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
+ (?:
+ (?P<type>movies|sports|episode|(?P<tv>tv))/
+ (?(tv)(?:[^/?#]+/){2}|[^?#]*)
+ )?
+ [^/?#]+/
+ (?P<id>\d{10})
+ '''
+
_TESTS = [{
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
'info_dict': {
@@ -108,38 +100,9 @@ class HotStarIE(HotStarBaseIE):
'timestamp': 1447248600,
'upload_date': '20151111',
'duration': 381,
+ 'episode': 'Can You Not Spread Rumours?',
},
- }, {
- 'url': 'hotstar:1000076273',
- 'only_matching': True,
- }, {
- 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
- 'info_dict': {
- 'id': '1000057157',
- 'ext': 'mp4',
- 'title': 'Radha Gopalam',
- 'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22',
- 'timestamp': 1140805800,
- 'upload_date': '20060224',
- 'duration': 9182,
- },
- }, {
- 'url': 'hotstar:movies:1000057157',
- 'only_matching': True,
- }, {
- 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
- 'only_matching': True,
- }, {
- 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
- 'only_matching': True,
- }, {
- # contentData
- 'url': 'hotstar:sports:1260065956',
- 'only_matching': True,
- }, {
- # contentData
- 'url': 'hotstar:sports:1260066104',
- 'only_matching': True,
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
'info_dict': {
@@ -158,12 +121,19 @@ class HotStarIE(HotStarBaseIE):
'season_id': 6771,
'episode': 'Janhvi Targets Suman',
'episode_number': 8,
- },
+ }
}, {
- 'url': 'hotstar:episode:1000234847',
+ 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
'only_matching': True,
}]
_GEO_BYPASS = False
+
_TYPE = {
'movies': 'movie',
'sports': 'match',
@@ -172,41 +142,54 @@ class HotStarIE(HotStarBaseIE):
None: 'content',
}
+ _IGNORE_MAP = {
+ 'res': 'resolution',
+ 'vcodec': 'video_codec',
+ 'dr': 'dynamic_range',
+ }
+
+ @classmethod
+ def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
+ assert None in (video_type, root)
+ if not root:
+ root = join_nonempty(cls._BASE_URL, video_type, delim='/')
+ return f'{root}/{slug}/{video_id}'
+
def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- video_type = mobj.group('type')
- cookies = self._get_cookies(url)
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
video_type = self._TYPE.get(video_type, video_type)
- video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item']
- title = video_data['title']
+ cookies = self._get_cookies(url) # Cookies before any request
+ video_data = self._call_api_v1(f'{video_type}/detail', video_id,
+ query={'tas': 10000, 'contentId': video_id})['body']['results']['item']
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
self.report_drm(video_id)
- headers = {'Referer': 'https://www.hotstar.com/in'}
- formats = []
- subs = {}
+ # See https://github.com/hypervideo/hypervideo/issues/396
+ st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date')
+
geo_restricted = False
- _, urlh = self._download_webpage_handle('https://www.hotstar.com/in', video_id)
- # Required to fix https://github.com/hypervideo/hypervideo/issues/396
- st = urlh.headers.get('x-origin-date')
+ formats, subs = [], {}
+ headers = {'Referer': f'{self._BASE_URL}/in'}
+
# change to v2 in the future
playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
for playback_set in playback_sets:
if not isinstance(playback_set, dict):
continue
- dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr')
+ tags = str_or_none(playback_set.get('tagsCombination')) or ''
+ if any(f'{prefix}:{ignore}' in tags
+ for key, prefix in self._IGNORE_MAP.items()
+ for ignore in self._configuration_arg(key)):
+ continue
+
format_url = url_or_none(playback_set.get('playbackUrl'))
if not format_url:
continue
- format_url = re.sub(
- r'(?<=//staragvod)(\d)', r'web\1', format_url)
- tags = str_or_none(playback_set.get('tagsCombination')) or ''
- ingored_res, ignored_vcodec, ignored_dr = self._configuration_arg('res'), self._configuration_arg('vcodec'), self._configuration_arg('dr')
- if any(f'resolution:{ig_res}' in tags for ig_res in ingored_res) or any(f'video_codec:{ig_vc}' in tags for ig_vc in ignored_vcodec) or any(f'dynamic_range:{ig_dr}' in tags for ig_dr in ignored_dr):
- continue
+ format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
+ dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr')
ext = determine_ext(format_url)
+
current_formats, current_subs = [], {}
try:
if 'package:hls' in tags or ext == 'm3u8':
@@ -218,8 +201,7 @@ class HotStarIE(HotStarBaseIE):
current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
format_url, video_id, mpd_id=f'{dr}-dash', headers=headers)
elif ext == 'f4m':
- # produce broken files
- pass
+ pass # XXX: produce broken files
else:
current_formats = [{
'url': format_url,
@@ -230,6 +212,7 @@ class HotStarIE(HotStarBaseIE):
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
geo_restricted = True
continue
+
if tags and 'encryption:plain' not in tags:
for f in current_formats:
f['has_drm'] = True
@@ -238,18 +221,18 @@ class HotStarIE(HotStarBaseIE):
for f in current_formats:
if not f.get('language'):
f['language'] = lang
+
formats.extend(current_formats)
subs = self._merge_subtitles(subs, current_subs)
+
if not formats and geo_restricted:
self.raise_geo_restricted(countries=['IN'], metadata_available=True)
- self._sort_formats(formats)
-
for f in formats:
f.setdefault('http_headers', {}).update(headers)
return {
'id': video_id,
- 'title': title,
+ 'title': video_data.get('title'),
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
@@ -261,17 +244,51 @@ class HotStarIE(HotStarBaseIE):
'season': video_data.get('seasonName'),
'season_number': int_or_none(video_data.get('seasonNo')),
'season_id': video_data.get('seasonId'),
- 'episode': title,
+ 'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episodeNo')),
- 'http_headers': {
- 'Referer': 'https://www.hotstar.com/in',
- }
}
+class HotStarPrefixIE(InfoExtractor):
+ """ The "hotstar:" prefix is no longer in use, but this is kept for backward compatibility """
+ IE_DESC = False
+ _VALID_URL = r'hotstar:(?:(?P<type>\w+):)?(?P<id>\d+)$'
+ _TESTS = [{
+ 'url': 'hotstar:1000076273',
+ 'only_matching': True,
+ }, {
+ 'url': 'hotstar:movies:1260009879',
+ 'info_dict': {
+ 'id': '1260009879',
+ 'ext': 'mp4',
+ 'title': 'Nuvvu Naaku Nachav',
+ 'description': 'md5:d43701b1314e6f8233ce33523c043b7d',
+ 'timestamp': 1567525674,
+ 'upload_date': '20190903',
+ 'duration': 10787,
+ 'episode': 'Nuvvu Naaku Nachav',
+ },
+ }, {
+ 'url': 'hotstar:episode:1000234847',
+ 'only_matching': True,
+ }, {
+ # contentData
+ 'url': 'hotstar:sports:1260065956',
+ 'only_matching': True,
+ }, {
+ # contentData
+ 'url': 'hotstar:sports:1260066104',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
+ return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id)
+
+
class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist'
- _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/tv(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'info_dict': {
@@ -281,25 +298,49 @@ class HotStarPlaylistIE(HotStarBaseIE):
}, {
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- playlist_id = self._match_id(url)
+ id_ = self._match_id(url)
+ return self.playlist_result(
+ self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_)
- collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results']
- entries = [
- self.url_result(
- 'https://www.hotstar.com/%s' % video['contentId'],
- ie=HotStarIE.ie_key(), video_id=video['contentId'])
- for video in collection['assets']['items']
- if video.get('contentId')]
- return self.playlist_result(entries, playlist_id)
+class HotStarSeasonIE(HotStarBaseIE):
+ IE_NAME = 'hotstar:season'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
+ 'info_dict': {
+ 'id': '8028',
+ },
+ 'playlist_mincount': 35,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357',
+ 'info_dict': {
+ 'id': '4357',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/',
+ 'info_dict': {
+ 'id': '8208',
+ },
+ 'playlist_mincount': 19,
+ }]
+
+ def _real_extract(self, url):
+ url, season_id = self._match_valid_url(url).groups()
+ return self.playlist_result(self._playlist_entries(
+ 'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id)
class HotStarSeriesIE(HotStarBaseIE):
IE_NAME = 'hotstar:series'
- _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
'info_dict': {
@@ -317,25 +358,13 @@ class HotStarSeriesIE(HotStarBaseIE):
'info_dict': {
'id': '435',
},
- 'playlist_mincount': 269,
+ 'playlist_mincount': 267,
}]
def _real_extract(self, url):
url, series_id = self._match_valid_url(url).groups()
- headers = {
- 'x-country-code': 'IN',
- 'x-platform-code': 'PCTV',
- }
- detail_json = self._download_json('https://api.hotstar.com/o/v1/show/detail?contentId=' + series_id,
- video_id=series_id, headers=headers)
- id = compat_str(try_get(detail_json, lambda x: x['body']['results']['item']['id'], int))
- item_json = self._download_json('https://api.hotstar.com/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid=' + id,
- video_id=series_id, headers=headers)
- entries = [
- self.url_result(
- '%s/ignoreme/%d' % (url, video['contentId']),
- ie=HotStarIE.ie_key(), video_id=video['contentId'])
- for video in item_json['body']['results']['items']
- if video.get('contentId')]
-
- return self.playlist_result(entries, series_id)
+ id_ = self._call_api_v1(
+ 'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id']
+
+ return self.playlist_result(self._playlist_entries(
+ 'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id)
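
For reference, the new HotStarIE._video_url helper builds the canonical URLs
used by the prefix and playlist extractors above. A standalone restatement
with join_nonempty inlined (that utility joins its non-empty arguments with
the given delimiter):

    def video_url(video_id, video_type=None, slug='ignore_me', root=None):
        if not root:
            root = '/'.join(p for p in ('https://www.hotstar.com', video_type) if p)
        return f'{root}/{slug}/{video_id}'

    assert video_url('1000234847', 'episode') == 'https://www.hotstar.com/episode/ignore_me/1000234847'
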
diff --git a/hypervideo_dl/extractor/howcast.py b/hypervideo_dl/extractor/howcast.py
index 7e36b85..59cf80f 100644
--- a/hypervideo_dl/extractor/howcast.py
+++ b/hypervideo_dl/extractor/howcast.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import parse_iso8601
diff --git a/hypervideo_dl/extractor/howstuffworks.py b/hypervideo_dl/extractor/howstuffworks.py
index cf90ab3..238fc0b 100644
--- a/hypervideo_dl/extractor/howstuffworks.py
+++ b/hypervideo_dl/extractor/howstuffworks.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
@@ -77,8 +75,6 @@ class HowStuffWorksIE(InfoExtractor):
'vbr': vbr,
})
- self._sort_formats(formats)
-
return {
'id': '%s' % video_id,
'display_id': display_id,
diff --git a/hypervideo_dl/extractor/hrfensehen.py b/hypervideo_dl/extractor/hrfensehen.py
index e39ded2..35e9f67 100644
--- a/hypervideo_dl/extractor/hrfensehen.py
+++ b/hypervideo_dl/extractor/hrfensehen.py
@@ -1,17 +1,19 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
-from ..utils import int_or_none, unified_timestamp, unescapeHTML
from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ try_call,
+ unescapeHTML,
+ unified_timestamp,
+)
class HRFernsehenIE(InfoExtractor):
IE_NAME = 'hrfernsehen'
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
-
_TESTS = [{
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
'md5': '5c4e0ba94677c516a2f65a84110fc536',
@@ -24,10 +26,11 @@ class HRFernsehenIE(InfoExtractor):
'subtitles': {'de': [{
'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
}]},
- 'timestamp': 1598470200,
+ 'timestamp': 1598400000,
'upload_date': '20200826',
- 'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
- 'title': 'hessenschau vom 26.08.2020'
+ 'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
+ 'title': 'hessenschau vom 26.08.2020',
+ 'duration': 1654
}
}, {
'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
@@ -36,25 +39,18 @@ class HRFernsehenIE(InfoExtractor):
_GEO_COUNTRIES = ['DE']
- def extract_airdate(self, loader_data):
- airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')
-
- if airdate_str is None:
- return None
-
- return unified_timestamp(airdate_str)
-
def extract_formats(self, loader_data):
stream_formats = []
- for stream_obj in loader_data["videoResolutionLevels"]:
+ data = loader_data['mediaCollection']['streams'][0]['media']
+ for inner in data[1:]:
stream_format = {
- 'format_id': str(stream_obj['verticalResolution']) + "p",
- 'height': stream_obj['verticalResolution'],
- 'url': stream_obj['url'],
+ 'format_id': try_call(lambda: f'{inner["maxHResolutionPx"]}p'),
+ 'height': inner.get('maxHResolutionPx'),
+ 'url': inner['url'],
}
quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
- stream_obj['url'])
+ inner['url'])
if quality_information:
stream_format['width'] = int_or_none(quality_information.group(1))
stream_format['height'] = int_or_none(quality_information.group(2))
@@ -62,8 +58,6 @@ class HRFernsehenIE(InfoExtractor):
stream_format['tbr'] = int_or_none(quality_information.group(4))
stream_formats.append(stream_format)
-
- self._sort_formats(stream_formats)
return stream_formats
def _real_extract(self, url):
@@ -75,22 +69,22 @@ class HRFernsehenIE(InfoExtractor):
description = self._html_search_meta(
['description'], webpage)
- loader_str = unescapeHTML(self._search_regex(r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
+ loader_str = unescapeHTML(self._search_regex(r"data-(?:new-)?hr-mediaplayer-loader='([^']*)'", webpage, 'ardloader'))
loader_data = json.loads(loader_str)
+ subtitle = traverse_obj(loader_data, ('mediaCollection', 'subTitles', 0, 'sources', 0, 'url'))
+
info = {
'id': video_id,
'title': title,
'description': description,
'formats': self.extract_formats(loader_data),
- 'timestamp': self.extract_airdate(loader_data)
+ 'subtitles': {'de': [{'url': subtitle}]},
+ 'timestamp': unified_timestamp(self._search_regex(
+ r'<time\sdatetime="(\d{4}\W\d{1,2}\W\d{1,2})', webpage, 'datetime', fatal=False)),
+ 'duration': int_or_none(traverse_obj(
+ loader_data, ('playerConfig', 'pluginData', 'trackingAti@all', 'richMedia', 'duration'))),
+ 'thumbnail': self._search_regex(r'thumbnailUrl\W*([^"]+)', webpage, 'thumbnail', default=None),
}
- if "subtitle" in loader_data:
- info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}
-
- thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
- if len(thumbnails) > 0:
- info["thumbnails"] = [{"url": t} for t in thumbnails]
-
return info
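
The width/height/fps/bitrate fallback in extract_formats parses them straight
out of the stream URL. The same naming scheme shows up in the fixture's
subtitle URL above, which makes a convenient worked example:

    import re

    url = ('https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/'
           'hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt')
    m = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit', url)
    width, height, fps, tbr = map(int, m.groups())  # 512, 288, 25, 500
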
diff --git a/hypervideo_dl/extractor/hrti.py b/hypervideo_dl/extractor/hrti.py
index 36d6007..cfec80d 100644
--- a/hypervideo_dl/extractor/hrti.py
+++ b/hypervideo_dl/extractor/hrti.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -147,7 +144,6 @@ class HRTiIE(HRTiBaseIE):
formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
- self._sort_formats(formats)
description = clean_html(title_info.get('summary_long'))
age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
diff --git a/hypervideo_dl/extractor/hse.py b/hypervideo_dl/extractor/hse.py
index 9144ff8..3cb21d2 100644
--- a/hypervideo_dl/extractor/hse.py
+++ b/hypervideo_dl/extractor/hse.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -26,7 +25,6 @@ class HSEShowBaseInfoExtractor(InfoExtractor):
fmts, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, ext='mp4')
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return formats, subtitles
diff --git a/hypervideo_dl/extractor/huajiao.py b/hypervideo_dl/extractor/huajiao.py
index 4ca275d..c498fa3 100644
--- a/hypervideo_dl/extractor/huajiao.py
+++ b/hypervideo_dl/extractor/huajiao.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
diff --git a/hypervideo_dl/extractor/huffpost.py b/hypervideo_dl/extractor/huffpost.py
index 54385ba..69fdc34 100644
--- a/hypervideo_dl/extractor/huffpost.py
+++ b/hypervideo_dl/extractor/huffpost.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -19,6 +17,7 @@ class HuffPostIE(InfoExtractor):
HPLEmbedPlayer/\?segmentId=
)
(?P<id>[0-9a-f]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1']
_TEST = {
'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
@@ -80,8 +79,6 @@ class HuffPostIE(InfoExtractor):
'vcodec': 'none' if key.startswith('audio/') else None,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_title,
diff --git a/hypervideo_dl/extractor/hungama.py b/hypervideo_dl/extractor/hungama.py
index 821b16e..2e99396 100644
--- a/hypervideo_dl/extractor/hungama.py
+++ b/hypervideo_dl/extractor/hungama.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -23,15 +20,17 @@ class HungamaIE(InfoExtractor):
'''
_TESTS = [{
'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
- 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+ 'md5': '687c5f1e9f832f3b59f44ed0eb1f120a',
'info_dict': {
- 'id': '2931166',
+ 'id': '39349649',
'ext': 'mp4',
- 'title': 'Lucky Ali - Kitni Haseen Zindagi',
- 'track': 'Kitni Haseen Zindagi',
- 'artist': 'Lucky Ali',
- 'album': 'Aks',
- 'release_year': 2000,
+ 'title': 'Krishna Chants',
+ 'description': 'Watch Krishna Chants video now. You can also watch other latest videos only at Hungama',
+ 'upload_date': '20180829',
+ 'duration': 264,
+ 'timestamp': 1535500800,
+ 'view_count': int,
+ 'thumbnail': 'https://images.hungama.com/c/1/0dc/2ca/39349649/39349649_700x394.jpg',
}
}, {
'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
@@ -43,12 +42,7 @@ class HungamaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- info = self._search_json_ld(webpage, video_id)
-
- m3u8_url = self._download_json(
+ video_json = self._download_json(
'https://www.hungama.com/index.php', video_id,
data=urlencode_postdata({'content_id': video_id}), headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
@@ -56,18 +50,24 @@ class HungamaIE(InfoExtractor):
}, query={
'c': 'common',
'm': 'get_video_mdn_url',
- })['stream_url']
+ })
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- self._sort_formats(formats)
+ formats = self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls')
- info.update({
+ json_ld = self._search_json_ld(
+ self._download_webpage(url, video_id, fatal=False) or '', video_id, fatal=False)
+
+ return {
+ **json_ld,
'id': video_id,
'formats': formats,
- })
- return info
+ 'subtitles': {
+ 'en': [{
+ 'url': video_json['sub_title'],
+ 'ext': 'vtt',
+ }]
+ } if video_json.get('sub_title') else None,
+ }
class HungamaSongIE(InfoExtractor):
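
The rewritten flow fetches the stream metadata first and only then loads the
web page (non-fatally) for JSON-LD. A sketch of the metadata request, with the
endpoint and parameters as shown above and the response fields assumed from
how the code reads them:

    from urllib.parse import urlencode

    body = urlencode({'content_id': '39349649'}).encode()
    # POST https://www.hungama.com/index.php?c=common&m=get_video_mdn_url
    # (form-encoded, XMLHttpRequest-style); the JSON reply carries
    # 'stream_url' (an HLS manifest) and, when present, 'sub_title' (VTT).
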
diff --git a/hypervideo_dl/extractor/huya.py b/hypervideo_dl/extractor/huya.py
index 4e96f22..b6e9eec 100644
--- a/hypervideo_dl/extractor/huya.py
+++ b/hypervideo_dl/extractor/huya.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import random
@@ -9,7 +6,6 @@ from ..compat import compat_urlparse, compat_b64decode
from ..utils import (
ExtractorError,
int_or_none,
- js_to_json,
str_or_none,
try_get,
unescapeHTML,
@@ -58,11 +54,7 @@ class HuyaLiveIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id=video_id)
- json_stream = self._search_regex(r'"stream":\s+"([a-zA-Z0-9+=/]+)"', webpage, 'stream', default=None)
- if not json_stream:
- raise ExtractorError('Video is offline', expected=True)
- stream_data = self._parse_json(compat_b64decode(json_stream).decode(), video_id=video_id,
- transform_source=js_to_json)
+ stream_data = self._search_json(r'stream:\s', webpage, 'stream', video_id=video_id, default=None)
room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
if not room_info:
raise ExtractorError('Can not extract the room info', expected=True)
@@ -70,6 +62,8 @@ class HuyaLiveIE(InfoExtractor):
screen_type = room_info.get('screenType')
live_source_type = room_info.get('liveSourceType')
stream_info_list = stream_data['data'][0]['gameStreamInfoList']
+ if not stream_info_list:
+ raise ExtractorError('Video is offline', expected=True)
formats = []
for stream_info in stream_info_list:
stream_url = stream_info.get('sFlvUrl')
@@ -99,8 +93,6 @@ class HuyaLiveIE(InfoExtractor):
**self._RESOLUTION.get(si.get('sDisplayName'), {}),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/hypem.py b/hypervideo_dl/extractor/hypem.py
index 9ca28d6..54db7b3 100644
--- a/hypervideo_dl/extractor/hypem.py
+++ b/hypervideo_dl/extractor/hypem.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import int_or_none
diff --git a/hypervideo_dl/extractor/hytale.py b/hypervideo_dl/extractor/hytale.py
new file mode 100644
index 0000000..0f4dcc3
--- /dev/null
+++ b/hypervideo_dl/extractor/hytale.py
@@ -0,0 +1,58 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class HytaleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hytale\.com/news/\d+/\d+/(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://hytale.com/news/2021/07/summer-2021-development-update',
+ 'info_dict': {
+ 'id': 'summer-2021-development-update',
+ 'title': 'Summer 2021 Development Update',
+ },
+ 'playlist_count': 4,
+ 'playlist': [{
+ 'md5': '0854ebe347d233ee19b86ab7b2ead610',
+ 'info_dict': {
+ 'id': 'ed51a2609d21bad6e14145c37c334999',
+ 'ext': 'mp4',
+ 'title': 'Avatar Personalization',
+ 'thumbnail': r're:https://videodelivery\.net/\w+/thumbnails/thumbnail\.jpg',
+ }
+ }]
+ }, {
+ 'url': 'https://www.hytale.com/news/2019/11/hytale-graphics-update',
+ 'info_dict': {
+ 'id': 'hytale-graphics-update',
+ 'title': 'Hytale graphics update',
+ },
+ 'playlist_count': 2,
+ }]
+
+ def _real_initialize(self):
+ media_webpage = self._download_webpage(
+ 'https://hytale.com/media', None, note='Downloading list of media', fatal=False) or ''
+
+ clips_json = traverse_obj(
+ self._search_json(
+ r'window\.__INITIAL_COMPONENTS_STATE__\s*=\s*\[',
+ media_webpage, 'clips json', None),
+ ('media', 'clips')) or []
+
+ self._titles = {clip.get('src'): clip.get('caption') for clip in clips_json}
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ entries = [
+ self.url_result(
+ f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
+ title=self._titles.get(video_hash), url_transparent=True)
+ for video_hash in re.findall(
+ r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
+ webpage)
+ ]
+
+ return self.playlist_result(entries, playlist_id, self._og_search_title(webpage))
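
Each clip in a Hytale news post is referenced by a 32-hex Cloudflare Stream
hash inside a custom <stream> tag; the extraction step in isolation, using the
hash from the first test above as sample markup:

    import re

    webpage = '<stream class="ql-video cf-stream" src="ed51a2609d21bad6e14145c37c334999"></stream>'
    hashes = re.findall(
        r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
        webpage)
    # each hash becomes https://cloudflarestream.com/<hash>/manifest/video.mpd?...
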
diff --git a/hypervideo_dl/extractor/icareus.py b/hypervideo_dl/extractor/icareus.py
new file mode 100644
index 0000000..d081cf4
--- /dev/null
+++ b/hypervideo_dl/extractor/icareus.py
@@ -0,0 +1,179 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ get_element_by_class,
+ int_or_none,
+ merge_dicts,
+ parse_bitrate,
+ parse_resolution,
+ remove_end,
+ str_or_none,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class IcareusIE(InfoExtractor):
+ _DOMAINS = '|'.join(map(re.escape, (
+ 'asahitv.fi',
+ 'helsinkikanava.fi',
+ 'hyvinvointitv.fi',
+ 'inez.fi',
+ 'permanto.fi',
+ 'suite.icareus.com',
+ 'videos.minifiddlers.org',
+ )))
+ _VALID_URL = rf'(?P<base_url>https?://(?:www\.)?(?:{_DOMAINS}))/[^?#]+/player/[^?#]+\?(?:[^#]+&)?(?:assetId|eventId)=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.helsinkikanava.fi/fi_FI/web/helsinkikanava/player/vod?assetId=68021894',
+ 'md5': 'ca0b62ffc814a5411dfa6349cf5adb8a',
+ 'info_dict': {
+ 'id': '68021894',
+ 'ext': 'mp4',
+ 'title': 'Perheiden parhaaksi',
+ 'description': 'md5:295785ea408e5ac00708766465cc1325',
+ 'thumbnail': 'https://www.helsinkikanava.fi/image/image_gallery?img_id=68022501',
+ 'upload_date': '20200924',
+ 'timestamp': 1600938300,
+ },
+ }, { # Recorded livestream
+ 'url': 'https://www.helsinkikanava.fi/fi/web/helsinkikanava/player/event/view?eventId=76241489',
+ 'md5': '014327e69dfa7b949fcc861f6d162d6d',
+ 'info_dict': {
+ 'id': '76258304',
+ 'ext': 'mp4',
+ 'title': 'Helsingin kaupungin ja HUSin tiedotustilaisuus koronaepidemiatilanteesta 24.11.2020',
+ 'description': 'md5:3129d041c6fbbcdc7fe68d9a938fef1c',
+ 'thumbnail': 'https://icareus-suite.secure2.footprint.net/image/image_gallery?img_id=76288630',
+ 'upload_date': '20201124',
+ 'timestamp': 1606206600,
+ },
+ }, { # Non-m3u8 stream
+ 'url': 'https://suite.icareus.com/fi/web/westend-indians/player/vod?assetId=47567389',
+ 'md5': '72fc04ee971bbedc44405cdf16c990b6',
+ 'info_dict': {
+ 'id': '47567389',
+ 'ext': 'mp4',
+ 'title': 'Omatoiminen harjoittelu - Laukominen',
+ 'description': '',
+ 'thumbnail': 'https://suite.icareus.com/image/image_gallery?img_id=47568162',
+ 'upload_date': '20200319',
+ 'timestamp': 1584658080,
+ },
+ }, {
+ 'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818',
+ 'only_matching': True
+ }, {
+ 'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730',
+ 'only_matching': True
+ }, {
+ 'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515',
+ 'only_matching': True
+ }, {
+ 'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ base_url, temp_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, temp_id)
+
+ video_id = self._search_regex(r"_icareus\['itemId'\]\s*=\s*'(\d+)'", webpage, 'video_id')
+ organization_id = self._search_regex(r"_icareus\['organizationId'\]\s*=\s*'(\d+)'", webpage, 'organization_id')
+
+ assets = self._download_json(
+ self._search_regex(r'var\s+publishingServiceURL\s*=\s*"(http[^"]+)";', webpage, 'api_base'),
+ video_id, data=urlencode_postdata({
+ 'version': '03',
+ 'action': 'getAssetPlaybackUrls',
+ 'organizationId': organization_id,
+ 'assetId': video_id,
+ 'token': self._search_regex(r"_icareus\['token'\]\s*=\s*'([a-f0-9]+)'", webpage, 'icareus_token'),
+ }))
+
+ subtitles = {
+ remove_end(sdesc.split(' ')[0], ':'): [{'url': url_or_none(surl)}]
+ for _, sdesc, surl in assets.get('subtitles') or []
+ }
+
+ formats = [{
+ 'format': item.get('name'),
+ 'format_id': 'audio',
+ 'vcodec': 'none',
+ 'url': url_or_none(item['url']),
+ 'tbr': int_or_none(self._search_regex(
+ r'\((\d+)\s*k\)', item.get('name') or '', 'audio bitrate', default=None)),
+ } for item in assets.get('audio_urls') or [] if url_or_none(item.get('url'))]
+
+ for item in assets.get('urls') or []:
+ video_url = url_or_none(item.get('url'))
+ if video_url is None:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ fmt = item.get('name')
+ formats.append({
+ 'url': video_url,
+ 'format': fmt,
+ 'tbr': parse_bitrate(fmt),
+ 'format_id': str_or_none(item.get('id')),
+ **parse_resolution(fmt),
+ })
+
+ info, token, live_title = self._search_json_ld(webpage, video_id, default={}), None, None
+ if not info:
+ token = self._search_regex(
+ r'data\s*:\s*{action:"getAsset".*?token:\'([a-f0-9]+)\'}', webpage, 'token', default=None)
+ if not token:
+ live_title = get_element_by_class('unpublished-info-item future-event-title', webpage)
+
+ if token:
+ metadata = self._download_json(
+ f'{base_url}/icareus-suite-api-portlet/publishing',
+ video_id, fatal=False, data=urlencode_postdata({
+ 'version': '03',
+ 'action': 'getAsset',
+ 'organizationId': organization_id,
+ 'assetId': video_id,
+ 'languageId': 'en_US',
+ 'userId': '0',
+ 'token': token,
+ })) or {}
+ info = {
+ 'title': metadata.get('name'),
+ 'description': metadata.get('description'),
+ 'timestamp': int_or_none(metadata.get('date'), scale=1000),
+ 'duration': int_or_none(metadata.get('duration')),
+ 'thumbnail': url_or_none(metadata.get('thumbnailMedium')),
+ }
+ elif live_title: # Recorded livestream
+ info = {
+ 'title': live_title,
+ 'description': get_element_by_class('unpublished-info-item future-event-description', webpage),
+ 'timestamp': int_or_none(self._search_regex(
+ r'var startEvent\s*=\s*(\d+);', webpage, 'uploadDate', fatal=False), scale=1000),
+ }
+
+ thumbnails = info.get('thumbnails') or [{
+ 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail'))
+ }]
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': None,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': clean_html(info.get('description')),
+ 'thumbnails': thumbnails if thumbnails[0]['url'] else None,
+ }, info)
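
The subtitles mapping above keys each track by the first word of its
description. Given the triple shape the code assumes ([id, 'Language: note',
url]), it behaves like this (the sample triple is hypothetical):

    from hypervideo_dl.utils import remove_end  # strips a suffix if present

    assets_subtitles = [[1, 'Finnish: default', 'https://example.invalid/fi.vtt']]
    subtitles = {
        remove_end(sdesc.split(' ')[0], ':'): [{'url': surl}]
        for _, sdesc, surl in assets_subtitles
    }
    # -> {'Finnish': [{'url': 'https://example.invalid/fi.vtt'}]}
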
diff --git a/hypervideo_dl/extractor/ichinanalive.py b/hypervideo_dl/extractor/ichinanalive.py
index cb39f82..9d55ddc 100644
--- a/hypervideo_dl/extractor/ichinanalive.py
+++ b/hypervideo_dl/extractor/ichinanalive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
from ..compat import compat_str
@@ -76,8 +73,6 @@ class IchinanaLiveIE(InfoExtractor):
'acodec': 'aac',
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': uploader or video_id,
@@ -150,8 +145,6 @@ class IchinanaLiveClipIE(InfoExtractor):
'http_headers': {'Referer': url},
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': uploader or video_id,
diff --git a/hypervideo_dl/extractor/ign.py b/hypervideo_dl/extractor/ign.py
index c826eb3..d4797d3 100644
--- a/hypervideo_dl/extractor/ign.py
+++ b/hypervideo_dl/extractor/ign.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -104,8 +102,6 @@ class IGNIE(IGNBaseIE):
'url': mezzanine_url,
})
- self._sort_formats(formats)
-
thumbnails = []
for thumbnail in (video.get('thumbnails') or []):
thumbnail_url = thumbnail.get('url')
diff --git a/hypervideo_dl/extractor/iheart.py b/hypervideo_dl/extractor/iheart.py
index b54c05e..2c6a5b6 100644
--- a/hypervideo_dl/extractor/iheart.py
+++ b/hypervideo_dl/extractor/iheart.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/iltalehti.py b/hypervideo_dl/extractor/iltalehti.py
new file mode 100644
index 0000000..0e7e82c
--- /dev/null
+++ b/hypervideo_dl/extractor/iltalehti.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from ..utils import js_to_json, traverse_obj
+
+
+class IltalehtiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?iltalehti\.fi/[^/?#]+/a/(?P<id>[^/?#]+)'
+ _TESTS = [
+ # jwplatform embed main_media
+ {
+ 'url': 'https://www.iltalehti.fi/ulkomaat/a/9fbd067f-94e4-46cd-8748-9d958eb4dae2',
+ 'md5': 'af12d42c539f1f49f0b62d231fe72dcd',
+ 'info_dict': {
+ 'id': 'gYjjaf1L',
+ 'ext': 'mp4',
+ 'title': 'Sensuroimaton Päivärinta, jakso 227: Vieraana Suomen Venäjän ex-suurlähettiläs René Nyberg ja Kenraalimajuri evp Pekka Toveri',
+ 'description': '',
+ 'upload_date': '20220928',
+ 'timestamp': 1664360878,
+ 'duration': 2089,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ },
+ # jwplatform embed body
+ {
+ 'url': 'https://www.iltalehti.fi/politiikka/a/1ce49d85-1670-428b-8db8-d2479b9950a4',
+ 'md5': '9e50334b8f8330ce8828b567a82a3c65',
+ 'info_dict': {
+ 'id': '18R6zkLi',
+ 'ext': 'mp4',
+ 'title': 'Pekka Toverin arvio: Näin Nord Stream -kaasuputken räjäyttäminen on saatettu toteuttaa',
+ 'description': 'md5:3d1302c9e17e7ffd564143ff58f8de35',
+ 'upload_date': '20220929',
+ 'timestamp': 1664435867,
+ 'duration': 165.0,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ info = self._search_json(
+ r'<script>\s*window.App\s*=', webpage, 'json', article_id,
+ transform_source=js_to_json)
+ props = traverse_obj(info, (
+ 'state', 'articles', ..., 'items', (('main_media', 'properties'), ('body', ..., 'properties'))))
+ video_ids = traverse_obj(props, (lambda _, v: v['provider'] == 'jwplayer', 'id'))
+ return self.playlist_from_matches(
+ video_ids, article_id, ie='JWPlatform', getter=lambda id: f'jwplatform:{id}',
+ title=traverse_obj(info, ('state', 'articles', ..., 'items', 'canonical_title'), get_all=False))
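
The predicate path in traverse_obj above keeps only jwplayer items. Roughly
equivalent, assuming the item shapes implied by the code:

    props = [{'provider': 'jwplayer', 'id': 'gYjjaf1L'}, {'provider': 'image'}]
    video_ids = [p['id'] for p in props
                 if p.get('provider') == 'jwplayer' and p.get('id')]
    # -> ['gYjjaf1L'], which the getter maps to 'jwplatform:gYjjaf1L'
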
diff --git a/hypervideo_dl/extractor/imdb.py b/hypervideo_dl/extractor/imdb.py
index 96cee2e..557a3b7 100644
--- a/hypervideo_dl/extractor/imdb.py
+++ b/hypervideo_dl/extractor/imdb.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import base64
import json
import re
@@ -102,7 +100,6 @@ class ImdbIE(InfoExtractor):
'ext': ext,
'quality': quality(format_id),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/imggaming.py b/hypervideo_dl/extractor/imggaming.py
index ce7b21a..8e220fd 100644
--- a/hypervideo_dl/extractor/imggaming.py
+++ b/hypervideo_dl/extractor/imggaming.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -106,7 +103,6 @@ class ImgGamingBaseIE(InfoExtractor):
formats.extend(self._extract_mpd_formats(
media_url, media_id, mpd_id='dash', fatal=False,
headers=self._MANIFEST_HEADERS))
- self._sort_formats(formats)
subtitles = {}
for subtitle in video_data.get('subtitles', []):
diff --git a/hypervideo_dl/extractor/imgur.py b/hypervideo_dl/extractor/imgur.py
index c917cf1..061c4cc 100644
--- a/hypervideo_dl/extractor/imgur.py
+++ b/hypervideo_dl/extractor/imgur.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -86,8 +84,6 @@ class ImgurIE(InfoExtractor):
},
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
@@ -140,7 +136,7 @@ class ImgurGalleryIE(InfoExtractor):
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
-class ImgurAlbumIE(ImgurGalleryIE):
+class ImgurAlbumIE(ImgurGalleryIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'imgur:album'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
diff --git a/hypervideo_dl/extractor/ina.py b/hypervideo_dl/extractor/ina.py
index b3b2683..857013d 100644
--- a/hypervideo_dl/extractor/ina.py
+++ b/hypervideo_dl/extractor/ina.py
@@ -1,26 +1,19 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- strip_or_none,
- xpath_attr,
- xpath_text,
-)
+from ..utils import unified_strdate
class InaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:[^?#]+/)(?P<id>[\w-]+)'
_TESTS = [{
- 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
- 'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
+ 'url': 'https://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
+ 'md5': 'c5a09e5cb5604ed10709f06e7a377dda',
'info_dict': {
'id': 'I12055569',
'ext': 'mp4',
'title': 'François Hollande "Je crois que c\'est clair"',
- 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
+ 'description': 'md5:19f61e2b4844ed4bb2e3df9ab9f527ff',
+ 'upload_date': '20070712',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/3c4/I12055569.jpeg',
}
}, {
'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
@@ -34,53 +27,58 @@ class InaIE(InfoExtractor):
}, {
'url': 'http://m.ina.fr/video/I12055569',
'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/video/cpb8205116303/les-jeux-electroniques',
+ 'md5': '4b8284a9a3a184fdc7e744225b8251e7',
+ 'info_dict': {
+ 'id': 'CPB8205116303',
+ 'ext': 'mp4',
+ 'title': 'Les jeux électroniques',
+ 'description': 'md5:e09f7683dad1cc60b74950490127d233',
+ 'upload_date': '19821204',
+ 'duration': 657,
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/203/CPB8205116303.jpeg',
+ },
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/arletty-carriere-conseils-actrice-marcel-carne',
+ 'md5': '743d6f069a00e19dda0da166a54eeccb',
+ 'info_dict': {
+ 'id': 'I22203233',
+ 'ext': 'mp4',
+ 'title': 'Arletty sur le métier d\'actrice',
+ 'description': 'md5:3d89b5e419d8514c934f146045ccdbad',
+ 'upload_date': '19581128',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/082/I22203233.jpeg',
+ },
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/chasse-croise-sncf-gare-d-austerlitz-vacances-d-ete',
+ 'md5': 'a96fb85e9ba3b5c5b2eeb0c5daa55f2f',
+ 'info_dict': {
+ 'id': 'CAF91038285',
+ 'ext': 'mp4',
+ 'title': 'Les grands départs : les trains',
+ 'description': 'md5:1630ee819d8d4da97df53459e99f72bb',
+ 'upload_date': '19740801',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/2cf/CAF91038285.jpeg',
+ },
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- info_doc = self._download_xml(
- 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id)
- item = info_doc.find('channel/item')
- title = xpath_text(item, 'title', fatal=True)
- media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/')
- content = item.find(media_ns_xpath('content'))
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
- get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url')
- formats = []
- for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)):
- q_url = get_furl(q)
- if not q_url:
- continue
- formats.append({
- 'format_id': q,
- 'url': q_url,
- 'width': w,
- 'height': h,
- })
- if not formats:
- furl = get_furl('player') or content.attrib['url']
- ext = determine_ext(furl)
- formats = [{
- 'url': furl,
- 'vcodec': 'none' if ext == 'mp3' else None,
- 'ext': ext,
- }]
+ api_url = self._html_search_regex(r'asset-details-url\s*=\s*["\'](?P<api_url>[^"\']+)', webpage, 'api_url')
+ asset_id = self._search_regex(r'assets/([^?/]+)', api_url, 'asset_id')
- thumbnails = []
- for thumbnail in content.findall(media_ns_xpath('thumbnail')):
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'height': int_or_none(thumbnail.get('height')),
- 'width': int_or_none(thumbnail.get('width')),
- })
+ api_response = self._download_json(api_url.replace(asset_id, f'{asset_id}.json'), asset_id)
return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'description': strip_or_none(xpath_text(item, 'description')),
- 'thumbnails': thumbnails,
+ 'id': asset_id,
+ 'url': api_response['resourceUrl'],
+ 'ext': {'video': 'mp4', 'audio': 'mp3'}.get(api_response.get('type')),
+ 'title': api_response.get('title'),
+ 'description': api_response.get('description'),
+ 'upload_date': unified_strdate(api_response.get('dateOfBroadcast')),
+ 'duration': api_response.get('duration'),
+ 'thumbnail': api_response.get('resourceThumbnail'),
}
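
The rewritten ina.fr extractor reads the player's asset-details URL from the
page and swaps the asset id for an '<id>.json' endpoint. The URL surgery in
isolation (the api_url value is a hypothetical example of the shape):

    import re

    api_url = 'https://player-api.ina.fr/assets/I12055569'  # hypothetical
    asset_id = re.search(r'assets/([^?/]+)', api_url).group(1)  # 'I12055569'
    json_url = api_url.replace(asset_id, f'{asset_id}.json')
    # -> 'https://player-api.ina.fr/assets/I12055569.json'
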
diff --git a/hypervideo_dl/extractor/inc.py b/hypervideo_dl/extractor/inc.py
index d5b258a..9b3fe9a 100644
--- a/hypervideo_dl/extractor/inc.py
+++ b/hypervideo_dl/extractor/inc.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .kaltura import KalturaIE
diff --git a/hypervideo_dl/extractor/indavideo.py b/hypervideo_dl/extractor/indavideo.py
index 4c16243..4fa97d8 100644
--- a/hypervideo_dl/extractor/indavideo.py
+++ b/hypervideo_dl/extractor/indavideo.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -15,6 +10,14 @@ from ..utils import (
class IndavideoEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
+ # Some example URLs covered by generic extractor:
+ # http://indavideo.hu/video/Vicces_cica_1
+ # http://index.indavideo.hu/video/2015_0728_beregszasz
+ # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+ # http://erotika.indavideo.hu/video/Amator_tini_punci
+ # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
+ # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
_TESTS = [{
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
@@ -40,20 +43,6 @@ class IndavideoEmbedIE(InfoExtractor):
'only_matching': True,
}]
- # Some example URLs covered by generic extractor:
- # http://indavideo.hu/video/Vicces_cica_1
- # http://index.indavideo.hu/video/2015_0728_beregszasz
- # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
- # http://erotika.indavideo.hu/video/Amator_tini_punci
- # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
- # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -100,7 +89,6 @@ class IndavideoEmbedIE(InfoExtractor):
'url': video_url,
'height': height,
})
- self._sort_formats(formats)
timestamp = video.get('date')
if timestamp:
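
As with YoutubeIE._extract_embed_urls in heise.py above, per-extractor
_extract_urls static methods are being replaced by a declarative _EMBED_REGEX
list that the common base class matches against the page. Conceptually the
base class now does something like this (simplified sketch, not the actual
implementation):

    import re

    class InfoExtractor:
        _EMBED_REGEX = []

        @classmethod
        def _extract_embed_urls(cls, url, webpage):
            for pattern in cls._EMBED_REGEX:
                for mobj in re.finditer(pattern, webpage):
                    yield mobj.group('url')
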
diff --git a/hypervideo_dl/extractor/infoq.py b/hypervideo_dl/extractor/infoq.py
index 347cc51..192bcfe 100644
--- a/hypervideo_dl/extractor/infoq.py
+++ b/hypervideo_dl/extractor/infoq.py
@@ -1,15 +1,13 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
+ ExtractorError,
determine_ext,
update_url_query,
+ traverse_obj,
)
from .bokecc import BokeCCBaseIE
@@ -38,6 +36,7 @@ class InfoQIE(BokeCCBaseIE):
'ext': 'flv',
'description': 'md5:308d981fb28fa42f49f9568322c683ff',
},
+ 'skip': 'Sorry, the page you visited does not exist',
}, {
'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
'md5': '0e34642d4d9ef44bf86f66f6399672db',
@@ -90,8 +89,10 @@ class InfoQIE(BokeCCBaseIE):
}]
def _extract_http_audio(self, webpage, video_id):
- fields = self._form_hidden_inputs('mp3Form', webpage)
- http_audio_url = fields.get('filename')
+ try:
+ http_audio_url = traverse_obj(self._form_hidden_inputs('mp3Form', webpage), 'filename')
+ except ExtractorError:
+ http_audio_url = None
if not http_audio_url:
return []
@@ -127,8 +128,6 @@ class InfoQIE(BokeCCBaseIE):
+ self._extract_http_video(webpage)
+ self._extract_http_audio(webpage, video_id))
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_title,
diff --git a/hypervideo_dl/extractor/instagram.py b/hypervideo_dl/extractor/instagram.py
index 970f2c8..0233513 100644
--- a/hypervideo_dl/extractor/instagram.py
+++ b/hypervideo_dl/extractor/instagram.py
@@ -1,19 +1,17 @@
-# coding: utf-8
-
-import itertools
import hashlib
+import itertools
import json
import re
import time
+import urllib.error
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
-)
from ..utils import (
ExtractorError,
- format_field,
+ decode_base_n,
+ encode_base_n,
float_or_none,
+ format_field,
get_element_by_attribute,
int_or_none,
lowercase_escape,
@@ -24,42 +22,59 @@ from ..utils import (
urlencode_postdata,
)
+_ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
+
+
+def _pk_to_id(id):
+ """Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
+ return encode_base_n(int(id.split('_')[0]), table=_ENCODING_CHARS)
+
+
+def _id_to_pk(shortcode):
+ """Covert a shortcode to a numeric value"""
+ return decode_base_n(shortcode[:11], table=_ENCODING_CHARS)
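+
+# Sanity check for the two helpers above (assuming encode_base_n/decode_base_n
+# are plain positional base-64 converters over _ENCODING_CHARS): _pk_to_id('64')
+# yields 'BA' (1*64 + 0) and _id_to_pk('BA') yields 64 back, so they round-trip.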
+
class InstagramBaseIE(InfoExtractor):
_NETRC_MACHINE = 'instagram'
_IS_LOGGED_IN = False
+ _API_BASE_URL = 'https://i.instagram.com/api/v1'
+ _LOGIN_URL = 'https://www.instagram.com/accounts/login'
+ _API_HEADERS = {
+ 'X-IG-App-ID': '936619743392459',
+ 'X-ASBD-ID': '198387',
+ 'X-IG-WWW-Claim': '0',
+ 'Origin': 'https://www.instagram.com',
+ 'Accept': '*/*',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
+ }
+
def _perform_login(self, username, password):
if self._IS_LOGGED_IN:
return
login_webpage = self._download_webpage(
- 'https://www.instagram.com/accounts/login/', None,
- note='Downloading login webpage', errnote='Failed to download login webpage')
+ self._LOGIN_URL, None, note='Downloading login webpage', errnote='Failed to download login webpage')
- shared_data = self._parse_json(
- self._search_regex(
- r'window\._sharedData\s*=\s*({.+?});',
- login_webpage, 'shared data', default='{}'),
- None)
-
- login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={
- 'Accept': '*/*',
- 'X-IG-App-ID': '936619743392459',
- 'X-ASBD-ID': '198387',
- 'X-IG-WWW-Claim': '0',
- 'X-Requested-With': 'XMLHttpRequest',
- 'X-CSRFToken': shared_data['config']['csrf_token'],
- 'X-Instagram-AJAX': shared_data['rollout_hash'],
- 'Referer': 'https://www.instagram.com/',
- }, data=urlencode_postdata({
- 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
- 'username': username,
- 'queryParams': '{}',
- 'optIntoOneTap': 'false',
- 'stopDeletionNonce': '',
- 'trustedDeviceRecords': '{}',
- }))
+ shared_data = self._parse_json(self._search_regex(
+ r'window\._sharedData\s*=\s*({.+?});', login_webpage, 'shared data', default='{}'), None)
+
+ login = self._download_json(
+ f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
+ **self._API_HEADERS,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-CSRFToken': shared_data['config']['csrf_token'],
+ 'X-Instagram-AJAX': shared_data['rollout_hash'],
+ 'Referer': 'https://www.instagram.com/',
+ }, data=urlencode_postdata({
+ 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
+ 'username': username,
+ 'queryParams': '{}',
+ 'optIntoOneTap': 'false',
+ 'stopDeletionNonce': '',
+ 'trustedDeviceRecords': '{}',
+ }))
if not login.get('authenticated'):
if login.get('message'):
@@ -124,7 +139,7 @@ class InstagramBaseIE(InfoExtractor):
}
def _extract_product_media(self, product_media):
- media_id = product_media.get('code') or product_media.get('id')
+ media_id = product_media.get('code') or _pk_to_id(product_media.get('pk'))
vcodec = product_media.get('video_codec')
dash_manifest_raw = product_media.get('video_dash_manifest')
videos_list = product_media.get('video_versions')
@@ -140,7 +155,6 @@ class InstagramBaseIE(InfoExtractor):
} for format in videos_list or []]
if dash_manifest_raw:
formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash'))
- self._sort_formats(formats)
thumbnails = [{
'url': thumbnail.get('url'),
@@ -160,7 +174,7 @@ class InstagramBaseIE(InfoExtractor):
user_info = product_info.get('user') or {}
info_dict = {
- 'id': product_info.get('code') or product_info.get('id'),
+ 'id': _pk_to_id(traverse_obj(product_info, 'pk', 'id', expected_type=str_or_none)[:19]),
'title': product_info.get('title') or f'Video by {user_info.get("username")}',
'description': traverse_obj(product_info, ('caption', 'text'), expected_type=str_or_none),
'timestamp': int_or_none(product_info.get('taken_at')),
@@ -170,6 +184,7 @@ class InstagramBaseIE(InfoExtractor):
'view_count': int_or_none(product_info.get('view_count')),
'like_count': int_or_none(product_info.get('like_count')),
'comment_count': int_or_none(product_info.get('comment_count')),
+ '__post_extractor': self.extract_comments(_pk_to_id(product_info.get('pk'))),
'http_headers': {
'Referer': 'https://www.instagram.com/',
}
@@ -191,6 +206,23 @@ class InstagramBaseIE(InfoExtractor):
**self._extract_product_media(product_info)
}
+ def _get_comments(self, video_id):
+ comments_info = self._download_json(
+ f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/comments/?can_support_threading=true&permalink_enabled=false', video_id,
+ fatal=False, errnote='Comments extraction failed', note='Downloading comments info', headers=self._API_HEADERS) or {}
+
+ comment_data = traverse_obj(comments_info, ('edge_media_to_parent_comment', 'edges'), 'comments')
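+        # each comment can arrive in GraphQL shape (nested under 'node') or in the
+        # REST API shape ('user'/'pk'/...), so every field tries both paths in order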
+ for comment_dict in comment_data or []:
+ yield {
+ 'author': traverse_obj(comment_dict, ('node', 'owner', 'username'), ('user', 'username')),
+ 'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id'), ('user', 'pk')),
+ 'author_thumbnail': traverse_obj(comment_dict, ('node', 'owner', 'profile_pic_url'), ('user', 'profile_pic_url'), expected_type=url_or_none),
+ 'id': traverse_obj(comment_dict, ('node', 'id'), 'pk'),
+ 'text': traverse_obj(comment_dict, ('node', 'text'), 'text'),
+ 'like_count': traverse_obj(comment_dict, ('node', 'edge_liked_by', 'count'), 'comment_like_count', expected_type=int_or_none),
+ 'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), 'created_at', expected_type=int_or_none),
+ }
+
class InstagramIOSIE(InfoExtractor):
IE_DESC = 'IOS instagram:// URL'
@@ -216,27 +248,14 @@ class InstagramIOSIE(InfoExtractor):
'add_ie': ['Instagram']
}]
- def _get_id(self, id):
- """Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
- chrs = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
- media_id = int(id.split('_')[0])
- shortened_id = ''
- while media_id > 0:
- r = media_id % 64
- media_id = (media_id - r) // 64
- shortened_id = chrs[r] + shortened_id
- return shortened_id
-
def _real_extract(self, url):
- return {
- '_type': 'url_transparent',
- 'url': f'http://instagram.com/tv/{self._get_id(self._match_id(url))}/',
- 'ie_key': 'Instagram',
- }
+ video_id = _pk_to_id(self._match_id(url))
+ return self.url_result(f'http://instagram.com/tv/{video_id}', InstagramIE, video_id)
class InstagramIE(InstagramBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
@@ -246,7 +265,7 @@ class InstagramIE(InstagramBaseIE):
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 0,
+ 'duration': 8.747,
'timestamp': 1371748545,
'upload_date': '20130620',
'uploader_id': '2815873',
@@ -256,27 +275,34 @@ class InstagramIE(InstagramBaseIE):
'comment_count': int,
'comments': list,
},
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
}, {
- # missing description
- 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
+ # reel
+ 'url': 'https://www.instagram.com/reel/Chunk8-jurw/',
+ 'md5': 'f6d8277f74515fa3ff9f5791426e42b1',
'info_dict': {
- 'id': 'BA-pQFBG8HZ',
+ 'id': 'Chunk8-jurw',
'ext': 'mp4',
- 'title': 'Video by britneyspears',
+ 'title': 'Video by instagram',
+ 'description': 'md5:c9cde483606ed6f80fbe9283a6a2b290',
'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 0,
- 'timestamp': 1453760977,
- 'upload_date': '20160125',
- 'uploader_id': '12246775',
- 'uploader': 'Britney Spears',
- 'channel': 'britneyspears',
+ 'duration': 5.016,
+ 'timestamp': 1661529231,
+ 'upload_date': '20220826',
+ 'uploader_id': '25025320',
+ 'uploader': 'Instagram',
+ 'channel': 'instagram',
'like_count': int,
'comment_count': int,
'comments': list,
},
- 'params': {
- 'skip_download': True,
- },
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
}, {
# multi video post
'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
@@ -285,18 +311,24 @@ class InstagramIE(InstagramBaseIE):
'id': 'BQ0dSaohpPW',
'ext': 'mp4',
'title': 'Video 1',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
},
}, {
'info_dict': {
'id': 'BQ0dTpOhuHT',
'ext': 'mp4',
'title': 'Video 2',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
},
}, {
'info_dict': {
'id': 'BQ0dT7RBFeF',
'ext': 'mp4',
'title': 'Video 3',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
},
}],
'info_dict': {
@@ -304,6 +336,10 @@ class InstagramIE(InstagramBaseIE):
'title': 'Post by instagram',
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
},
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
}, {
# IGTV
'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
@@ -322,7 +358,11 @@ class InstagramIE(InstagramBaseIE):
'comment_count': int,
'comments': list,
'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
- }
+ },
+ 'expected_warnings': [
+ 'General metadata extraction failed',
+ 'Main webpage is locked behind the login page',
+ ],
}, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
@@ -340,59 +380,88 @@ class InstagramIE(InstagramBaseIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_embed_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
- webpage)
- if mobj:
- return mobj.group('url')
-
- blockquote_el = get_element_by_attribute(
- 'class', 'instagram-media', webpage)
- if blockquote_el is None:
- return
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ res = tuple(super()._extract_embed_urls(url, webpage))
+ if res:
+ return res
- mobj = re.search(
- r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
+ mobj = re.search(r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1',
+ get_element_by_attribute('class', 'instagram-media', webpage) or '')
if mobj:
- return mobj.group('link')
+ return [mobj.group('link')]
def _real_extract(self, url):
video_id, url = self._match_valid_url(url).group('id', 'url')
- webpage, urlh = self._download_webpage_handle(url, video_id)
- if 'www.instagram.com/accounts/login' in urlh.geturl():
- self.report_warning('Main webpage is locked behind the login page. '
- 'Retrying with embed webpage (Note that some metadata might be missing)')
- webpage = self._download_webpage(
- 'https://www.instagram.com/p/%s/embed/' % video_id, video_id, note='Downloading embed webpage')
-
- shared_data = self._parse_json(
- self._search_regex(
- r'window\._sharedData\s*=\s*({.+?});',
- webpage, 'shared data', default='{}'),
- video_id, fatal=False)
- media = traverse_obj(
- shared_data,
- ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
- ('entry_data', 'PostPage', 0, 'media'),
- expected_type=dict)
-
- # _sharedData.entry_data.PostPage is empty when authenticated (see
- # https://github.com/ytdl-org/youtube-dl/pull/22880)
- if not media:
- additional_data = self._parse_json(
- self._search_regex(
- r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\);',
- webpage, 'additional data', default='{}'),
- video_id, fatal=False)
- product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
- if product_item:
- return self._extract_product(product_item)
- media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {}
-
- if not media and 'www.instagram.com/accounts/login' in urlh.geturl():
- self.raise_login_required('You need to log in to access this content')
+ media, webpage = {}, ''
+
+ if self._get_cookies(url).get('sessionid'):
+ info = traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
+ fatal=False, errnote='Video info extraction failed',
+ note='Downloading video info', headers=self._API_HEADERS), ('items', 0))
+ if info:
+ media.update(info)
+ return self._extract_product(media)
+
+ api_check = self._download_json(
+ f'{self._API_BASE_URL}/web/get_ruling_for_content/?content_type=MEDIA&target_id={_id_to_pk(video_id)}',
+ video_id, headers=self._API_HEADERS, fatal=False, note='Setting up session', errnote=False) or {}
+ csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
+
+ if not csrf_token:
+ self.report_warning('No csrf token set by Instagram API', video_id)
+ else:
+ csrf_token = csrf_token.value if api_check.get('status') == 'ok' else None
+ if not csrf_token:
+ self.report_warning('Instagram API is not granting access', video_id)
+
+ variables = {
+ 'shortcode': video_id,
+ 'child_comment_count': 3,
+ 'fetch_comment_count': 40,
+ 'parent_comment_count': 24,
+ 'has_threaded_comments': True,
+ }
+ general_info = self._download_json(
+ 'https://www.instagram.com/graphql/query/', video_id, fatal=False, errnote=False,
+ headers={
+ **self._API_HEADERS,
+ 'X-CSRFToken': csrf_token or '',
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': url,
+ }, query={
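+                # the query_hash pins a persisted server-side GraphQL query; the
+                # variables request the media itself plus a first batch of comments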
+ 'query_hash': '9f8827793ef34641b2fb195d4d41151c',
+ 'variables': json.dumps(variables, separators=(',', ':')),
+ })
+ media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
+
+ if not general_info:
+ self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ shared_data = self._search_json(
+ r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {}
+
+ if shared_data and self._LOGIN_URL not in urlh.geturl():
+ media.update(traverse_obj(
+ shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
+ ('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
+ else:
+ self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
+ webpage = self._download_webpage(
+ f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
+ additional_data = self._search_json(
+ r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
+ if not additional_data and not media:
+ self.raise_login_required('Requested content is not available, rate-limit reached or login required')
+
+ product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
+ if product_item:
+ media.update(product_item)
+ return self._extract_product(media)
+
+ media.update(traverse_obj(
+ additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
@@ -412,7 +481,7 @@ class InstagramIE(InstagramBaseIE):
if nodes:
return self.playlist_result(
self._extract_nodes(nodes, True), video_id,
- format_field(username, template='Post by %s'), description)
+ format_field(username, None, 'Post by %s'), description)
video_url = self._og_search_video_url(webpage, secure=False)
@@ -424,7 +493,6 @@ class InstagramIE(InstagramBaseIE):
dash = traverse_obj(media, ('dash_info', 'video_dash_manifest'))
if dash:
formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
- self._sort_formats(formats)
comment_data = traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))
comments = [{
@@ -521,7 +589,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
except ExtractorError as e:
# if it's an error caused by a bad query, and there are
# more GIS templates to try, ignore it and keep trying
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
if gis_tmpl != gis_tmpls[-1]:
continue
raise
@@ -631,41 +699,32 @@ class InstagramStoryIE(InstagramBaseIE):
def _real_extract(self, url):
username, story_id = self._match_valid_url(url).groups()
-
- story_info_url = f'{username}/{story_id}/?__a=1' if username == 'highlights' else f'{username}/?__a=1'
- story_info = self._download_json(f'https://www.instagram.com/stories/{story_info_url}', story_id, headers={
- 'X-IG-App-ID': 936619743392459,
- 'X-ASBD-ID': 198387,
- 'X-IG-WWW-Claim': 0,
- 'X-Requested-With': 'XMLHttpRequest',
- 'Referer': url,
- })
- user_id = story_info['user']['id']
- highlight_title = traverse_obj(story_info, ('highlight', 'title'))
+ story_info = self._download_webpage(url, story_id)
+ user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
+ if not user_info:
+ self.raise_login_required('This content is unreachable')
+ user_id = user_info.get('id')
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
- videos = self._download_json(f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}', story_id, headers={
- 'X-IG-App-ID': 936619743392459,
- 'X-ASBD-ID': 198387,
- 'X-IG-WWW-Claim': 0,
- })['reels']
-
- full_name = traverse_obj(videos, ('user', 'full_name'))
-
- user_info = {}
- if not (username and username != 'highlights' and full_name):
- user_info = self._download_json(
- f'https://i.instagram.com/api/v1/users/{user_id}/info/', story_id, headers={
- 'User-Agent': 'Mozilla/5.0 (Linux; Android 11; SM-A505F Build/RP1A.200720.012; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/96.0.4664.45 Mobile Safari/537.36 Instagram 214.1.0.29.120 Android (30/11; 450dpi; 1080x2122; samsung; SM-A505F; a50; exynos9610; en_US; 333717274)',
- }, note='Downloading user info')
+ videos = traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
+ story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
+ if not videos:
+ self.raise_login_required('You need to log in to access this content')
- username = traverse_obj(user_info, ('user', 'username')) or username
- full_name = traverse_obj(user_info, ('user', 'full_name')) or full_name
+ full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (str(user_id), 'user', 'full_name'))
+ story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
+ if not story_title:
+ story_title = f'Story by {username}'
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
- return self.playlist_result([{
- **self._extract_product(highlight),
- 'title': f'Story by {username}',
- 'uploader': full_name,
- 'uploader_id': user_id,
- } for highlight in highlights], playlist_id=story_id, playlist_title=highlight_title)
+ info_data = []
+ for highlight in highlights:
+ highlight_data = self._extract_product(highlight)
+ if highlight_data.get('formats'):
+ info_data.append({
+ **highlight_data,
+ 'uploader': full_name,
+ 'uploader_id': user_id,
+ })
+ return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
diff --git a/hypervideo_dl/extractor/internazionale.py b/hypervideo_dl/extractor/internazionale.py
index 45e2af6..1b1cb57 100644
--- a/hypervideo_dl/extractor/internazionale.py
+++ b/hypervideo_dl/extractor/internazionale.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import unified_timestamp
@@ -63,7 +60,6 @@ class InternazionaleIE(InfoExtractor):
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats(
video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
timestamp = unified_timestamp(self._html_search_meta(
'article:published_time', webpage, 'timestamp'))
diff --git a/hypervideo_dl/extractor/internetvideoarchive.py b/hypervideo_dl/extractor/internetvideoarchive.py
index 880918c..9d2574c 100644
--- a/hypervideo_dl/extractor/internetvideoarchive.py
+++ b/hypervideo_dl/extractor/internetvideoarchive.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
import re
@@ -50,7 +48,6 @@ class InternetVideoArchiveIE(InfoExtractor):
replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_ism_formats(
replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/iprima.py b/hypervideo_dl/extractor/iprima.py
index 1a20384..1818205 100644
--- a/hypervideo_dl/extractor/iprima.py
+++ b/hypervideo_dl/extractor/iprima.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import time
@@ -151,9 +148,8 @@ class IPrimaIE(InfoExtractor):
elif manifest_type == 'DASH' or ext == 'mpd':
formats += self._extract_mpd_formats(
manifest_url, video_id, mpd_id='dash', fatal=False)
- self._sort_formats(formats)
- final_result = self._search_json_ld(webpage, video_id) or {}
+ final_result = self._search_json_ld(webpage, video_id, default={})
final_result.update({
'id': video_id,
'title': title,
@@ -251,8 +247,6 @@ class IPrimaCNNIE(InfoExtractor):
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/iqiyi.py b/hypervideo_dl/extractor/iqiyi.py
index d07b39d..c41f6db 100644
--- a/hypervideo_dl/extractor/iqiyi.py
+++ b/hypervideo_dl/extractor/iqiyi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import itertools
import re
@@ -218,7 +215,6 @@ class IqiyiIE(InfoExtractor):
self._sleep(5, video_id)
- self._sort_formats(formats)
title = (get_element_by_id('widget-videotitle', webpage)
or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
@@ -274,6 +270,7 @@ class IqIE(InfoExtractor):
'1': 'zh_CN',
'2': 'zh_TW',
'3': 'en',
+ '4': 'kor',
'18': 'th',
'21': 'my',
'23': 'vi',
@@ -354,7 +351,7 @@ class IqIE(InfoExtractor):
'''
def _extract_vms_player_js(self, webpage, video_id):
- player_js_cache = self._downloader.cache.load('iq', 'player_js')
+ player_js_cache = self.cache.load('iq', 'player_js')
if player_js_cache:
return player_js_cache
webpack_js_url = self._proto_relative_url(self._search_regex(
@@ -367,7 +364,7 @@ class IqIE(InfoExtractor):
f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js',
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
if 'vms request' in module_js:
- self._downloader.cache.store('iq', 'player_js', module_js)
+ self.cache.store('iq', 'player_js', module_js)
return module_js
raise ExtractorError('Unable to extract player JS')
@@ -420,8 +417,9 @@ class IqIE(InfoExtractor):
ut_list = ['0']
# bid 0 as an initial format checker
- dash_paths = self._parse_json(PhantomJSwrapper(self).get(
- url, html='<!DOCTYPE html>', video_id=video_id, note2='Executing signature code', jscode=self._DASH_JS % {
+ dash_paths = self._parse_json(PhantomJSwrapper(self, timeout=120_000).get(
+ url, note2='Executing signature code (this may take a couple minutes)',
+ html='<!DOCTYPE html>', video_id=video_id, jscode=self._DASH_JS % {
'tvid': video_info['tvId'],
'vid': video_info['vid'],
'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
@@ -443,7 +441,7 @@ class IqIE(InfoExtractor):
preview_time = traverse_obj(
initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
- self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds'))
+ self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds'))
# TODO: Extract audio-only formats
for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
@@ -498,8 +496,6 @@ class IqIE(InfoExtractor):
})
formats.extend(extracted_formats)
- self._sort_formats(formats)
-
for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]):
lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name'))
subtitles.setdefault(lang, []).extend([{
diff --git a/hypervideo_dl/extractor/ir90tv.py b/hypervideo_dl/extractor/ir90tv.py
deleted file mode 100644
index d5a3f6f..0000000
--- a/hypervideo_dl/extractor/ir90tv.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import remove_start
-
-
-class Ir90TvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
- _TESTS = [{
- 'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
- 'md5': '411dbd94891381960cb9e13daa47a869',
- 'info_dict': {
- 'id': '95719',
- 'ext': 'mp4',
- 'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }, {
- 'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = remove_start(self._html_search_regex(
- r'<title>([^<]+)</title>', webpage, 'title'), '90tv.ir :: ')
-
- video_url = self._search_regex(
- r'<source[^>]+src="([^"]+)"', webpage, 'video url')
-
- thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
-
- return {
- 'url': video_url,
- 'id': video_id,
- 'title': title,
- 'video_url': video_url,
- 'thumbnail': thumbnail,
- }
diff --git a/hypervideo_dl/extractor/islamchannel.py b/hypervideo_dl/extractor/islamchannel.py
new file mode 100644
index 0000000..253a846
--- /dev/null
+++ b/hypervideo_dl/extractor/islamchannel.py
@@ -0,0 +1,81 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import traverse_obj, urljoin
+
+
+class IslamChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.islamchannel\.tv/watch/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://watch.islamchannel.tv/watch/38604310',
+ 'info_dict': {
+ 'id': '38604310',
+ 'title': 'Omar - Young Omar',
+ 'description': 'md5:5cc7ddecef064ea7afe52eb5e0e33b55',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ thumbnail = self._search_regex(
+ r'data-poster="([^"]+)"', webpage, 'data poster', fatal=False) or \
+ self._html_search_meta(('og:image', 'twitter:image'), webpage)
+
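+        # the player page embeds short-lived credentials as data-* attributes; the
+        # simplestream API expects them echoed back as Token/Token-Expiry/Uvid headers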
+ headers = {
+ 'Token': self._search_regex(r'data-token="([^"]+)"', webpage, 'data token'),
+ 'Token-Expiry': self._search_regex(r'data-expiry="([^"]+)"', webpage, 'data expiry'),
+ 'Uvid': video_id,
+ }
+ show_stream = self._download_json(
+ f'https://v2-streams-elb.simplestreamcdn.com/api/show/stream/{video_id}', video_id,
+ query={
+ 'key': self._search_regex(r'data-key="([^"]+)"', webpage, 'data key'),
+ 'platform': 'chrome',
+ }, headers=headers)
+ # TODO: show_stream['stream'] and show_stream['drm'] may contain something interesting
+ streams = self._download_json(
+ traverse_obj(show_stream, ('response', 'tokenization', 'url')), video_id,
+ headers=headers)
+ formats, subs = self._extract_m3u8_formats_and_subtitles(traverse_obj(streams, ('Streams', 'Adaptive')), video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(('og:title', 'twitter:title'), webpage),
+ 'description': self._html_search_meta(('og:description', 'twitter:description', 'description'), webpage),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnails': [{
+ 'id': 'unscaled',
+ 'url': thumbnail.split('?')[0],
+ 'ext': 'jpg',
+ 'preference': 2,
+ }, {
+ 'id': 'orig',
+ 'url': thumbnail,
+ 'ext': 'jpg',
+ 'preference': 1,
+ }] if thumbnail else None,
+ }
+
+
+class IslamChannelSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.islamchannel\.tv/series/(?P<id>[a-f\d-]+)'
+ _TESTS = [{
+ 'url': 'https://watch.islamchannel.tv/series/a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
+ 'info_dict': {
+ 'id': 'a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
+ },
+ 'playlist_mincount': 31,
+ }]
+
+ def _real_extract(self, url):
+ pl_id = self._match_id(url)
+ webpage = self._download_webpage(url, pl_id)
+
+ return self.playlist_from_matches(
+ re.finditer(r'<a\s+href="(/watch/\d+)"[^>]+?data-video-type="show">', webpage),
+ pl_id, getter=lambda x: urljoin(url, x.group(1)), ie=IslamChannelIE)
diff --git a/hypervideo_dl/extractor/israelnationalnews.py b/hypervideo_dl/extractor/israelnationalnews.py
new file mode 100644
index 0000000..35040f5
--- /dev/null
+++ b/hypervideo_dl/extractor/israelnationalnews.py
@@ -0,0 +1,50 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, traverse_obj
+
+
+class IsraelNationalNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?israelnationalnews\.com/news/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.israelnationalnews.com/news/354520',
+ 'info_dict': {
+ 'id': '354520'
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'jA84wQhVvg8',
+ 'title': 'Even CNN Host Is Shocked by How Bad Biden\'s Approval Ratings Have Gotten | DM CLIPS | Rubin Report',
+ 'ext': 'mp4',
+ 'description': 'md5:b7325a3d00c7596337dc3ae37e32d35c',
+ 'channel': 'The Rubin Report',
+ 'channel_follower_count': int,
+ 'comment_count': int,
+ 'categories': ['News & Politics'],
+ 'like_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/RubinReport',
+ 'uploader_id': 'RubinReport',
+ 'availability': 'public',
+ 'view_count': int,
+ 'duration': 240,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/jA84wQhVvg8/maxresdefault.webp',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'tags': 'count:29',
+ 'channel_id': 'UCJdKr0Bgd_5saZYqLCa9mng',
+ 'channel_url': 'https://www.youtube.com/channel/UCJdKr0Bgd_5saZYqLCa9mng',
+ 'upload_date': '20220606',
+ 'uploader': 'The Rubin Report',
+ }
+ }]
+ }]
+
+ def _real_extract(self, url):
+ news_article_id = self._match_id(url)
+ article_json = self._download_json(
+ f'https://www.israelnationalnews.com/Generic/NewAPI/Item?type=0&Item={news_article_id}', news_article_id)
+
+ urls = traverse_obj(article_json, ('Content2', ..., 'content', ..., 'attrs', 'src'))
+ if not urls:
+ raise ExtractorError('This article does not have any videos', expected=True)
+
+ return self.playlist_from_matches(urls, news_article_id, ie='Youtube')
diff --git a/hypervideo_dl/extractor/itprotv.py b/hypervideo_dl/extractor/itprotv.py
index 64cb4e6..4ac1260 100644
--- a/hypervideo_dl/extractor/itprotv.py
+++ b/hypervideo_dl/extractor/itprotv.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/itv.py b/hypervideo_dl/extractor/itv.py
index 66705a2..0681050 100644
--- a/hypervideo_dl/extractor/itv.py
+++ b/hypervideo_dl/extractor/itv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -175,7 +172,6 @@ class ITVIE(InfoExtractor):
formats.append({
'url': href,
})
- self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, default={})
if not info:
json_ld = self._parse_json(self._search_regex(
diff --git a/hypervideo_dl/extractor/ivi.py b/hypervideo_dl/extractor/ivi.py
index 098ab66..27a222a 100644
--- a/hypervideo_dl/extractor/ivi.py
+++ b/hypervideo_dl/extractor/ivi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -16,6 +13,7 @@ class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1']
_GEO_BYPASS = False
_GEO_COUNTRIES = ['RU']
_LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
@@ -168,7 +166,6 @@ class IviIE(InfoExtractor):
'quality': quality(content_format),
'filesize': int_or_none(f.get('size_in_bytes')),
})
- self._sort_formats(formats)
compilation = result.get('compilation')
episode = title if compilation else None
diff --git a/hypervideo_dl/extractor/ivideon.py b/hypervideo_dl/extractor/ivideon.py
index 44b2208..7d1e554 100644
--- a/hypervideo_dl/extractor/ivideon.py
+++ b/hypervideo_dl/extractor/ivideon.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
@@ -71,7 +67,6 @@ class IvideonIE(InfoExtractor):
'ext': 'flv',
'quality': quality(format_id),
} for format_id in self._QUALITIES]
- self._sort_formats(formats)
return {
'id': server_id,
diff --git a/hypervideo_dl/extractor/iwara.py b/hypervideo_dl/extractor/iwara.py
index c0e01e3..ec3e59c 100644
--- a/hypervideo_dl/extractor/iwara.py
+++ b/hypervideo_dl/extractor/iwara.py
@@ -1,21 +1,29 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import itertools
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
from ..utils import (
int_or_none,
mimetype2ext,
remove_end,
- url_or_none,
- unified_strdate,
strip_or_none,
+ unified_strdate,
+ url_or_none,
+ urljoin,
)
-class IwaraIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
+class IwaraBaseIE(InfoExtractor):
+ _BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)'
+
+ def _extract_playlist(self, base_url, webpage):
+ for path in re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage):
+ yield self.url_result(urljoin(base_url, path))
+
+
+class IwaraIE(IwaraBaseIE):
+ _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
# md5 is unstable
@@ -60,7 +68,7 @@ class IwaraIE(InfoExtractor):
webpage, urlh = self._download_webpage_handle(url, video_id)
- hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname
+ hostname = urllib.parse.urlparse(urlh.geturl()).hostname
# ecchi is 'sexy' in Japanese
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
@@ -108,8 +116,6 @@ class IwaraIE(InfoExtractor):
'quality': 1 if format_id == 'Source' else 0,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
@@ -120,3 +126,114 @@ class IwaraIE(InfoExtractor):
'upload_date': upload_date,
'description': description,
}
+
+
+class IwaraPlaylistIE(IwaraBaseIE):
+ _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)'
+ IE_NAME = 'iwara:playlist'
+
+ _TESTS = [{
+ 'url': 'https://ecchi.iwara.tv/playlist/best-enf',
+ 'info_dict': {
+ 'title': 'Best enf',
+ 'uploader': 'Jared98112',
+ 'id': 'best-enf',
+ },
+ 'playlist_mincount': 1097,
+ }, {
+ # urlencoded
+ 'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2',
+ 'info_dict': {
+ 'id': 'プレイリスト-2',
+ 'title': 'プレイリスト',
+ 'uploader': 'mainyu',
+ },
+ 'playlist_mincount': 91,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
+ playlist_id = urllib.parse.unquote(playlist_id)
+ webpage = self._download_webpage(url, playlist_id)
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': self._html_search_regex(r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False),
+ 'uploader': self._html_search_regex(r'<h2>([^<]+)', webpage, 'uploader', fatal=False),
+ 'entries': self._extract_playlist(base_url, webpage),
+ }
+
+
+class IwaraUserIE(IwaraBaseIE):
+ _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)'
+ IE_NAME = 'iwara:user'
+
+ _TESTS = [{
+        'note': 'the all-videos page is just one page; fewer than 40 videos',
+ 'url': 'https://ecchi.iwara.tv/users/infinityyukarip',
+ 'info_dict': {
+ 'title': 'Uploaded videos from Infinity_YukariP',
+ 'id': 'infinityyukarip',
+ 'uploader': 'Infinity_YukariP',
+ 'uploader_id': 'infinityyukarip',
+ },
+ 'playlist_mincount': 39,
+ }, {
+        'note': 'not even an all-videos page; probably fewer than 10 videos',
+ 'url': 'https://ecchi.iwara.tv/users/mmd-quintet',
+ 'info_dict': {
+ 'title': 'Uploaded videos from mmd quintet',
+ 'id': 'mmd-quintet',
+ 'uploader': 'mmd quintet',
+ 'uploader_id': 'mmd-quintet',
+ },
+ 'playlist_mincount': 6,
+ }, {
+        'note': 'has paging; more than 40 videos',
+ 'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls',
+ 'info_dict': {
+ 'title': 'Uploaded videos from TheBlackbirdCalls',
+ 'id': 'theblackbirdcalls',
+ 'uploader': 'TheBlackbirdCalls',
+ 'uploader_id': 'theblackbirdcalls',
+ },
+ 'playlist_mincount': 420,
+ }, {
+        'note': 'the URL must contain foreign (non-ASCII) characters',
+ 'url': 'https://ecchi.iwara.tv/users/ぶた丼',
+ 'info_dict': {
+ 'title': 'Uploaded videos from ぶた丼',
+ 'id': 'ぶた丼',
+ 'uploader': 'ぶた丼',
+ 'uploader_id': 'ぶた丼',
+ },
+ 'playlist_mincount': 170,
+ }]
+
+ def _entries(self, playlist_id, base_url):
+ webpage = self._download_webpage(
+ f'{base_url}/users/{playlist_id}', playlist_id)
+ videos_url = self._search_regex(r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', webpage, 'all videos url', default=None)
+ if not videos_url:
+ yield from self._extract_playlist(base_url, webpage)
+ return
+
+ videos_url = urljoin(base_url, videos_url)
+
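+        # pages are numbered from 0 in the query string; a link containing
+        # page={n} on the current page means page n exists, so keep fetching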
+ for n in itertools.count(1):
+ page = self._download_webpage(
+ videos_url, playlist_id, note=f'Downloading playlist page {n}',
+ query={'page': str(n - 1)} if n > 1 else {})
+ yield from self._extract_playlist(
+ base_url, page)
+
+ if f'page={n}' not in page:
+ break
+
+ def _real_extract(self, url):
+ playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
+ playlist_id = urllib.parse.unquote(playlist_id)
+
+ return self.playlist_result(
+ self._entries(playlist_id, base_url), playlist_id)
diff --git a/hypervideo_dl/extractor/ixigua.py b/hypervideo_dl/extractor/ixigua.py
new file mode 100644
index 0000000..1f086d2
--- /dev/null
+++ b/hypervideo_dl/extractor/ixigua.py
@@ -0,0 +1,83 @@
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_id,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class IxiguaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+).+'
+ _TESTS = [{
+ 'url': 'https://www.ixigua.com/6996881461559165471',
+ 'info_dict': {
+ 'id': '6996881461559165471',
+ 'ext': 'mp4',
+ 'title': '盲目涉水风险大,亲身示范高水位行车注意事项',
+ 'description': 'md5:8c82f46186299add4a1c455430740229',
+ 'tags': ['video_car'],
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'uploader': '懂车帝原创',
+ 'uploader_id': '6480145787',
+ 'thumbnail': r're:^https?://.+\.(avif|webp)',
+ 'timestamp': 1629088414,
+ 'duration': 1030,
+ }
+ }]
+
+ def _get_json_data(self, webpage, video_id):
+ js_data = get_element_by_id('SSR_HYDRATED_DATA', webpage)
+ if not js_data:
+ if self._cookies_passed:
+ raise ExtractorError('Failed to get SSR_HYDRATED_DATA')
+ raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True)
+
+ return self._parse_json(
+ js_data.replace('window._SSR_HYDRATED_DATA=', ''), video_id, transform_source=js_to_json)
+
+ def _media_selector(self, json_data):
+ for path, override in (
+ (('video_list', ), {}),
+ (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}),
+ (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}),
+ ):
+ for media in traverse_obj(json_data, (..., *path, lambda _, v: v['main_url'])):
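+                # main_url is base64-encoded in the SSR payload; decoding yields the
+                # direct media URL (e.g. 'aHR0cHM6Ly8...' decodes to an 'https://...' URL)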
+ yield {
+ 'url': base64.b64decode(media['main_url']).decode(),
+ 'width': int_or_none(media.get('vwidth')),
+ 'height': int_or_none(media.get('vheight')),
+ 'fps': int_or_none(media.get('fps')),
+ 'vcodec': media.get('codec_type'),
+ 'format_id': str_or_none(media.get('quality_type')),
+ 'filesize': int_or_none(media.get('size')),
+ 'ext': 'mp4',
+ **override,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']
+
+ formats = list(self._media_selector(json_data.get('videoResource')))
+ return {
+ 'id': video_id,
+ 'title': json_data.get('title'),
+ 'description': json_data.get('video_abstract'),
+ 'formats': formats,
+ 'like_count': json_data.get('video_like_count'),
+ 'duration': int_or_none(json_data.get('duration')),
+ 'tags': [json_data.get('tag')],
+ 'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')),
+ 'uploader': traverse_obj(json_data, ('user_info', 'name')),
+ 'view_count': json_data.get('video_watch_count'),
+ 'dislike_count': json_data.get('video_unlike_count'),
+ 'timestamp': int_or_none(json_data.get('video_publish_time')),
+ }
diff --git a/hypervideo_dl/extractor/izlesene.py b/hypervideo_dl/extractor/izlesene.py
index f8fca6c..5cdf870 100644
--- a/hypervideo_dl/extractor/izlesene.py
+++ b/hypervideo_dl/extractor/izlesene.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -81,7 +78,6 @@ class IzleseneIE(InfoExtractor):
'ext': ext,
'height': height,
})
- self._sort_formats(formats)
description = self._og_search_description(webpage, default=None)
thumbnail = video.get('posterURL') or self._proto_relative_url(
diff --git a/hypervideo_dl/extractor/jable.py b/hypervideo_dl/extractor/jable.py
new file mode 100644
index 0000000..84c3225
--- /dev/null
+++ b/hypervideo_dl/extractor/jable.py
@@ -0,0 +1,103 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ InAdvancePagedList,
+ int_or_none,
+ orderedSet,
+ unified_strdate,
+)
+
+
+class JableIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://jable.tv/videos/pppd-812/',
+ 'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
+ 'info_dict': {
+ 'id': 'pppd-812',
+ 'ext': 'mp4',
+ 'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
+ 'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://jable.tv/videos/apak-220/',
+ 'md5': '71f9239d69ced58ab74a816908847cc1',
+ 'info_dict': {
+ 'id': 'apak-220',
+ 'ext': 'mp4',
+ 'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'view_count': int,
+ 'upload_date': '20220319',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ formats = self._extract_m3u8_formats(
+ self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=''),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'formats': formats,
+ 'age_limit': 18,
+ 'upload_date': unified_strdate(self._search_regex(
+ r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
+ 'view_count': int_or_none(self._search_regex(
+ r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
+ webpage, 'view_count', default='').replace(' ', '')),
+ 'like_count': int_or_none(self._search_regex(
+ r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)),
+ }
+
+
+class JablePlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://jable.tv/models/kaede-karen/',
+ 'info_dict': {
+ 'id': 'kaede-karen',
+ 'title': '楓カレン',
+ },
+ 'playlist_count': 34,
+ }, {
+ 'url': 'https://jable.tv/categories/roleplay/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://jable.tv/tags/girl/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
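+        # InAdvancePagedList passes a 0-based page index, while the site's async
+        # block endpoint appears to count pages from 1, hence 'from': page_num + 1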
+ def page_func(page_num):
+ return [
+ self.url_result(player_url, JableIE)
+ for player_url in orderedSet(re.findall(
+ r'href="(https://jable.tv/videos/[\w-]+/?)"',
+ self._download_webpage(url, playlist_id, query={
+ 'mode': 'async',
+ 'from': page_num + 1,
+ 'function': 'get_block',
+ 'block_id': 'list_videos_common_videos_list',
+ }, note=f'Downloading page {page_num + 1}')))]
+
+ return self.playlist_result(
+ InAdvancePagedList(page_func, int_or_none(self._search_regex(
+ r'from:(\d+)">[^<]+\s*&raquo;', webpage, 'last page number', default=1)), 24),
+ playlist_id, self._search_regex(
+ r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
diff --git a/hypervideo_dl/extractor/jamendo.py b/hypervideo_dl/extractor/jamendo.py
index 755d970..a2bbba3 100644
--- a/hypervideo_dl/extractor/jamendo.py
+++ b/hypervideo_dl/extractor/jamendo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import random
@@ -31,10 +28,11 @@ class JamendoIE(InfoExtractor):
'ext': 'flac',
# 'title': 'Maya Filipič - Stories from Emona I',
'title': 'Stories from Emona I',
- # 'artist': 'Maya Filipič',
+ 'artist': 'Maya Filipič',
+ 'album': 'Between two worlds',
'track': 'Stories from Emona I',
'duration': 210,
- 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=29279&width=300&trackid=196219',
'timestamp': 1217438117,
'upload_date': '20080730',
'license': 'by-nc-nd',
@@ -48,11 +46,11 @@ class JamendoIE(InfoExtractor):
'only_matching': True,
}]
- def _call_api(self, resource, resource_id):
+ def _call_api(self, resource, resource_id, fatal=True):
path = '/api/%ss' % resource
rand = compat_str(random.random())
return self._download_json(
- 'https://www.jamendo.com' + path, resource_id, query={
+ 'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={
'id[]': resource_id,
}, headers={
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
@@ -74,6 +72,8 @@ class JamendoIE(InfoExtractor):
# if artist_name:
# title = '%s - %s' % (artist_name, title)
# album = get_model('album')
+ artist = self._call_api("artist", track.get('artistId'), fatal=False)
+ album = self._call_api("album", track.get('albumId'), fatal=False)
formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@@ -87,7 +87,6 @@ class JamendoIE(InfoExtractor):
('ogg1', 'ogg', 'ogg'),
('flac', 'flac', 'flac'),
))]
- self._sort_formats(formats)
urls = []
thumbnails = []
@@ -121,9 +120,9 @@ class JamendoIE(InfoExtractor):
'title': title,
'description': track.get('description'),
'duration': int_or_none(track.get('duration')),
- # 'artist': artist_name,
+ 'artist': artist.get('name'),
'track': track_name,
- # 'album': album.get('name'),
+ 'album': album.get('name'),
'formats': formats,
'license': '-'.join(license) if license else None,
'timestamp': int_or_none(track.get('dateCreated')),
@@ -134,7 +133,7 @@ class JamendoIE(InfoExtractor):
}
-class JamendoAlbumIE(JamendoIE):
+class JamendoAlbumIE(JamendoIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
@@ -148,22 +147,38 @@ class JamendoAlbumIE(JamendoIE):
'info_dict': {
'id': '1032333',
'ext': 'flac',
- 'title': 'Shearer - Warmachine',
+ 'title': 'Warmachine',
'artist': 'Shearer',
'track': 'Warmachine',
'timestamp': 1368089771,
'upload_date': '20130509',
+ 'view_count': int,
+ 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032333',
+ 'duration': 190,
+ 'license': 'by',
+ 'album': 'Duck On Cover',
+ 'average_rating': 4,
+ 'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'],
+ 'like_count': int,
}
}, {
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
'info_dict': {
'id': '1032330',
'ext': 'flac',
- 'title': 'Shearer - Without Your Ghost',
+ 'title': 'Without Your Ghost',
'artist': 'Shearer',
'track': 'Without Your Ghost',
'timestamp': 1368089771,
'upload_date': '20130509',
+ 'duration': 192,
+ 'tags': ['rock', 'drums', 'bass', 'world', 'punk'],
+ 'album': 'Duck On Cover',
+ 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032330',
+ 'view_count': int,
+ 'average_rating': 4,
+ 'license': 'by',
+ 'like_count': int,
}
}],
'params': {
diff --git a/hypervideo_dl/extractor/japandiet.py b/hypervideo_dl/extractor/japandiet.py
new file mode 100644
index 0000000..6c65056
--- /dev/null
+++ b/hypervideo_dl/extractor/japandiet.py
@@ -0,0 +1,274 @@
+import re
+
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ int_or_none,
+ join_nonempty,
+ parse_qs,
+ smuggle_url,
+ traverse_obj,
+ try_call,
+ unsmuggle_url
+)
+from .common import InfoExtractor
+
+
+def _parse_japanese_date(text):
+ if not text:
+ return None
+ ERA_TABLE = {
+ '明治': 1868,
+ '大正': 1912,
+ '昭和': 1926,
+ '平成': 1989,
+ '令和': 2019,
+ }
+ ERA_RE = '|'.join(map(re.escape, ERA_TABLE.keys()))
+ mobj = re.search(rf'({ERA_RE})?(\d+)年(\d+)月(\d+)日', re.sub(r'[\s\u3000]+', '', text))
+ if not mobj:
+ return None
+ era, year, month, day = mobj.groups()
+ year, month, day = map(int, (year, month, day))
+ if era:
+ # example input: 令和5年3月34日
+        # even though each era has an end, don't check for it here
+ year += ERA_TABLE[era]
+ return '%04d%02d%02d' % (year, month, day)
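+
+# Note: without an era marker the year is taken as-is (Gregorian), e.g.
+# _parse_japanese_date('2022年10月7日') -> '20221007'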
+
+
+def _parse_japanese_duration(text):
+ mobj = re.search(r'(?:(\d+)日間?)?(?:(\d+)時間?)?(?:(\d+)分)?(?:(\d+)秒)?', re.sub(r'[\s\u3000]+', '', text or ''))
+ if not mobj:
+ return
+ days, hours, mins, secs = [int_or_none(x, default=0) for x in mobj.groups()]
+ return secs + mins * 60 + hours * 60 * 60 + days * 24 * 60 * 60
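+
+# e.g. _parse_japanese_duration('1時間30分') == 5400; absent units count as 0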
+
+
+class ShugiinItvBaseIE(InfoExtractor):
+ _INDEX_ROOMS = None
+
+ @classmethod
+ def _find_rooms(cls, webpage):
+ return [{
+ '_type': 'url',
+ 'id': x.group(1),
+ 'title': clean_html(x.group(2)).strip(),
+ 'url': smuggle_url(f'https://www.shugiintv.go.jp/jp/index.php?room_id={x.group(1)}', {'g': x.groups()}),
+ 'ie_key': ShugiinItvLiveIE.ie_key(),
+ } for x in re.finditer(r'(?s)<a\s+href="[^"]+\?room_id=(room\d+)"\s*class="play_live".+?class="s12_14">(.+?)</td>', webpage)]
+
+ def _fetch_rooms(self):
+ if not self._INDEX_ROOMS:
+ webpage = self._download_webpage(
+ 'https://www.shugiintv.go.jp/jp/index.php', None,
+ encoding='euc-jp', note='Downloading proceedings info')
+ ShugiinItvBaseIE._INDEX_ROOMS = self._find_rooms(webpage)
+ return self._INDEX_ROOMS
+
+
+class ShugiinItvLiveIE(ShugiinItvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)(?:/index\.php)?$'
+ IE_DESC = '衆議院インターネット審議中継'
+
+ _TESTS = [{
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'title': 'All proceedings for today',
+ },
+ # expect at least one proceedings is running
+ 'playlist_mincount': 1,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return super().suitable(url) and not any(x.suitable(url) for x in (ShugiinItvLiveRoomIE, ShugiinItvVodIE))
+
+ def _real_extract(self, url):
+ self.to_screen(
+            'Downloading all running proceedings. To download a specific proceeding, use a direct link from the website')
+ return self.playlist_result(self._fetch_rooms(), playlist_title='All proceedings for today')
+
+
+class ShugiinItvLiveRoomIE(ShugiinItvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?room_id=(?P<id>room\d+)'
+ IE_DESC = '衆議院インターネット審議中継 (中継)'
+
+ _TESTS = [{
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room01',
+ 'info_dict': {
+ 'id': 'room01',
+ 'title': '内閣委員会',
+ },
+        'skip': 'live only while the proceeding is in session, not every day',
+ }, {
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room11',
+ 'info_dict': {
+ 'id': 'room11',
+ 'title': '外務委員会',
+ },
+        'skip': 'live only while the proceeding is in session, not every day',
+ }]
+
+ def _real_extract(self, url):
+ url, smug = unsmuggle_url(url, default={})
+ if smug.get('g'):
+ room_id, title = smug['g']
+ else:
+ room_id = self._match_id(url)
+ title = traverse_obj(self._fetch_rooms(), (lambda k, v: v['id'] == room_id, 'title'), get_all=False)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://hlslive.shugiintv.go.jp/{room_id}/amlst:{room_id}/playlist.m3u8',
+ room_id, ext='mp4')
+
+ return {
+ 'id': room_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ }
+
+
+class ShugiinItvVodIE(ShugiinItvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?ex=VL(?:\&[^=]+=[^&]*)*\&deli_id=(?P<id>\d+)'
+ IE_DESC = '衆議院インターネット審議中継 (ビデオライブラリ)'
+ _TESTS = [{
+ 'url': 'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id=53846',
+ 'info_dict': {
+ 'id': '53846',
+ 'title': 'ウクライナ大統領国会演説(オンライン)',
+ 'release_date': '20220323',
+ 'chapters': 'count:4',
+ }
+ }, {
+ 'url': 'https://www.shugiintv.go.jp/en/index.php?ex=VL&media_type=&deli_id=53846',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ f'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id={video_id}', video_id,
+ encoding='euc-jp')
+
+ m3u8_url = self._search_regex(
+ r'id="vtag_src_base_vod"\s*value="(http.+?\.m3u8)"', webpage, 'm3u8 url')
+ m3u8_url = re.sub(r'^http://', 'https://', m3u8_url)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, ext='mp4')
+
+ title = self._html_search_regex(
+ (r'<td\s+align="left">(.+)\s*\(\d+分\)',
+ r'<TD.+?<IMG\s*src=".+?/spacer\.gif".+?height="15">(.+?)<IMG'), webpage, 'title', fatal=False)
+
+ release_date = _parse_japanese_date(self._html_search_regex(
+ r'開会日</td>\s*<td.+?/td>\s*<TD>(.+?)</TD>',
+            webpage, 'release date', fatal=False))
+
+ chapters = []
+ for chp in re.finditer(r'(?i)<A\s+HREF="([^"]+?)"\s*class="play_vod">(?!<img)(.+)</[Aa]>', webpage):
+ chapters.append({
+ 'title': clean_html(chp.group(2)).strip(),
+ 'start_time': try_call(lambda: float(parse_qs(chp.group(1))['time'][0].strip())),
+ })
+ # NOTE: there are blanks at the beginning and the end of the videos,
+ # so getting/providing the video duration is not possible;
+ # also, the exact end_time of the last chapter is unknown (the page gives at most minute-level granularity)
+ last_tr = re.findall(r'(?s)<TR\s*class="s14_24">(.+?)</TR>', webpage)[-1]
+ if last_tr and chapters:
+ last_td = re.findall(r'<TD.+?</TD>', last_tr)[-1]
+ if last_td:
+ chapters[-1]['end_time'] = chapters[-1]['start_time'] + _parse_japanese_duration(clean_html(last_td))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'release_date': release_date,
+ 'chapters': chapters,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class SangiinInstructionIE(InfoExtractor):
+ _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
+ IE_DESC = False # this shouldn't be listed as a supported site
+
+ def _real_extract(self, url):
+ raise ExtractorError('Copy the link from the button below the video description or player, and use that link to download. If there is no button in the frame, get the URL of the frame showing the video.', expected=True)
+
+
+class SangiinIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/detail\.php\?sid=(?P<id>\d+)'
+ IE_DESC = '参議院インターネット審議中継 (archive)'
+
+ _TESTS = [{
+ 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7052',
+ 'info_dict': {
+ 'id': '7052',
+ 'title': '2022年10月7日 本会議',
+ 'description': 'md5:0a5fed523f95c88105a0b0bf1dd71489',
+ 'upload_date': '20221007',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7037',
+ 'info_dict': {
+ 'id': '7037',
+ 'title': '2022年10月3日 開会式',
+ 'upload_date': '20221003',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7076',
+ 'info_dict': {
+ 'id': '7076',
+ 'title': '2022年10月27日 法務委員会',
+ 'upload_date': '20221027',
+ 'ext': 'mp4',
+ 'is_live': True,
+ },
+ 'skip': 'this live is turned into archive after it ends',
+ }, ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ date = self._html_search_regex(
+ r'<dt[^>]*>\s*開会日\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
+ 'date', fatal=False)
+ upload_date = _parse_japanese_date(date)
+
+ title = self._html_search_regex(
+ r'<dt[^>]*>\s*会議名\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
+ 'title', fatal=False)
+
+ # some videos don't have this element, so allow the description to be missing
+ description = self._html_search_regex(
+ r'会議の経過\s*</h3>\s*<span[^>]*>(.+?)</span>', webpage,
+ 'description', default=None)
+
+ # this row appears only when it's a livestream
+ is_live = bool(self._html_search_regex(
+ r'<dt[^>]*>\s*公報掲載時刻\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
+ 'is_live', default=None))
+
+ m3u8_url = self._search_regex(
+ r'var\s+videopath\s*=\s*(["\'])([^"\']+)\1', webpage,
+ 'm3u8 url', group=2)
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(date, title, delim=' '),
+ 'description': description,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ 'subtitles': subs,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/jeuxvideo.py b/hypervideo_dl/extractor/jeuxvideo.py
index 77c0f52..56ea15c 100644
--- a/hypervideo_dl/extractor/jeuxvideo.py
+++ b/hypervideo_dl/extractor/jeuxvideo.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/jixie.py b/hypervideo_dl/extractor/jixie.py
new file mode 100644
index 0000000..4830e61
--- /dev/null
+++ b/hypervideo_dl/extractor/jixie.py
@@ -0,0 +1,47 @@
+from .common import InfoExtractor
+from ..utils import clean_html, float_or_none, traverse_obj, try_call
+
+
+class JixieBaseIE(InfoExtractor):
+ """
+ API Reference:
+ https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
+ https://scripts.jixie.media/jxvideo.3.1.min.js
+ """
+
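+ # Expected response shape of /api/public/stream, inferred from this
+ # extractor rather than official documentation:
+ # {"data": {"title": ..., "owner_id": ..., "drm": ...,
+ # "streams": [{"type": "HLS", "url": ..., "width": ..., "height": ...}, ...],
+ # "metadata": {"description": ..., "thumbnails": ..., "duration": ...,
+ # "keywords": ..., "categories": ...}}}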
+ def _extract_data_from_jixie_id(self, display_id, video_id, webpage):
+ json_data = self._download_json(
+ 'https://apidam.jixie.io/api/public/stream', display_id,
+ query={'metadata': 'full', 'video_id': video_id})['data']
+
+ formats, subtitles = [], {}
+ for stream in json_data['streams']:
+ if stream.get('type') == 'HLS':
+ fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
+ if json_data.get('drm'):
+ for f in fmt:
+ f['has_drm'] = True
+ formats.extend(fmt)
+ self._merge_subtitles(sub, target=subtitles)
+ else:
+ formats.append({
+ 'url': stream.get('url'),
+ 'width': stream.get('width'),
+ 'height': stream.get('height'),
+ 'ext': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
+ or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
+ 'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
+ 'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
+ 'tags': try_call(lambda: (json_data['metadata']['keywords'] or None).split(',')),
+ 'categories': try_call(lambda: (json_data['metadata']['categories'] or None).split(',')),
+ 'uploader_id': json_data.get('owner_id'),
+ }
diff --git a/hypervideo_dl/extractor/joj.py b/hypervideo_dl/extractor/joj.py
index 7350f53..9b62284 100644
--- a/hypervideo_dl/extractor/joj.py
+++ b/hypervideo_dl/extractor/joj.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -21,6 +16,7 @@ class JojIE(InfoExtractor):
)
(?P<id>[^/?#^]+)
'''
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1']
_TESTS = [{
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
'info_dict': {
@@ -41,14 +37,6 @@ class JojIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -73,7 +61,7 @@ class JojIE(InfoExtractor):
r'(\d+)[pP]\.', format_url, 'height', default=None)
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%sp'),
+ 'format_id': format_field(height, None, '%sp'),
'height': int(height),
})
if not formats:
@@ -93,7 +81,6 @@ class JojIE(InfoExtractor):
r'(\d+)[pP]', format_id or path, 'height',
default=None)),
})
- self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
diff --git a/hypervideo_dl/extractor/jove.py b/hypervideo_dl/extractor/jove.py
index 4b7dfc5..245fe73 100644
--- a/hypervideo_dl/extractor/jove.py
+++ b/hypervideo_dl/extractor/jove.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/jwplatform.py b/hypervideo_dl/extractor/jwplatform.py
index 5aa508b..c949689 100644
--- a/hypervideo_dl/extractor/jwplatform.py
+++ b/hypervideo_dl/extractor/jwplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -8,7 +5,7 @@ from ..utils import unsmuggle_url
class JWPlatformIE(InfoExtractor):
- _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
@@ -25,21 +22,48 @@ class JWPlatformIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- urls = JWPlatformIE._extract_urls(webpage)
- return urls[0] if urls else None
+ _WEBPAGE_TESTS = [{
+ # JWPlatform iframe
+ 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
+ 'info_dict': {
+ 'id': 'AG26UQXM',
+ 'ext': 'mp4',
+ 'upload_date': '20160719',
+ 'timestamp': 1468923808,
+ 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
+ 'description': '',
+ 'duration': 294.0,
+ },
+ }, {
+ # Player url not surrounded by quotes
+ 'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
+ 'info_dict': {
+ 'id': 'R10NQdhY',
+ 'title': 'Playgirl',
+ 'ext': 'mp4',
+ 'upload_date': '20220624',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
+ 'timestamp': 1656064800,
+ 'description': 'BRD 1966, Will Tremper',
+ 'duration': 5146.0,
+ },
+ 'params': {'allowed_extractors': ['generic', 'jwplatform']},
+ }]
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
# <input value=URL> is used by hyland.com
# if we find <iframe>, don't look for <input>
ret = re.findall(
- r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+ r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
webpage)
if ret:
return ret
+ mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage)
+ if mobj:
+ return [f'jwplatform:{mobj.group(1)}']
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
diff --git a/hypervideo_dl/extractor/kakao.py b/hypervideo_dl/extractor/kakao.py
index 483ab71..1f0f0a5 100644
--- a/hypervideo_dl/extractor/kakao.py
+++ b/hypervideo_dl/extractor/kakao.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -109,6 +105,7 @@ class KakaoIE(InfoExtractor):
resp = self._parse_json(e.cause.read().decode(), video_id)
if resp.get('code') == 'GeoBlocked':
self.raise_geo_restricted()
+ raise
fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url'))
if not fmt_url:
@@ -123,7 +120,6 @@ class KakaoIE(InfoExtractor):
'filesize': int_or_none(fmt.get('filesize')),
'tbr': int_or_none(fmt.get('kbps')),
})
- self._sort_formats(formats)
thumbs = []
for thumb in clip.get('clipChapterThumbnailList') or []:
diff --git a/hypervideo_dl/extractor/kaltura.py b/hypervideo_dl/extractor/kaltura.py
index f6dfc9c..95e2dee 100644
--- a/hypervideo_dl/extractor/kaltura.py
+++ b/hypervideo_dl/extractor/kaltura.py
@@ -1,8 +1,6 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
import base64
+import json
+import re
from .common import InfoExtractor
from ..compat import (
@@ -16,13 +14,15 @@ from ..utils import (
int_or_none,
unsmuggle_url,
smuggle_url,
+ traverse_obj,
+ remove_start
)
class KalturaIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
- kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
+ kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
https?://
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?:
@@ -36,7 +36,7 @@ class KalturaIE(InfoExtractor):
)
'''
_SERVICE_URL = 'http://cdnapi.kaltura.com'
- _SERVICE_BASE = '/api_v3/index.php'
+ _SERVICE_BASE = '/api_v3/service/multirequest'
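+ # all API calls are now issued as a single JSON "multirequest" (see
+ # _kaltura_api_call below)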
# See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
_CAPTION_TYPES = {
1: 'srt',
@@ -57,6 +57,7 @@ class KalturaIE(InfoExtractor):
'thumbnail': 're:^https?://.*/thumbnail/.*',
'timestamp': int,
},
+ 'skip': 'The access to this service is forbidden since the specified partner is blocked'
},
{
'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
@@ -109,16 +110,85 @@ class KalturaIE(InfoExtractor):
# unavailable source format
'url': 'kaltura:513551:1_66x4rg7o',
'only_matching': True,
+ },
+ {
+ # html5lib URL using kwidget player
+ 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.46/mwEmbedFrame.php/p/691292/uiconf_id/20499062/entry_id/0_c076mna6?wid=_691292&iframeembed=true&playerId=kaltura_player_1420508608&entry_id=0_c076mna6&flashvars%5BakamaiHD.loadingPolicy%5D=preInitialize&flashvars%5BakamaiHD.asyncInit%5D=true&flashvars%5BstreamerType%5D=hdnetwork',
+ 'info_dict': {
+ 'id': '0_c076mna6',
+ 'ext': 'mp4',
+ 'title': 'md5:4883e7acbcbf42583a2dddc97dee4855',
+ 'duration': 3608,
+ 'uploader_id': 'commons@swinburne.edu.au',
+ 'timestamp': 1408086874,
+ 'view_count': int,
+ 'upload_date': '20140815',
+ 'thumbnail': 'http://cfvod.kaltura.com/p/691292/sp/69129200/thumbnail/entry_id/0_c076mna6/version/100022',
+ }
+ },
+ {
+ # html5lib playlist URL using kwidget player
+ 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.89/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_4j3m32cv&iframeembed=true&playerId=kaltura_player_&flashvars[playlistAPI.kpl0Id]=1_jovey5nu&flashvars[ks]=&&flashvars[imageDefaultDuration]=30&flashvars[localizationCode]=en&flashvars[leadWithHTML5]=true&flashvars[forceMobileHTML5]=true&flashvars[nextPrevBtn.plugin]=true&flashvars[hotspots.plugin]=true&flashvars[sideBarContainer.plugin]=true&flashvars[sideBarContainer.position]=left&flashvars[sideBarContainer.clickToClose]=true&flashvars[chapters.plugin]=true&flashvars[chapters.layout]=vertical&flashvars[chapters.thumbnailRotator]=false&flashvars[streamSelector.plugin]=true&flashvars[EmbedPlayer.SpinnerTarget]=videoHolder&flashvars[dualScreen.plugin]=true&flashvars[playlistAPI.playlistUrl]=https://canvasgatechtest.kaf.kaltura.com/playlist/details/{playlistAPI.kpl0Id}/categoryid/126428551',
+ 'info_dict': {
+ 'id': '1_jovey5nu',
+ 'title': '00-00 Introduction'
+ },
+ 'playlist': [
+ {
+ 'info_dict': {
+ 'id': '1_b1y5hlvx',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Introduction',
+ 'duration': 91,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_b1y5hlvx/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154447,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '1_jfb7mdpn',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Three parts to the course',
+ 'duration': 63,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_jfb7mdpn/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154489,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '1_8xflxdp7',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Textbooks',
+ 'duration': 37,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_8xflxdp7/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154512,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '1_3hqew8kn',
+ 'ext': 'mp4',
+ 'title': 'CS7646_00-00 Introductio_Prerequisites',
+ 'duration': 49,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_3hqew8kn/version/100001',
+ 'view_count': int,
+ 'timestamp': 1533154536,
+ 'upload_date': '20180801',
+ 'uploader_id': 'djoyner3',
+ }
+ }
+ ]
}
]
- @staticmethod
- def _extract_url(webpage):
- urls = KalturaIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
finditer = (
list(re.finditer(
@@ -160,63 +230,80 @@ class KalturaIE(InfoExtractor):
for k, v in embed_info.items():
if v:
embed_info[k] = v.strip()
- url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+ embed_url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
service_mobj = re.search(
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
if service_mobj:
- url = smuggle_url(url, {'service_url': service_mobj.group('id')})
- urls.append(url)
+ embed_url = smuggle_url(embed_url, {'service_url': service_mobj.group('id')})
+ urls.append(embed_url)
return urls
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0]
- if len(actions) > 1:
- for i, a in enumerate(actions[1:], start=1):
- for k, v in a.items():
- params['%d:%s' % (i, k)] = v
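+ # the multirequest body carries the shared parameters at the top level
+ # and each sub-request under its 1-based index (json.dumps serializes
+ # the int keys as strings)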
+ params.update({i: a for i, a in enumerate(actions[1:], start=1)})
data = self._download_json(
(service_url or self._SERVICE_URL) + self._SERVICE_BASE,
- video_id, query=params, *args, **kwargs)
+ video_id, data=json.dumps(params).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ }, *args, **kwargs)
- status = data if len(actions) == 1 else data[0]
- if status.get('objectType') == 'KalturaAPIException':
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, status['message']))
+ for idx, status in enumerate(data):
+ if not isinstance(status, dict):
+ continue
+ if status.get('objectType') == 'KalturaAPIException':
+ raise ExtractorError(
+ '%s said: %s (%d)' % (self.IE_NAME, status['message'], idx))
+
+ data[1] = traverse_obj(data, (1, 'objects', 0))
return data
- def _get_video_info(self, video_id, partner_id, service_url=None):
+ def _get_video_info(self, video_id, partner_id, service_url=None, player_type='html5'):
+ assert player_type in ('html5', 'kwidget')
+ if player_type == 'kwidget':
+ return self._get_video_info_kwidget(video_id, partner_id, service_url)
+
+ return self._get_video_info_html5(video_id, partner_id, service_url)
+
+ def _get_video_info_html5(self, video_id, partner_id, service_url=None):
actions = [
{
- 'action': 'null',
- 'apiVersion': '3.1.5',
- 'clientTag': 'kdp:v3.8.5',
+ 'apiVersion': '3.3.0',
+ 'clientTag': 'html5:v3.1.0',
'format': 1, # JSON, 2 = XML, 3 = PHP
- 'service': 'multirequest',
+ 'ks': '',
+ 'partnerId': partner_id,
},
{
'expiry': 86400,
'service': 'session',
'action': 'startWidgetSession',
- 'widgetId': '_%s' % partner_id,
+ 'widgetId': self._build_widget_id(partner_id),
},
+ # info
{
- 'action': 'get',
- 'entryId': video_id,
+ 'action': 'list',
+ 'filter': {'redirectFromEntryId': video_id},
'service': 'baseentry',
'ks': '{1:result:ks}',
- 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
- 'responseProfile:type': 1,
+ 'responseProfile': {
+ 'type': 1,
+ 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ },
},
+ # flavor_assets
{
'action': 'getbyentryid',
'entryId': video_id,
'service': 'flavorAsset',
'ks': '{1:result:ks}',
},
+ # captions
{
'action': 'list',
'filter:entryIdEqual': video_id,
@@ -225,17 +312,85 @@ class KalturaIE(InfoExtractor):
},
]
return self._kaltura_api_call(
- video_id, actions, service_url, note='Downloading video info JSON')
+ video_id, actions, service_url, note='Downloading video info JSON (Kaltura html5 player)')
+
+ def _get_video_info_kwidget(self, video_id, partner_id, service_url=None):
+ actions = [
+ {
+ 'service': 'multirequest',
+ 'apiVersion': '3.1',
+ 'expiry': 86400,
+ 'clientTag': 'kwidget:v2.89',
+ 'format': 1, # JSON, 2 = XML, 3 = PHP
+ 'ignoreNull': 1,
+ 'action': 'null',
+ },
+ # header
+ {
+ 'expiry': 86400,
+ 'service': 'session',
+ 'action': 'startWidgetSession',
+ 'widgetId': self._build_widget_id(partner_id),
+ },
+ # (empty)
+ {
+ 'expiry': 86400,
+ 'service': 'session',
+ 'action': 'startwidgetsession',
+ 'widgetId': self._build_widget_id(partner_id),
+ 'format': 9,
+ 'apiVersion': '3.1',
+ 'clientTag': 'kwidget:v2.89',
+ 'ignoreNull': 1,
+ 'ks': '{1:result:ks}'
+ },
+ # info
+ {
+ 'action': 'list',
+ 'filter': {'redirectFromEntryId': video_id},
+ 'service': 'baseentry',
+ 'ks': '{1:result:ks}',
+ 'responseProfile': {
+ 'type': 1,
+ 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ },
+ },
+ # flavor_assets
+ {
+ 'action': 'getbyentryid',
+ 'entryId': video_id,
+ 'service': 'flavorAsset',
+ 'ks': '{1:result:ks}',
+ },
+ # captions
+ {
+ 'action': 'list',
+ 'filter:entryIdEqual': video_id,
+ 'service': 'caption_captionasset',
+ 'ks': '{1:result:ks}',
+ },
+ ]
+ # the second object (the response to the second startWidgetSession call) is None
+ header, _, _info, flavor_assets, captions = self._kaltura_api_call(
+ video_id, actions, service_url, note='Downloading video info JSON (Kaltura kwidget player)')
+ info = _info['objects'][0]
+ return header, info, flavor_assets, captions
+
+ def _build_widget_id(self, partner_id):
+ return partner_id if '_' in partner_id else f'_{partner_id}'
+
+ IFRAME_PACKAGE_DATA_REGEX = r'window\.kalturaIframePackageData\s*='
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = self._match_valid_url(url)
- partner_id, entry_id = mobj.group('partner_id', 'id')
- ks = None
- captions = None
+ partner_id, entry_id, player_type = mobj.group('partner_id', 'id', 'player_type')
+ ks, captions = None, None
+ if not player_type:
+ player_type = 'kwidget' if 'html5lib/v2' in url else 'html5'
if partner_id and entry_id:
- _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'), player_type=player_type)
else:
path, query = mobj.group('path', 'query')
if not path and not query:
@@ -247,7 +402,7 @@ class KalturaIE(InfoExtractor):
splitted_path = path.split('/')
params.update(dict((zip(splitted_path[::2], [[v] for v in splitted_path[1::2]]))))
if 'wid' in params:
- partner_id = params['wid'][0][1:]
+ partner_id = remove_start(params['wid'][0], '_')
elif 'p' in params:
partner_id = params['p'][0]
elif 'partner_id' in params:
@@ -256,14 +411,13 @@ class KalturaIE(InfoExtractor):
raise ExtractorError('Invalid URL', expected=True)
if 'entry_id' in params:
entry_id = params['entry_id'][0]
- _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id)
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, player_type=player_type)
elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
reference_id = params['flashvars[referenceId]'][0]
webpage = self._download_webpage(url, reference_id)
- entry_data = self._parse_json(self._search_regex(
- r'window\.kalturaIframePackageData\s*=\s*({.*});',
- webpage, 'kalturaIframePackageData'),
- reference_id)['entryResult']
+ entry_data = self._search_json(
+ self.IFRAME_PACKAGE_DATA_REGEX, webpage,
+ 'kalturaIframePackageData', reference_id)['entryResult']
info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
entry_id = info['id']
# Unfortunately, data returned in kalturaIframePackageData lacks
@@ -271,16 +425,29 @@ class KalturaIE(InfoExtractor):
# regular approach since we now know the entry_id
try:
_, info, flavor_assets, captions = self._get_video_info(
- entry_id, partner_id)
+ entry_id, partner_id, player_type=player_type)
except ExtractorError:
# Regular scenario failed but we already have everything
# extracted apart from captions and can process at least
# with this
pass
+ elif 'uiconf_id' in params and 'flashvars[playlistAPI.kpl0Id]' in params:
+ playlist_id = params['flashvars[playlistAPI.kpl0Id]'][0]
+ webpage = self._download_webpage(url, playlist_id)
+ playlist_data = self._search_json(
+ self.IFRAME_PACKAGE_DATA_REGEX, webpage,
+ 'kalturaIframePackageData', playlist_id)['playlistResult']
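+ # each playlist entry is re-extracted through KalturaIE via the
+ # kaltura:<partner_id>:<entry_id>:<player_type> URL form introduced above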
+ return self.playlist_from_matches(
+ traverse_obj(playlist_data, (playlist_id, 'items', ..., 'id')),
+ playlist_id, traverse_obj(playlist_data, (playlist_id, 'name')),
+ ie=KalturaIE, getter=lambda x: f'kaltura:{partner_id}:{x}:{player_type}')
else:
raise ExtractorError('Invalid URL', expected=True)
ks = params.get('flashvars[ks]', [None])[0]
+ return self._per_video_extract(smuggled_data, entry_id, info, ks, flavor_assets, captions)
+
+ def _per_video_extract(self, smuggled_data, entry_id, info, ks, flavor_assets, captions):
source_url = smuggled_data.get('source_url')
if source_url:
referrer = base64.b64encode(
@@ -351,8 +518,6 @@ class KalturaIE(InfoExtractor):
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
- self._sort_formats(formats)
-
if captions:
for caption in captions.get('objects', []):
# Continue if caption is not ready
@@ -376,5 +541,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': format_field(info, 'userId', ignore=('None', None)),
- 'view_count': info.get('plays'),
+ 'view_count': int_or_none(info.get('plays')),
}
diff --git a/hypervideo_dl/extractor/kanal2.py b/hypervideo_dl/extractor/kanal2.py
new file mode 100644
index 0000000..3c0efe5
--- /dev/null
+++ b/hypervideo_dl/extractor/kanal2.py
@@ -0,0 +1,66 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ join_nonempty,
+ traverse_obj,
+ unified_timestamp,
+ update_url_query,
+)
+
+
+class Kanal2IE(InfoExtractor):
+ _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
+ _TESTS = [{
+ 'note': 'Test standard url (#5575)',
+ 'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
+ 'md5': '7ea7b16266ec1798743777df241883dd',
+ 'info_dict': {
+ 'id': '40792',
+ 'ext': 'mp4',
+ 'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'description': 'md5:53cabf3c5d73150d594747f727431248',
+ 'upload_date': '20160805',
+ 'timestamp': 1470420000,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ playlist = self._download_json(
+ f'https://kanal2.postimees.ee/player/playlist/{video_id}',
+ video_id, query={'type': 'episodes'},
+ headers={'X-Requested-With': 'XMLHttpRequest'})
+
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(*traverse_obj(playlist, ('info', ('title', 'subtitle'))), delim=' / '),
+ 'description': traverse_obj(playlist, ('info', 'description')),
+ 'thumbnail': traverse_obj(playlist, ('data', 'image')),
+ 'formats': self.get_formats(playlist, video_id),
+ 'timestamp': unified_timestamp(self._search_regex(
+ r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
+ traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='') + ' +0200'),
+ }
+
+ def get_formats(self, playlist, video_id):
+ path = traverse_obj(playlist, ('data', 'path'))
+ if not path:
+ raise ExtractorError('Path value not found in playlist JSON response')
+ session = self._download_json(
+ 'https://sts.postimees.ee/session/register',
+ video_id, note='Creating session', errnote='Error creating session',
+ headers={
+ 'X-Original-URI': path,
+ 'Accept': 'application/json',
+ })
+ if session.get('reason') != 'OK' or not session.get('session'):
+ reason = session.get('reason', 'unknown error')
+ raise ExtractorError(f'Unable to obtain session: {reason}')
+
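+ # each stream URL must carry the session token (the "s" query
+ # parameter) before the HLS manifest can be fetched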
+ formats = []
+ for stream in traverse_obj(playlist, ('data', 'streams', ..., 'file')):
+ formats.extend(self._extract_m3u8_formats(
+ update_url_query(stream, {'s': session['session']}), video_id, 'mp4'))
+
+ return formats
diff --git a/hypervideo_dl/extractor/kanalplay.py b/hypervideo_dl/extractor/kanalplay.py
deleted file mode 100644
index 5e24f7e..0000000
--- a/hypervideo_dl/extractor/kanalplay.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- float_or_none,
- srt_subtitles_timecode,
-)
-
-
-class KanalPlayIE(InfoExtractor):
- IE_DESC = 'Kanal 5/9/11 Play'
- _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
- 'info_dict': {
- 'id': '3270012277',
- 'ext': 'flv',
- 'title': 'Saknar både dusch och avlopp',
- 'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
- 'duration': 2636.36,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
- 'only_matching': True,
- }, {
- 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
- 'only_matching': True,
- }]
-
- def _fix_subtitles(self, subs):
- return '\r\n\r\n'.join(
- '%s\r\n%s --> %s\r\n%s'
- % (
- num,
- srt_subtitles_timecode(item['startMillis'] / 1000.0),
- srt_subtitles_timecode(item['endMillis'] / 1000.0),
- item['text'],
- ) for num, item in enumerate(subs, 1))
-
- def _get_subtitles(self, channel_id, video_id):
- subs = self._download_json(
- 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
- video_id, 'Downloading subtitles JSON', fatal=False)
- return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- channel_id = mobj.group('channel_id')
-
- video = self._download_json(
- 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
- video_id)
-
- reasons_for_no_streams = video.get('reasonsForNoStreams')
- if reasons_for_no_streams:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
- expected=True)
-
- title = video['title']
- description = video.get('description')
- duration = float_or_none(video.get('length'), 1000)
- thumbnail = video.get('posterUrl')
-
- stream_base_url = video['streamBaseUrl']
-
- formats = [{
- 'url': stream_base_url,
- 'play_path': stream['source'],
- 'ext': 'flv',
- 'tbr': float_or_none(stream.get('bitrate'), 1000),
- 'rtmp_real_time': True,
- } for stream in video['streams']]
- self._sort_formats(formats)
-
- subtitles = {}
- if video.get('hasSubtitle'):
- subtitles = self.extract_subtitles(channel_id, video_id)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
diff --git a/hypervideo_dl/extractor/kankan.py b/hypervideo_dl/extractor/kankan.py
deleted file mode 100644
index a677ff4..0000000
--- a/hypervideo_dl/extractor/kankan.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import hashlib
-
-from .common import InfoExtractor
-
-_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
-
-
-class KankanIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
-
- _TEST = {
- 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
- 'md5': '29aca1e47ae68fc28804aca89f29507e',
- 'info_dict': {
- 'id': '48863',
- 'ext': 'flv',
- 'title': 'Ready To Go',
- },
- 'skip': 'Only available from China',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
- surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
- gcids = re.findall(r'http://.+?/.+?/(.+?)/', surls)
- gcid = gcids[-1]
-
- info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
- video_info_page = self._download_webpage(
- info_url, video_id, 'Downloading video url info')
- ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
- path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
- param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
- param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
- key = _md5('xl_mp43651' + param1 + param2)
- video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- }
diff --git a/hypervideo_dl/extractor/karaoketv.py b/hypervideo_dl/extractor/karaoketv.py
index bfccf89..381dc00 100644
--- a/hypervideo_dl/extractor/karaoketv.py
+++ b/hypervideo_dl/extractor/karaoketv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/karrierevideos.py b/hypervideo_dl/extractor/karrierevideos.py
index 7b291e0..28d4841 100644
--- a/hypervideo_dl/extractor/karrierevideos.py
+++ b/hypervideo_dl/extractor/karrierevideos.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
diff --git a/hypervideo_dl/extractor/keezmovies.py b/hypervideo_dl/extractor/keezmovies.py
index 06dbcbb..b50da42 100644
--- a/hypervideo_dl/extractor/keezmovies.py
+++ b/hypervideo_dl/extractor/keezmovies.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -7,7 +5,6 @@ from ..aes import aes_decrypt_text
from ..compat import compat_urllib_parse_unquote
from ..utils import (
determine_ext,
- ExtractorError,
format_field,
int_or_none,
str_to_int,
@@ -70,7 +67,7 @@ class KeezMoviesIE(InfoExtractor):
video_url, title, 32).decode('utf-8')
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%dp'),
+ 'format_id': format_field(height, None, '%dp'),
'height': height,
'tbr': tbr,
})
@@ -105,12 +102,6 @@ class KeezMoviesIE(InfoExtractor):
self.raise_no_formats(
'Video %s is no longer available' % video_id, expected=True)
- try:
- self._sort_formats(formats)
- except ExtractorError:
- if fatal:
- raise
-
if not title:
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)', webpage, 'title')
diff --git a/hypervideo_dl/extractor/kelbyone.py b/hypervideo_dl/extractor/kelbyone.py
index 20c26cf..2ca9ad4 100644
--- a/hypervideo_dl/extractor/kelbyone.py
+++ b/hypervideo_dl/extractor/kelbyone.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -62,7 +59,6 @@ class KelbyOneIE(InfoExtractor):
subtitles.setdefault('en', []).append({
'url': track['file'],
})
- self._sort_formats(formats)
yield {
'id': video_id,
'title': item['title'],
diff --git a/hypervideo_dl/extractor/ketnet.py b/hypervideo_dl/extractor/ketnet.py
index e0599d0..ab62767 100644
--- a/hypervideo_dl/extractor/ketnet.py
+++ b/hypervideo_dl/extractor/ketnet.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .canvas import CanvasIE
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
diff --git a/hypervideo_dl/extractor/khanacademy.py b/hypervideo_dl/extractor/khanacademy.py
index 87e5203..5333036 100644
--- a/hypervideo_dl/extractor/khanacademy.py
+++ b/hypervideo_dl/extractor/khanacademy.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -27,16 +25,21 @@ class KhanAcademyBaseIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
- component_props = self._parse_json(self._download_json(
- 'https://www.khanacademy.org/api/internal/graphql',
+ content = self._download_json(
+ 'https://www.khanacademy.org/api/internal/graphql/FetchContentData',
display_id, query={
- 'hash': 1604303425,
+ 'fastly_cacheable': 'persist_until_publish',
+ 'hash': '4134764944',
+ 'lang': 'en',
'variables': json.dumps({
'path': display_id,
- 'queryParams': '',
+ 'queryParams': 'lang=en',
+ 'isModal': False,
+ 'followRedirects': True,
+ 'countryCode': 'US',
}),
- })['data']['contentJson'], display_id)['componentProps']
- return self._parse_component_props(component_props)
+ })['data']['contentJson']
+ return self._parse_component_props(self._parse_json(content, display_id)['componentProps'])
class KhanAcademyIE(KhanAcademyBaseIE):
diff --git a/hypervideo_dl/extractor/kicker.py b/hypervideo_dl/extractor/kicker.py
new file mode 100644
index 0000000..a2c7dd4
--- /dev/null
+++ b/hypervideo_dl/extractor/kicker.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+
+
+class KickerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)kicker\.(?:de)/(?P<id>[\w-]+)/video'
+ _TESTS = [{
+ 'url': 'https://www.kicker.de/pogba-dembel-co-die-top-11-der-abloesefreien-spieler-905049/video',
+ 'info_dict': {
+ 'id': 'km04mrK0DrRAVxy2GcA',
+ 'title': 'md5:b91d145bac5745ac58d5479d8347a875',
+ 'ext': 'mp4',
+ 'duration': 350,
+ 'description': 'md5:a5a3dd77dbb6550dbfb997be100b9998',
+ 'uploader_id': 'x2dfupo',
+ 'timestamp': 1654677626,
+ 'like_count': int,
+ 'uploader': 'kicker.de',
+ 'view_count': int,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://s\d+\.dmcdn\.net/v/T-x741YeYAx8aSZ0Z/x1080',
+ 'tags': ['published', 'category.InternationalSoccer'],
+ 'upload_date': '20220608'
+ }
+ }, {
+ 'url': 'https://www.kicker.de/ex-unioner-in-der-bezirksliga-felix-kroos-vereinschallenge-in-pankow-902825/video',
+ 'info_dict': {
+ 'id': 'k2omNsJKdZ3TxwxYSFJ',
+ 'title': 'md5:72ec24d7f84b8436fe1e89d198152adf',
+ 'ext': 'mp4',
+ 'uploader_id': 'x2dfupo',
+ 'duration': 331,
+ 'timestamp': 1652966015,
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TxU4Z1YYCmtisTbMq/x1080',
+ 'tags': ['FELIX KROOS', 'EINFACH MAL LUPPEN', 'KROOS', 'FSV FORTUNA PANKOW', 'published', 'category.Amateurs', 'marketingpreset.Spreekick'],
+ 'age_limit': 0,
+ 'view_count': int,
+ 'upload_date': '20220519',
+ 'uploader': 'kicker.de',
+ 'description': 'md5:0c2060c899a91c8bf40f578f78c5846f',
+ 'like_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_slug = self._match_id(url)
+
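+ # kicker.de embeds a Dailymotion player; the video id is exposed in the
+ # data-dmprivateid attribute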
+ webpage = self._download_webpage(url, video_slug)
+ dailymotion_video_id = self._search_regex(
+ r'data-dmprivateid\s*=\s*[\'"](?P<video_id>\w+)', webpage,
+ 'video id', group='video_id')
+
+ return self.url_result(
+ f'https://www.dailymotion.com/video/{dailymotion_video_id}',
+ ie=DailymotionIE, video_title=self._html_extract_title(webpage))
diff --git a/hypervideo_dl/extractor/kickstarter.py b/hypervideo_dl/extractor/kickstarter.py
index d4da8f4..c0d851d 100644
--- a/hypervideo_dl/extractor/kickstarter.py
+++ b/hypervideo_dl/extractor/kickstarter.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import smuggle_url
diff --git a/hypervideo_dl/extractor/kinja.py b/hypervideo_dl/extractor/kinja.py
index 1be8b48..df1386f 100644
--- a/hypervideo_dl/extractor/kinja.py
+++ b/hypervideo_dl/extractor/kinja.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -13,8 +8,6 @@ from ..utils import (
parse_iso8601,
strip_or_none,
try_get,
- unescapeHTML,
- urljoin,
)
@@ -58,6 +51,7 @@ class KinjaEmbedIE(InfoExtractor):
vine|
youtube-(?:list|video)
)-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
+ _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1']
_TESTS = [{
'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
'only_matching': True,
@@ -122,12 +116,6 @@ class KinjaEmbedIE(InfoExtractor):
'youtube-video': ('youtube.com/embed/', 'Youtube'),
}
- @staticmethod
- def _extract_urls(webpage, url):
- return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
- r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
- webpage)]
-
def _real_extract(self, url):
video_type, video_id = self._match_valid_url(url).groups()
@@ -159,7 +147,6 @@ class KinjaEmbedIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
thumbnail = None
poster = data.get('poster') or {}
@@ -207,8 +194,6 @@ class KinjaEmbedIE(InfoExtractor):
'url': fallback_rendition_url,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/kinopoisk.py b/hypervideo_dl/extractor/kinopoisk.py
index cdbb642..5db9083 100644
--- a/hypervideo_dl/extractor/kinopoisk.py
+++ b/hypervideo_dl/extractor/kinopoisk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
dict_get,
@@ -47,7 +44,6 @@ class KinoPoiskIE(InfoExtractor):
formats = self._extract_m3u8_formats(
data['playlistEntity']['uri'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
description = dict_get(
film, ('descriptscription', 'description',
diff --git a/hypervideo_dl/extractor/kompas.py b/hypervideo_dl/extractor/kompas.py
new file mode 100644
index 0000000..8bad961
--- /dev/null
+++ b/hypervideo_dl/extractor/kompas.py
@@ -0,0 +1,26 @@
+from .jixie import JixieBaseIE
+
+
+class KompasVideoIE(JixieBaseIE):
+ _VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
+ 'info_dict': {
+ 'id': '164474',
+ 'ext': 'mp4',
+ 'title': 'Kim Jong Un Siap Kirim Nuklir Lawan AS dan Korsel',
+ 'description': 'md5:262530c4fb7462398235f9a5dba92456',
+ 'uploader_id': '9262bf2590d558736cac4fff7978fcb1',
+ 'display_id': 'kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
+ 'duration': 85.066667,
+ 'categories': ['news'],
+ 'thumbnail': 'https://video.jixie.media/1001/164474/164474_1280x720.jpg',
+ 'tags': 'count:9',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'slug')
+ webpage = self._download_webpage(url, display_id)
+
+ return self._extract_data_from_jixie_id(display_id, video_id, webpage)
diff --git a/hypervideo_dl/extractor/konserthusetplay.py b/hypervideo_dl/extractor/konserthusetplay.py
index dd42bb2..10767f1 100644
--- a/hypervideo_dl/extractor/konserthusetplay.py
+++ b/hypervideo_dl/extractor/konserthusetplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -98,8 +95,6 @@ class KonserthusetPlayIE(InfoExtractor):
'url': fallback_url,
})
- self._sort_formats(formats)
-
title = player_config.get('title') or media['title']
description = player_config.get('mediaInfo', {}).get('description')
thumbnail = media.get('image')
diff --git a/hypervideo_dl/extractor/koo.py b/hypervideo_dl/extractor/koo.py
index 2d6ed3b..6616ccd 100644
--- a/hypervideo_dl/extractor/koo.py
+++ b/hypervideo_dl/extractor/koo.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -103,7 +101,6 @@ class KooIE(InfoExtractor):
if not formats:
self.raise_no_formats('No video/audio found at the provided url.', expected=True)
- self._sort_formats(formats)
return {
'id': id,
'title': clean_html(item_json.get('title')),
diff --git a/hypervideo_dl/extractor/krasview.py b/hypervideo_dl/extractor/krasview.py
index d27d052..4323aa4 100644
--- a/hypervideo_dl/extractor/krasview.py
+++ b/hypervideo_dl/extractor/krasview.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/kth.py b/hypervideo_dl/extractor/kth.py
new file mode 100644
index 0000000..e17c6db
--- /dev/null
+++ b/hypervideo_dl/extractor/kth.py
@@ -0,0 +1,28 @@
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class KTHIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
+ _TEST = {
+ 'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
+ 'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
+ 'info_dict': {
+ 'id': '0_uoop6oz9',
+ 'ext': 'mp4',
+ 'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
+ 'thumbnail': 're:https?://.+/thumbnail/.+',
+ 'duration': 3516,
+ 'timestamp': 1647345358,
+ 'upload_date': '20220315',
+ 'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
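+ # play.kth.se is a hosted Kaltura instance (partner id 308) served
+ # through api.kaltura.nordu.net, so hand off to KalturaIE with the
+ # service URL smuggled in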
+ result = self.url_result(
+ smuggle_url('kaltura:308:%s' % video_id, {
+ 'service_url': 'https://api.kaltura.nordu.net'}),
+ 'Kaltura')
+ return result
diff --git a/hypervideo_dl/extractor/ku6.py b/hypervideo_dl/extractor/ku6.py
index a574408..31b4ea0 100644
--- a/hypervideo_dl/extractor/ku6.py
+++ b/hypervideo_dl/extractor/ku6.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/kusi.py b/hypervideo_dl/extractor/kusi.py
index 707fe18..a23ad89 100644
--- a/hypervideo_dl/extractor/kusi.py
+++ b/hypervideo_dl/extractor/kusi.py
@@ -1,13 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
timeconvert,
update_url_query,
xpath_text,
@@ -69,12 +66,11 @@ class KUSIIE(InfoExtractor):
formats = []
for quality in quality_options:
formats.append({
- 'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
+ 'url': urllib.parse.unquote_plus(quality.attrib['url']),
'height': int_or_none(quality.attrib.get('height')),
'width': int_or_none(quality.attrib.get('width')),
'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/kuwo.py b/hypervideo_dl/extractor/kuwo.py
index 460a425..cfec1c5 100644
--- a/hypervideo_dl/extractor/kuwo.py
+++ b/hypervideo_dl/extractor/kuwo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -107,7 +104,6 @@ class KuwoIE(KuwoBaseIE):
lrc_content = None
formats = self._get_formats(song_id)
- self._sort_formats(formats)
album_id = self._html_search_regex(
r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
@@ -342,8 +338,6 @@ class KuwoMvIE(KuwoBaseIE):
'format_id': 'mv',
})
- self._sort_formats(formats)
-
return {
'id': song_id,
'title': song_name,
diff --git a/hypervideo_dl/extractor/la7.py b/hypervideo_dl/extractor/la7.py
index de985e4..68dc1d4 100644
--- a/hypervideo_dl/extractor/la7.py
+++ b/hypervideo_dl/extractor/la7.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -81,8 +78,6 @@ class LA7IE(InfoExtractor):
if http_f:
formats.append(http_f)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._og_search_title(webpage, default=None),
@@ -139,7 +134,6 @@ class LA7PodcastEpisodeIE(InfoExtractor):
'format_id': ext,
'ext': ext,
}]
- self._sort_formats(formats)
title = self._html_search_regex(
(r'<div class="title">(?P<title>.+?)</',
@@ -197,7 +191,7 @@ class LA7PodcastEpisodeIE(InfoExtractor):
return self._extract_info(webpage, video_id)
-class LA7PodcastIE(LA7PodcastEpisodeIE):
+class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'la7.it:podcast'
_VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
diff --git a/hypervideo_dl/extractor/laola1tv.py b/hypervideo_dl/extractor/laola1tv.py
index b5d27c2..416dd7e 100644
--- a/hypervideo_dl/extractor/laola1tv.py
+++ b/hypervideo_dl/extractor/laola1tv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -52,7 +49,6 @@ class Laola1TvEmbedIE(InfoExtractor):
formats = self._extract_akamai_formats(
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
video_id)
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
@@ -121,7 +117,7 @@ class Laola1TvEmbedIE(InfoExtractor):
}
-class Laola1TvBaseIE(Laola1TvEmbedIE):
+class Laola1TvBaseIE(Laola1TvEmbedIE): # XXX: Do not subclass from concrete IE
def _extract_video(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/lastfm.py b/hypervideo_dl/extractor/lastfm.py
index 5215717..f14198c 100644
--- a/hypervideo_dl/extractor/lastfm.py
+++ b/hypervideo_dl/extractor/lastfm.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -18,7 +15,7 @@ class LastFMPlaylistBaseIE(InfoExtractor):
for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
webpage = self._download_webpage(
url, playlist_id,
- note='Downloading page %d%s' % (page_number, format_field(last_page_number, template=' of %d')),
+ note='Downloading page %d%s' % (page_number, format_field(last_page_number, None, ' of %d')),
query={'page': page_number})
page_entries = [
self.url_result(player_url, 'Youtube')
diff --git a/hypervideo_dl/extractor/lbry.py b/hypervideo_dl/extractor/lbry.py
index 5d5457c..b5def1e 100644
--- a/hypervideo_dl/extractor/lbry.py
+++ b/hypervideo_dl/extractor/lbry.py
@@ -1,23 +1,18 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import json
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import (
- determine_ext,
ExtractorError,
+ HEADRequest,
+ OnDemandPagedList,
+ UnsupportedError,
+ determine_ext,
int_or_none,
mimetype2ext,
parse_qs,
- OnDemandPagedList,
try_get,
- UnsupportedError,
urljoin,
)
@@ -29,10 +24,14 @@ class LBRYBaseIE(InfoExtractor):
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
def _call_api_proxy(self, method, display_id, params, resource):
+ headers = {'Content-Type': 'application/json-rpc'}
+ token = try_get(self._get_cookies('https://odysee.com'), lambda x: x['auth_token'].value)
+ if token:
+ headers['x-lbry-auth-token'] = token
response = self._download_json(
'https://api.lbry.tv/api/v1/proxy',
display_id, 'Downloading %s JSON metadata' % resource,
- headers={'Content-Type': 'application/json-rpc'},
+ headers=headers,
data=json.dumps({
'method': method,
'params': params,
@@ -94,7 +93,7 @@ class LBRYIE(LBRYBaseIE):
_TESTS = [{
# Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
- 'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
+ 'md5': 'fffd15d76062e9a985c22c7c7f2f4805',
'info_dict': {
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
'ext': 'mp4',
@@ -106,6 +105,19 @@ class LBRYIE(LBRYBaseIE):
'release_date': '20200721',
'width': 1280,
'height': 720,
+ 'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png',
+ 'license': 'None',
+ 'duration': 346,
+ 'channel': 'LBRY/Odysee rats united!!!',
+ 'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
+ 'channel_url': 'https://lbry.tv/@Mantega:1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
+ 'tags': [
+ 'first day in lbry',
+ 'lbc',
+ 'lbry',
+ 'start',
+ 'tutorial'
+ ],
}
}, {
# Audio
@@ -126,11 +138,13 @@ class LBRYIE(LBRYBaseIE):
'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
'vcodec': 'none',
+ 'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png',
+ 'license': 'None',
}
}, {
# HLS
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
- 'md5': 'fc82f45ea54915b1495dd7cb5cc1289f',
+ 'md5': '25049011f3c8bc2f8b60ad88a031837e',
'info_dict': {
'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
'ext': 'mp4',
@@ -146,12 +160,37 @@ class LBRYIE(LBRYBaseIE):
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
'formats': 'mincount:3',
+ 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
+ 'license': 'Copyrighted (contact publisher)',
}
}, {
+ # HLS live stream (might expire)
+ 'url': 'https://odysee.com/@RT:fd/livestream_RT:d',
+ 'info_dict': {
+ 'id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': 'startswith:RT News | Livestream 24/7',
+ 'description': 'md5:fe68d0056dfe79c1a6b8ce8c34d5f6fa',
+ 'timestamp': int,
+ 'upload_date': str,
+ 'release_timestamp': int,
+ 'release_date': str,
+ 'tags': list,
+ 'duration': None,
+ 'channel': 'RT',
+ 'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
+ 'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
+ 'formats': 'mincount:1',
+ 'thumbnail': 'startswith:https://thumb',
+ 'license': 'None',
+ },
+ 'params': {'skip_download': True}
+ }, {
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
'only_matching': True,
}, {
- 'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
+ 'url': 'https://odysee.com/@ScammerRevolts:b0/I-SYSKEY\'D-THE-SAME-SCAMMERS-3-TIMES!:b',
'only_matching': True,
}, {
'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
@@ -185,20 +224,24 @@ class LBRYIE(LBRYBaseIE):
display_id = compat_urllib_parse_unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
+ headers = {'Referer': 'https://odysee.com/'}
if result['value'].get('stream_type') in self._SUPPORTED_STREAM_TYPES:
- claim_id, is_live, headers = result['claim_id'], False, None
+ claim_id, is_live = result['claim_id'], False
streaming_url = self._call_api_proxy(
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
final_url = self._request_webpage(
- streaming_url, display_id, note='Downloading streaming redirect url info').geturl()
+ HEADRequest(streaming_url), display_id, headers=headers,
+ note='Downloading streaming redirect url info').geturl()
elif result.get('value_type') == 'stream':
claim_id, is_live = result['signing_channel']['claim_id'], True
- headers = {'referer': 'https://player.odysee.live/'}
live_data = self._download_json(
- f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id,
+ 'https://api.odysee.live/livestream/is_live', claim_id,
+ query={'channel_claim_id': claim_id},
note='Downloading livestream JSON metadata')['data']
- streaming_url = final_url = live_data.get('url')
- if not final_url and not live_data.get('live'):
+ streaming_url = final_url = live_data.get('VideoURL')
+ # Upcoming videos may still give VideoURL
+ if not live_data.get('Live'):
+ streaming_url = final_url = None
self.raise_no_formats('This stream is not live', True, claim_id)
else:
raise UnsupportedError(url)
@@ -207,7 +250,6 @@ class LBRYIE(LBRYBaseIE):
if determine_ext(final_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
final_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live, headers=headers)
- self._sort_formats(info['formats'])
else:
info['url'] = streaming_url
return {
@@ -229,7 +271,7 @@ class LBRYChannelIE(LBRYBaseIE):
'title': 'The LBRY Foundation',
'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
},
- 'playlist_count': 29,
+ 'playlist_mincount': 29,
}, {
'url': 'https://lbry.tv/@LBRYFoundation',
'only_matching': True,
diff --git a/hypervideo_dl/extractor/lci.py b/hypervideo_dl/extractor/lci.py
index 920872f..e7d2f8a 100644
--- a/hypervideo_dl/extractor/lci.py
+++ b/hypervideo_dl/extractor/lci.py
@@ -1,26 +1,28 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
class LCIIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
- _TEST = {
- 'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html',
- 'md5': '2fdb2538b884d4d695f9bd2bde137e6c',
+ _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
'info_dict': {
- 'id': '13244802',
+ 'id': '13875948',
'ext': 'mp4',
- 'title': 'Hillary Clinton et sa quinte de toux, en plein meeting',
- 'description': 'md5:a4363e3a960860132f8124b62f4a01c9',
- }
- }
+ 'title': 'md5:660df5481fd418bc3bbb0d070e6fdb5a',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/presidentielle-2022-marine-le-pen-et-emmanuel-macron-invites-de-lci-ce-vendredi-9c0e73-e1a036-0@1x.jpg',
+ 'upload_date': '20220422',
+ 'duration': 33,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.lci.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- wat_id = self._search_regex(
- (r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
- webpage, 'wat id')
+ wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
return self.url_result('wat:' + wat_id, 'Wat', wat_id)
diff --git a/hypervideo_dl/extractor/lcp.py b/hypervideo_dl/extractor/lcp.py
index ade27a9..9846319 100644
--- a/hypervideo_dl/extractor/lcp.py
+++ b/hypervideo_dl/extractor/lcp.py
@@ -1,11 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .arkena import ArkenaIE
-class LcpPlayIE(ArkenaIE):
+class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
_TESTS = [{
'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
diff --git a/hypervideo_dl/extractor/lecture2go.py b/hypervideo_dl/extractor/lecture2go.py
index 81b5d41..3a9b30a 100644
--- a/hypervideo_dl/extractor/lecture2go.py
+++ b/hypervideo_dl/extractor/lecture2go.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -52,8 +49,6 @@ class Lecture2GoIE(InfoExtractor):
'url': url,
})
- self._sort_formats(formats)
-
creator = self._html_search_regex(
r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
duration = parse_duration(self._html_search_regex(
diff --git a/hypervideo_dl/extractor/lecturio.py b/hypervideo_dl/extractor/lecturio.py
index 0ee1eeb..973764c 100644
--- a/hypervideo_dl/extractor/lecturio.py
+++ b/hypervideo_dl/extractor/lecturio.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -140,7 +137,6 @@ class LecturioIE(LecturioBaseIE):
'height': int(mobj.group(1)),
})
formats.append(f)
- self._sort_formats(formats)
subtitles = {}
automatic_captions = {}
diff --git a/hypervideo_dl/extractor/leeco.py b/hypervideo_dl/extractor/leeco.py
index d5e1142..85033b8 100644
--- a/hypervideo_dl/extractor/leeco.py
+++ b/hypervideo_dl/extractor/leeco.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import datetime
import hashlib
import re
@@ -185,7 +182,6 @@ class LeIE(InfoExtractor):
f['height'] = int_or_none(format_id[:-1])
formats.append(f)
- self._sort_formats(formats, ('res', 'quality'))
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
@@ -199,6 +195,7 @@ class LeIE(InfoExtractor):
'thumbnail': playurl['pic'],
'description': description,
'timestamp': publish_time,
+ '_format_sort_fields': ('res', 'quality'),
}
@@ -359,7 +356,6 @@ class LetvCloudIE(InfoExtractor):
media_id = uu + '_' + vu
formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
- self._sort_formats(formats)
return {
'id': media_id,
diff --git a/hypervideo_dl/extractor/lego.py b/hypervideo_dl/extractor/lego.py
index 901f43b..811b447 100644
--- a/hypervideo_dl/extractor/lego.py
+++ b/hypervideo_dl/extractor/lego.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import uuid
from .common import InfoExtractor
@@ -116,7 +113,6 @@ class LEGOIE(InfoExtractor):
'width': quality[2],
}),
formats.append(f)
- self._sort_formats(formats)
subtitles = {}
sub_file_id = video.get('SubFileId')
diff --git a/hypervideo_dl/extractor/lemonde.py b/hypervideo_dl/extractor/lemonde.py
index 3306892..c916791 100644
--- a/hypervideo_dl/extractor/lemonde.py
+++ b/hypervideo_dl/extractor/lemonde.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/lenta.py b/hypervideo_dl/extractor/lenta.py
index 2ebd4e5..10aac98 100644
--- a/hypervideo_dl/extractor/lenta.py
+++ b/hypervideo_dl/extractor/lenta.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/libraryofcongress.py b/hypervideo_dl/extractor/libraryofcongress.py
index 03f2051..b76ca09 100644
--- a/hypervideo_dl/extractor/libraryofcongress.py
+++ b/hypervideo_dl/extractor/libraryofcongress.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -129,8 +126,6 @@ class LibraryOfCongressIE(InfoExtractor):
'filesize_approx': parse_filesize(m.group('size')),
})
- self._sort_formats(formats)
-
duration = float_or_none(data.get('duration'))
view_count = int_or_none(data.get('viewCount'))
diff --git a/hypervideo_dl/extractor/libsyn.py b/hypervideo_dl/extractor/libsyn.py
index d1fcda4..29bbb03 100644
--- a/hypervideo_dl/extractor/libsyn.py
+++ b/hypervideo_dl/extractor/libsyn.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -14,6 +10,7 @@ from ..utils import (
class LibsynIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1']
_TESTS = [{
'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
diff --git a/hypervideo_dl/extractor/lifenews.py b/hypervideo_dl/extractor/lifenews.py
index 49a0a59..919cfcb 100644
--- a/hypervideo_dl/extractor/lifenews.py
+++ b/hypervideo_dl/extractor/lifenews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -226,8 +223,6 @@ class LifeEmbedIE(InfoExtractor):
else:
extract_original(video_url)
- self._sort_formats(formats)
-
thumbnail = thumbnail or self._search_regex(
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
diff --git a/hypervideo_dl/extractor/likee.py b/hypervideo_dl/extractor/likee.py
new file mode 100644
index 0000000..74ee2be
--- /dev/null
+++ b/hypervideo_dl/extractor/likee.py
@@ -0,0 +1,192 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class LikeeIE(InfoExtractor):
+ IE_NAME = 'likee'
+ _VALID_URL = r'(?x)https?://(www\.)?likee\.video/(?:(?P<channel_name>[^/]+)/video/|v/)(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://likee.video/@huynh_hong_quan_/video/7093444807096327263',
+ 'info_dict': {
+ 'id': '7093444807096327263',
+ 'ext': 'mp4',
+ 'title': '🤴🤴🤴',
+ 'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'uploader': 'Huỳnh Hồng Qu&acirc;n ',
+ 'play_count': int,
+ 'download_count': int,
+ 'artist': 'Huỳnh Hồng Qu&acirc;n ',
+ 'timestamp': 1651571320,
+ 'upload_date': '20220503',
+ 'view_count': int,
+ 'uploader_id': 'huynh_hong_quan_',
+ 'duration': 12374,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ }, {
+ 'url': 'https://likee.video/@649222262/video/7093167848050058862',
+ 'info_dict': {
+ 'id': '7093167848050058862',
+ 'ext': 'mp4',
+ 'title': 'likee video #7093167848050058862',
+ 'description': 'md5:3f971c8c6ee8a216f2b1a9094c5de99f',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'comment_count': int,
+ 'like_count': int,
+ 'uploader': 'Vương Phước Nhi',
+ 'download_count': int,
+ 'timestamp': 1651506835,
+ 'upload_date': '20220502',
+ 'duration': 60024,
+ 'play_count': int,
+ 'artist': 'Vương Phước Nhi',
+ 'uploader_id': '649222262',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://likee.video/@fernanda_rivasg/video/6932224568407629502',
+ 'info_dict': {
+ 'id': '6932224568407629502',
+ 'ext': 'mp4',
+ 'title': 'Un trend viejito🔥 #LIKEE #Ferlovers #trend ',
+ 'description': 'md5:c42b903a72a99d6d8b73e3d1126fbcef',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'comment_count': int,
+ 'duration': 9684,
+ 'uploader_id': 'fernanda_rivasg',
+ 'view_count': int,
+ 'play_count': int,
+ 'artist': 'La Cami La✨',
+ 'download_count': int,
+ 'like_count': int,
+ 'uploader': 'Fernanda Rivas🎶',
+ 'timestamp': 1614034308,
+ 'upload_date': '20210222',
+ },
+ }, {
+ 'url': 'https://likee.video/v/k6QcOp',
+ 'info_dict': {
+ 'id': 'k6QcOp',
+ 'ext': 'mp4',
+ 'title': '#AguaChallenge t&uacute; ya lo intentaste?😱🤩',
+ 'description': 'md5:b0cc462689d4ff2b624daa4dba7640d9',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'comment_count': int,
+ 'duration': 18014,
+ 'play_count': int,
+ 'view_count': int,
+ 'timestamp': 1611694774,
+ 'like_count': int,
+ 'uploader': 'Fernanda Rivas🎶',
+ 'uploader_id': 'fernanda_rivasg',
+ 'download_count': int,
+ 'artist': 'ʟᴇʀɪᴋ_ᴜɴɪᴄᴏʀɴ♡︎',
+ 'upload_date': '20210126',
+ },
+ }, {
+ 'url': 'https://www.likee.video/@649222262/video/7093167848050058862',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.likee.video/v/k6QcOp',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info = self._parse_json(
+ self._search_regex(r'window\.data\s=\s({.+?});', webpage, 'video info'),
+ video_id, transform_source=js_to_json)
+ video_url = traverse_obj(info, 'video_url', ('originVideoInfo', 'video_url'))
+ if not video_url:
+ self.raise_no_formats('Video was deleted', expected=True)
+ formats = [{
+ 'format_id': 'mp4-with-watermark',
+ 'url': video_url,
+ 'height': info.get('video_height'),
+ 'width': info.get('video_width'),
+ }, {
+ 'format_id': 'mp4-without-watermark',
+ 'url': video_url.replace('_4', ''),
+ 'height': info.get('video_height'),
+ 'width': info.get('video_width'),
+ 'quality': 1,
+ }]
+ return {
+ 'id': video_id,
+ 'title': info.get('msgText'),
+ 'description': info.get('share_desc'),
+ 'view_count': int_or_none(info.get('video_count')),
+ 'like_count': int_or_none(info.get('likeCount')),
+ 'play_count': int_or_none(info.get('play_count')),
+ 'download_count': int_or_none(info.get('download_count')),
+ 'comment_count': int_or_none(info.get('comment_count')),
+ 'uploader': str_or_none(info.get('nick_name')),
+ 'uploader_id': str_or_none(info.get('likeeId')),
+ 'artist': str_or_none(traverse_obj(info, ('sound', 'owner_name'))),
+ 'timestamp': parse_iso8601(info.get('uploadDate')),
+ 'thumbnail': info.get('coverUrl'),
+ 'duration': int_or_none(traverse_obj(info, ('option_data', 'dur'))),
+ 'formats': formats,
+ }
+
+
+class LikeeUserIE(InfoExtractor):
+ IE_NAME = 'likee:user'
+ _VALID_URL = r'https?://(www\.)?likee\.video/(?P<id>[^/]+)/?$'
+ _TESTS = [{
+ 'url': 'https://likee.video/@fernanda_rivasg',
+ 'info_dict': {
+ 'id': '925638334',
+ 'title': 'fernanda_rivasg',
+ },
+ 'playlist_mincount': 500,
+ }, {
+ 'url': 'https://likee.video/@may_hmoob',
+ 'info_dict': {
+ 'id': '2943949041',
+ 'title': 'may_hmoob',
+ },
+ 'playlist_mincount': 80,
+ }]
+ _PAGE_SIZE = 50
+ _API_GET_USER_VIDEO = 'https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo'
+
+ def _entries(self, user_name, user_id):
+ last_post_id = ''
+ while True:
+ user_videos = self._download_json(
+ self._API_GET_USER_VIDEO, user_name,
+ data=json.dumps({
+ 'uid': user_id,
+ 'count': self._PAGE_SIZE,
+ 'lastPostId': last_post_id,
+ 'tabType': 0,
+ }).encode('utf-8'),
+ headers={'content-type': 'application/json'},
+ note=f'Get user info with lastPostId #{last_post_id}')
+ items = traverse_obj(user_videos, ('data', 'videoList'))
+ if not items:
+ break
+ for item in items:
+ last_post_id = item['postId']
+ yield self.url_result(f'https://likee.video/{user_name}/video/{last_post_id}')
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+ webpage = self._download_webpage(url, user_name)
+ info = self._parse_json(
+ self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'user info'),
+ user_name, transform_source=js_to_json)
+ user_id = traverse_obj(info, ('userinfo', 'uid'))
+ return self.playlist_result(self._entries(user_name, user_id), user_id, traverse_obj(info, ('userinfo', 'user_name')))
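
Note: the new LikeeUserIE pages through a profile with cursor pagination: each request sends the last seen postId back as lastPostId, and the loop ends when videoList comes back empty. A self-contained sketch of that loop using only the standard library (same endpoint and field names as the code above):

    import json
    import urllib.request

    API = 'https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo'

    def iter_post_ids(uid, page_size=50):
        last_post_id = ''
        while True:
            body = json.dumps({'uid': uid, 'count': page_size,
                               'lastPostId': last_post_id, 'tabType': 0}).encode()
            req = urllib.request.Request(
                API, data=body, headers={'content-type': 'application/json'})
            with urllib.request.urlopen(req) as resp:
                items = json.load(resp).get('data', {}).get('videoList') or []
            if not items:  # empty page ends the crawl
                return
            for item in items:
                last_post_id = item['postId']  # cursor for the next request
                yield last_post_id
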
diff --git a/hypervideo_dl/extractor/limelight.py b/hypervideo_dl/extractor/limelight.py
index b20681a..e11ec43 100644
--- a/hypervideo_dl/extractor/limelight.py
+++ b/hypervideo_dl/extractor/limelight.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -20,7 +17,7 @@ class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
@classmethod
- def _extract_urls(cls, webpage, source_url):
+ def _extract_embed_urls(cls, url, webpage):
lm = {
'Media': 'media',
'Channel': 'channel',
@@ -28,7 +25,7 @@ class LimelightBaseIE(InfoExtractor):
}
def smuggle(url):
- return smuggle_url(url, {'source_url': source_url})
+ return smuggle_url(url, {'source_url': url})
entries = []
for kind, video_id in re.findall(
@@ -182,8 +179,6 @@ class LimelightBaseIE(InfoExtractor):
'ext': ext,
})
- self._sort_formats(formats)
-
subtitles = {}
for flag in mobile_item.get('flags'):
if flag == 'ClosedCaptions':
diff --git a/hypervideo_dl/extractor/line.py b/hypervideo_dl/extractor/line.py
index 987c434..3fab9c8 100644
--- a/hypervideo_dl/extractor/line.py
+++ b/hypervideo_dl/extractor/line.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -38,7 +34,7 @@ class LineLiveBaseIE(InfoExtractor):
'timestamp': int_or_none(item.get('createdAt')),
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://live.line.me/channels/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://live.line.me/channels/%s'),
'duration': int_or_none(item.get('archiveDuration')),
'view_count': int_or_none(item.get('viewerCount')),
'comment_count': int_or_none(item.get('chatCount')),
@@ -102,7 +98,6 @@ class LineLiveIE(LineLiveBaseIE):
archive_status = item.get('archiveStatus')
if archive_status != 'ARCHIVED':
self.raise_no_formats('this video has been ' + archive_status.lower(), expected=True)
- self._sort_formats(formats)
info['formats'] = formats
return info
diff --git a/hypervideo_dl/extractor/linkedin.py b/hypervideo_dl/extractor/linkedin.py
index 0f57bfa..2bf2e9a 100644
--- a/hypervideo_dl/extractor/linkedin.py
+++ b/hypervideo_dl/extractor/linkedin.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from itertools import zip_longest
import re
@@ -114,8 +111,6 @@ class LinkedInIE(LinkedInBaseIE):
'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
} for source in sources]
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
@@ -190,10 +185,6 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
streaming_url, video_slug, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
- # It seems like this would be correctly handled by default
- # However, unless someone can confirm this, the old
- # behaviour is being kept as-is
- self._sort_formats(formats, ('res', 'source_preference'))
subtitles = {}
duration = int_or_none(video_data.get('durationInSeconds'))
transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
@@ -211,6 +202,10 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
'duration': duration,
'subtitles': subtitles,
+ # It seems like this would be correctly handled by default
+ # However, unless someone can confirm this, the old
+ # behaviour is being kept as-is
+ '_format_sort_fields': ('res', 'source_preference')
}
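
Note: this LinkedIn Learning hunk shows the pattern behind most of the deletions in this commit: explicit self._sort_formats(...) calls go away because the core now sorts formats itself, and any extractor-specific ordering is declared as _format_sort_fields in the returned info dict rather than executed in the extractor. Schematically (not runnable on its own; field names taken from the hunk above):

    # before: the extractor sorted its own formats
    self._sort_formats(formats, ('res', 'source_preference'))
    return {'id': video_id, 'formats': formats}

    # after: the ordering is declared; the core applies it at output time
    return {
        'id': video_id,
        'formats': formats,
        '_format_sort_fields': ('res', 'source_preference'),
    }
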
diff --git a/hypervideo_dl/extractor/linuxacademy.py b/hypervideo_dl/extractor/linuxacademy.py
index 6aff88e..a570248 100644
--- a/hypervideo_dl/extractor/linuxacademy.py
+++ b/hypervideo_dl/extractor/linuxacademy.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
import random
@@ -220,7 +218,6 @@ class LinuxAcademyIE(InfoExtractor):
formats = self._extract_m3u8_formats(
m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
- self._sort_formats(formats)
info = {
'id': item_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/liputan6.py b/hypervideo_dl/extractor/liputan6.py
new file mode 100644
index 0000000..c4477b9
--- /dev/null
+++ b/hypervideo_dl/extractor/liputan6.py
@@ -0,0 +1,64 @@
+from .common import InfoExtractor
+from .vidio import VidioIE
+
+
+class Liputan6IE(InfoExtractor):
+ _VALID_URL = r'https?://www\.liputan6\.com/\w+/read/\d+/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.liputan6.com/news/read/5007510/video-duh-perawat-rs-di-medan-diduga-salah-berikan-obat-ke-pasien',
+ 'info_dict': {
+ 'id': '7082548',
+ 'ext': 'mp4',
+ 'title': 'Duh, Perawat RS di Medan Diduga Salah Berikan Obat Ke Pasien',
+ 'thumbnail': 'https://thumbor.prod.vidiocdn.com/lOz5pStm9X-jjlTa_VQQUelOPtw=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7082548/duh-perawat-rs-di-medan-diduga-salah-berikan-obat-ke-pasien-ca1125.jpg',
+ 'channel_id': '185693',
+ 'uploader': 'Liputan6.com',
+ 'duration': 104,
+ 'uploader_url': 'https://www.vidio.com/@liputan6',
+ 'description': 'md5:3b58ecff10ec3a41d4304cf98228435a',
+ 'timestamp': 1657159427,
+ 'uploader_id': 'liputan6',
+ 'display_id': 'video-duh-perawat-rs-di-medan-diduga-salah-berikan-obat-ke-pasien',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'tags': ['perawat indonesia', 'rumah sakit', 'Medan', 'viral hari ini', 'viral', 'enamplus'],
+ 'channel': 'Default Channel',
+ 'dislike_count': int,
+ 'upload_date': '20220707'
+ }
+ }, {
+ 'url': 'https://www.liputan6.com/tv/read/5007719/video-program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp-14-ribu',
+ 'info_dict': {
+ 'id': '7082543',
+ 'ext': 'mp4',
+ 'title': 'md5:ecb7b3c598b97798bfd0eb50c6233b8c',
+ 'channel_id': '604054',
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'timestamp': 1657159211,
+ 'upload_date': '20220707',
+ 'tags': ['minyakita', 'minyak goreng', 'liputan 6', 'sctv'],
+ 'uploader_url': 'https://www.vidio.com/@sctv',
+ 'display_id': 'video-program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp-14-ribu',
+ 'like_count': int,
+ 'uploader': 'SCTV',
+ 'description': 'md5:6c374d82589b71fb98b3d550edb6873f',
+ 'duration': 99,
+ 'uploader_id': 'sctv',
+ 'thumbnail': 'https://thumbor.prod.vidiocdn.com/AAIOjz-64hKojjdw5hr0oNNEeJg=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7082543/program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp14-ribu-_-liputan-6-7d9fbb.jpg',
+ 'channel': 'Liputan 6 Pagi',
+ 'view_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ json_data = self._search_json(
+ r'window.kmklabs.gtm\s*=', webpage, 'json_data', display_id)
+ video_id = json_data['videos']['video_1']['video_id']
+
+ return self.url_result(
+ f'https://www.vidio.com/watch/{video_id}-{display_id}', ie=VidioIE, video_id=display_id)
diff --git a/hypervideo_dl/extractor/listennotes.py b/hypervideo_dl/extractor/listennotes.py
new file mode 100644
index 0000000..4ebc9be
--- /dev/null
+++ b/hypervideo_dl/extractor/listennotes.py
@@ -0,0 +1,86 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_id,
+ get_element_text_and_html_by_tag,
+ parse_duration,
+ strip_or_none,
+ traverse_obj,
+ try_call,
+)
+
+
+class ListenNotesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?listennotes\.com/podcasts/[^/]+/[^/]+-(?P<id>.+)/'
+ _TESTS = [{
+ 'url': 'https://www.listennotes.com/podcasts/thriving-on-overload/tim-oreilly-on-noticing-KrDgvNb_u1n/',
+ 'md5': '5b91a32f841e5788fb82b72a1a8af7f7',
+ 'info_dict': {
+ 'id': 'KrDgvNb_u1n',
+ 'ext': 'mp3',
+ 'title': 'md5:32236591a921adf17bbdbf0441b6c0e9',
+ 'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd',
+ 'duration': 2148.0,
+ 'channel': 'Thriving on Overload',
+ 'channel_id': 'ed84wITivxF',
+ 'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
+ 'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg',
+ 'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/',
+ 'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
+ }
+ }, {
+ 'url': 'https://www.listennotes.com/podcasts/ask-noah-show/episode-177-wireguard-with-lwEA3154JzG/',
+ 'md5': '62fb4ffe7fc525632a1138bf72a5ce53',
+ 'info_dict': {
+ 'id': 'lwEA3154JzG',
+ 'ext': 'mp3',
+ 'title': 'Episode 177: WireGuard with Jason Donenfeld',
+ 'description': 'md5:24744f36456a3e95f83c1193a3458594',
+ 'duration': 3861.0,
+ 'channel': 'Ask Noah Show',
+ 'channel_id': '4DQTzdS5-j7',
+ 'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
+ 'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
+ 'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg',
+ 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
+ }
+ }]
+
+ def _clean_description(self, description):
+ return clean_html(re.sub(r'(</?(div|p)>\s*)+', '<br/><br/>', description or ''))
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ webpage = self._download_webpage(url, audio_id)
+ data = self._search_json(
+ r'<script id="original-content"[^>]+\btype="application/json">', webpage, 'content', audio_id)
+ data.update(extract_attributes(get_element_html_by_id(
+ r'episode-play-button-toolbar|episode-no-play-button-toolbar', webpage, escape_value=False)))
+
+ duration, description = self._search_regex(
+ r'(?P<duration>[\d:]+)\s*-\s*(?P<description>.+)',
+ self._html_search_meta(['og:description', 'description', 'twitter:description'], webpage),
+ 'description', fatal=False, group=('duration', 'description')) or (None, None)
+
+ return {
+ 'id': audio_id,
+ 'url': data['audio'],
+ 'title': (data.get('data-title')
+ or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
+ or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
+ 'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
+ or strip_or_none(description)),
+ 'duration': parse_duration(traverse_obj(data, 'audio_length', 'data-duration') or duration),
+ 'episode_id': traverse_obj(data, 'uuid', 'data-episode-uuid'),
+ **traverse_obj(data, {
+ 'thumbnail': 'data-image',
+ 'channel': 'data-channel-title',
+ 'cast': ('nlp_entities', ..., 'name'),
+ 'channel_url': 'channel_url',
+ 'channel_id': 'channel_short_uuid',
+ })
+ }
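
Note: the return statement above leans on traverse_obj with a dict template: each destination key maps to a path into the scraped attributes, and ... in a path fans out over list items. A rough plain-dict equivalent for the same fields, with sample data standing in for the real page (and ignoring traverse_obj's pruning of missing values):

    data = {
        'data-image': 'https://example.com/thumb.jpg',  # sample values only
        'data-channel-title': 'Ask Noah Show',
        'channel_short_uuid': '4DQTzdS5-j7',
        'nlp_entities': [{'name': 'noah show'}, {'name': 'jason donenfeld'}],
    }
    info = {
        'thumbnail': data.get('data-image'),
        'channel': data.get('data-channel-title'),
        'channel_id': data.get('channel_short_uuid'),
        'cast': [e.get('name') for e in data.get('nlp_entities') or []],
    }
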
diff --git a/hypervideo_dl/extractor/litv.py b/hypervideo_dl/extractor/litv.py
index 16b475a..31826ac 100644
--- a/hypervideo_dl/extractor/litv.py
+++ b/hypervideo_dl/extractor/litv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/livejournal.py b/hypervideo_dl/extractor/livejournal.py
index 3a9f455..96bd8b2 100644
--- a/hypervideo_dl/extractor/livejournal.py
+++ b/hypervideo_dl/extractor/livejournal.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
diff --git a/hypervideo_dl/extractor/liveleak.py b/hypervideo_dl/extractor/liveleak.py
deleted file mode 100644
index 4ac437c..0000000
--- a/hypervideo_dl/extractor/liveleak.py
+++ /dev/null
@@ -1,191 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class LiveLeakIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
- _TESTS = [{
- 'url': 'http://www.liveleak.com/view?i=757_1364311680',
- 'md5': '0813c2430bea7a46bf13acf3406992f4',
- 'info_dict': {
- 'id': '757_1364311680',
- 'ext': 'mp4',
- 'description': 'extremely bad day for this guy..!',
- 'uploader': 'ljfriel2',
- 'title': 'Most unlucky car accident',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }, {
- 'url': 'http://www.liveleak.com/view?i=f93_1390833151',
- 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
- 'info_dict': {
- 'id': 'f93_1390833151',
- 'ext': 'mp4',
- 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
- 'uploader': 'ARD_Stinkt',
- 'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }, {
- # Prochan embed
- 'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
- 'md5': '42c6d97d54f1db107958760788c5f48f',
- 'info_dict': {
- 'id': '4f7_1392687779',
- 'ext': 'mp4',
- 'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
- 'uploader': 'CapObveus',
- 'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
- 'age_limit': 18,
- },
- 'skip': 'Video is dead',
- }, {
- # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
- # Multiple resolutions
- 'url': 'http://www.liveleak.com/view?i=801_1409392012',
- 'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
- 'info_dict': {
- 'id': '801_1409392012',
- 'ext': 'mp4',
- 'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
- 'uploader': 'bony333',
- 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }, {
- # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
- 'url': 'http://m.liveleak.com/view?i=763_1473349649',
- 'add_ie': ['Youtube'],
- 'info_dict': {
- 'id': '763_1473349649',
- 'ext': 'mp4',
- 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
- 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
- 'uploader': 'Ziz',
- 'upload_date': '20160908',
- 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.liveleak.com/view?i=677_1439397581',
- 'info_dict': {
- 'id': '677_1439397581',
- 'title': 'Fuel Depot in China Explosion caught on video',
- },
- 'playlist_count': 3,
- }, {
- 'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
- 'only_matching': True,
- }, {
- # No original video
- 'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
- webpage)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
- video_description = self._og_search_description(webpage)
- video_uploader = self._html_search_regex(
- r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
- age_limit = int_or_none(self._search_regex(
- r'you confirm that you are ([0-9]+) years and over.',
- webpage, 'age limit', default=None))
- video_thumbnail = self._og_search_thumbnail(webpage)
-
- entries = self._parse_html5_media_entries(url, webpage, video_id)
- if not entries:
- # Maybe an embed?
- embed_url = self._search_regex(
- r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
- webpage, 'embed URL')
- return {
- '_type': 'url_transparent',
- 'url': embed_url,
- 'id': video_id,
- 'title': video_title,
- 'description': video_description,
- 'uploader': video_uploader,
- 'age_limit': age_limit,
- }
-
- for idx, info_dict in enumerate(entries):
- formats = []
- for a_format in info_dict['formats']:
- if not a_format.get('height'):
- a_format['height'] = int_or_none(self._search_regex(
- r'([0-9]+)p\.mp4', a_format['url'], 'height label',
- default=None))
- formats.append(a_format)
-
- # Removing '.*.mp4' gives the raw video, which is essentially
- # the same video without the LiveLeak logo at the top (see
- # https://github.com/ytdl-org/youtube-dl/pull/4768)
- orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
- if a_format['url'] != orig_url:
- format_id = a_format.get('format_id')
- format_id = 'original' + ('-' + format_id if format_id else '')
- if self._is_valid_url(orig_url, video_id, format_id):
- formats.append({
- 'format_id': format_id,
- 'url': orig_url,
- 'preference': 1,
- })
- self._sort_formats(formats)
- info_dict['formats'] = formats
-
- # Don't append entry ID for one-video pages to keep backward compatibility
- if len(entries) > 1:
- info_dict['id'] = '%s_%s' % (video_id, idx + 1)
- else:
- info_dict['id'] = video_id
-
- info_dict.update({
- 'title': video_title,
- 'description': video_description,
- 'uploader': video_uploader,
- 'age_limit': age_limit,
- 'thumbnail': video_thumbnail,
- })
-
- return self.playlist_result(entries, video_id, video_title)
-
-
-class LiveLeakEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
-
- # See generic.py for actual test cases
- _TESTS = [{
- 'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
- 'only_matching': True,
- }, {
- 'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- kind, video_id = re.match(self._VALID_URL, url).groups()
-
- if kind == 'f':
- webpage = self._download_webpage(url, video_id)
- liveleak_url = self._search_regex(
- r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
- webpage, 'LiveLeak URL', group='url')
- else:
- liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
-
- return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
diff --git a/hypervideo_dl/extractor/livestream.py b/hypervideo_dl/extractor/livestream.py
index 45bf26d..d883eaf 100644
--- a/hypervideo_dl/extractor/livestream.py
+++ b/hypervideo_dl/extractor/livestream.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
import itertools
@@ -25,6 +23,8 @@ from ..utils import (
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
_VALID_URL = r'https?://(?:new\.)?livestream\.com/(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))/(?:events/(?P<event_id>\d+)|(?P<event_name>[^/]+))(?:/videos/(?P<id>\d+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']
+
_TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -126,7 +126,6 @@ class LivestreamIE(InfoExtractor):
if f4m_url:
formats.extend(self._extract_f4m_formats(
f4m_url, video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
comments = [{
'author_id': comment.get('author_id'),
@@ -171,7 +170,6 @@ class LivestreamIE(InfoExtractor):
'url': rtsp_url,
'format_id': 'rtsp',
})
- self._sort_formats(formats)
return {
'id': broadcast_id,
@@ -300,7 +298,6 @@ class LivestreamOriginalIE(InfoExtractor):
'format_id': 'rtsp',
})
- self._sort_formats(formats)
return formats
def _extract_folder(self, url, folder_id):
diff --git a/hypervideo_dl/extractor/livestreamfails.py b/hypervideo_dl/extractor/livestreamfails.py
new file mode 100644
index 0000000..0df6384
--- /dev/null
+++ b/hypervideo_dl/extractor/livestreamfails.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+from ..utils import format_field, traverse_obj, unified_timestamp
+
+
+class LivestreamfailsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?livestreamfails\.com/(?:clip|post)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://livestreamfails.com/clip/139200',
+ 'md5': '8a03aea1a46e94a05af6410337463102',
+ 'info_dict': {
+ 'id': '139200',
+ 'ext': 'mp4',
+ 'display_id': 'ConcernedLitigiousSalmonPeteZaroll-O8yo9W2L8OZEKhV2',
+ 'title': 'Streamer jumps off a trampoline at full speed',
+ 'creator': 'paradeev1ch',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1656271785,
+ 'upload_date': '20220626',
+ }
+ }, {
+ 'url': 'https://livestreamfails.com/post/139200',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_response = self._download_json(f'https://api.livestreamfails.com/clip/{video_id}', video_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': api_response.get('sourceId'),
+ 'timestamp': unified_timestamp(api_response.get('createdAt')),
+ 'url': f'https://livestreamfails-video-prod.b-cdn.net/video/{api_response["videoId"]}',
+ 'title': api_response.get('label'),
+ 'creator': traverse_obj(api_response, ('streamer', 'label')),
+ 'thumbnail': format_field(api_response, 'imageId', 'https://livestreamfails-image-prod.b-cdn.net/image/%s')
+ }
diff --git a/hypervideo_dl/extractor/lnkgo.py b/hypervideo_dl/extractor/lnkgo.py
index bd2dffa..6282d2e 100644
--- a/hypervideo_dl/extractor/lnkgo.py
+++ b/hypervideo_dl/extractor/lnkgo.py
@@ -1,11 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- compat_str,
format_field,
int_or_none,
parse_iso8601,
@@ -71,7 +67,6 @@ class LnkGoIE(InfoExtractor):
formats = self._extract_m3u8_formats(
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
return {
'id': video_id,
@@ -153,7 +148,6 @@ class LnkIE(InfoExtractor):
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return {
'id': id,
'title': video_json.get('title'),
diff --git a/hypervideo_dl/extractor/localnews8.py b/hypervideo_dl/extractor/localnews8.py
index c3e9d10..6f3f02c 100644
--- a/hypervideo_dl/extractor/localnews8.py
+++ b/hypervideo_dl/extractor/localnews8.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/lovehomeporn.py b/hypervideo_dl/extractor/lovehomeporn.py
index ca4b5f3..ba5a13a 100644
--- a/hypervideo_dl/extractor/lovehomeporn.py
+++ b/hypervideo_dl/extractor/lovehomeporn.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .nuevo import NuevoBaseIE
diff --git a/hypervideo_dl/extractor/lrt.py b/hypervideo_dl/extractor/lrt.py
index 4024aef..80d4d1c 100644
--- a/hypervideo_dl/extractor/lrt.py
+++ b/hypervideo_dl/extractor/lrt.py
@@ -1,21 +1,58 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
clean_html,
merge_dicts,
+ traverse_obj,
+ url_or_none,
)
-class LRTIE(InfoExtractor):
- IE_NAME = 'lrt.lt'
+class LRTBaseIE(InfoExtractor):
+ def _extract_js_var(self, webpage, var_name, default=None):
+ return self._search_regex(
+ fr'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1',
+ webpage, var_name.replace('_', ' '), default, group=2)
+
+
+class LRTStreamIE(LRTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/tiesiogiai/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.lrt.lt/mediateka/tiesiogiai/lrt-opus',
+ 'info_dict': {
+ 'id': 'lrt-opus',
+ 'live_status': 'is_live',
+ 'title': 're:^LRT Opus.+$',
+ 'ext': 'mp4'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ streams_data = self._download_json(self._extract_js_var(webpage, 'tokenURL'), video_id)
+
+ formats, subtitles = [], {}
+ for stream_url in traverse_obj(streams_data, (
+ 'response', 'data', lambda k, _: k.startswith('content')), expected_type=url_or_none):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ stream_title = self._extract_js_var(webpage, 'video_title', 'LRT')
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': True,
+ 'title': f'{self._og_search_title(webpage)} - {stream_title}'
+ }
+
+
+class LRTVODIE(LRTBaseIE):
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
_TESTS = [{
# m3u8 download
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
- 'md5': '85cb2bb530f31d91a9c65b479516ade4',
'info_dict': {
'id': '2000127261',
'ext': 'mp4',
@@ -24,6 +61,8 @@ class LRTIE(InfoExtractor):
'duration': 3035,
'timestamp': 1604079000,
'upload_date': '20201030',
+ 'tags': ['LRT TELEVIZIJA', 'Beatos virtuvė', 'Beata Nicholson', 'Makaronai', 'Baklažanai', 'Vakarienė', 'Receptas'],
+ 'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg'
},
}, {
# direct mp3 download
@@ -40,11 +79,6 @@ class LRTIE(InfoExtractor):
},
}]
- def _extract_js_var(self, webpage, var_name, default):
- return self._search_regex(
- r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
- webpage, var_name.replace('_', ' '), default, group=2)
-
def _real_extract(self, url):
path, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id)
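
Note: the new LRTBaseIE._extract_js_var pulls quoted JavaScript assignments such as tokenURL out of the page. The regex is worth unpacking: group 1 captures the opening quote, the backreference \1 requires the same closing quote, and (?:(?!\1).)+ consumes everything up to it. A standalone sketch of the same idea:

    import re

    def extract_js_var(webpage, var_name, default=None):
        # matches var_name = "value" or var_name = 'value'
        m = re.search(fr'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1', webpage)
        return m.group(2) if m else default

    print(extract_js_var('var tokenURL = "https://example.com/token";', 'tokenURL'))
    # -> https://example.com/token
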
diff --git a/hypervideo_dl/extractor/lynda.py b/hypervideo_dl/extractor/lynda.py
index ce30474..768ce91 100644
--- a/hypervideo_dl/extractor/lynda.py
+++ b/hypervideo_dl/extractor/lynda.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -159,7 +157,6 @@ class LyndaIE(LyndaBaseIE):
'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
'height': int_or_none(format_id),
})
- self._sort_formats(formats)
conviva = self._download_json(
'https://www.lynda.com/ajax/player/conviva', video_id,
@@ -209,7 +206,6 @@ class LyndaIE(LyndaBaseIE):
} for format_id, video_url in prioritized_stream.items()])
self._check_formats(formats, video_id)
- self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id)
diff --git a/hypervideo_dl/extractor/m6.py b/hypervideo_dl/extractor/m6.py
index 9806875..9dcc601 100644
--- a/hypervideo_dl/extractor/m6.py
+++ b/hypervideo_dl/extractor/m6.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/magentamusik360.py b/hypervideo_dl/extractor/magentamusik360.py
index 5c27490..5d0cb3b 100644
--- a/hypervideo_dl/extractor/magentamusik360.py
+++ b/hypervideo_dl/extractor/magentamusik360.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/mailru.py b/hypervideo_dl/extractor/mailru.py
index 5d9f80b..387d211 100644
--- a/hypervideo_dl/extractor/mailru.py
+++ b/hypervideo_dl/extractor/mailru.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import json
import re
@@ -163,7 +160,6 @@ class MailRuIE(InfoExtractor):
'height': height,
'http_headers': headers,
})
- self._sort_formats(formats)
meta_data = video_data['meta']
title = remove_end(meta_data['title'], '.mp4')
diff --git a/hypervideo_dl/extractor/mainstreaming.py b/hypervideo_dl/extractor/mainstreaming.py
index 0f349a7..fe5589d 100644
--- a/hypervideo_dl/extractor/mainstreaming.py
+++ b/hypervideo_dl/extractor/mainstreaming.py
@@ -1,4 +1,3 @@
-# coding: utf-8
import re
from .common import InfoExtractor
@@ -15,6 +14,7 @@ from ..utils import (
class MainStreamingIE(InfoExtractor):
_VALID_URL = r'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
IE_DESC = 'MainStreaming Player'
_TESTS = [
@@ -103,13 +103,6 @@ class MainStreamingIE(InfoExtractor):
}
]
- @staticmethod
- def _extract_urls(webpage):
- mobj = re.findall(
- r'<iframe[^>]+?src=["\']?(?P<url>%s)["\']?' % MainStreamingIE._VALID_URL, webpage)
- if mobj:
- return [group[0] for group in mobj]
-
def _playlist_entries(self, host, playlist_content):
for entry in playlist_content:
content_id = entry.get('contentID')
@@ -204,8 +197,6 @@ class MainStreamingIE(InfoExtractor):
subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
formats.extend(m3u8_formats + mpd_formats)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/malltv.py b/hypervideo_dl/extractor/malltv.py
index fadfd93..e1031d8 100644
--- a/hypervideo_dl/extractor/malltv.py
+++ b/hypervideo_dl/extractor/malltv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -17,7 +14,7 @@ class MallTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
- 'md5': '1c4a37f080e1f3023103a7b43458e518',
+ 'md5': 'cd69ce29176f6533b65bff69ed9a5f2a',
'info_dict': {
'id': 't0zzt0',
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
@@ -28,6 +25,11 @@ class MallTVIE(InfoExtractor):
'timestamp': 1538870400,
'upload_date': '20181007',
'view_count': int,
+ 'comment_count': int,
+ 'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnigfq/thumbnails/retina.jpg',
+ 'average_rating': 9.060869565217391,
+ 'dislike_count': int,
+ 'like_count': int,
}
}, {
'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
@@ -35,6 +37,24 @@ class MallTVIE(InfoExtractor):
}, {
'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
'only_matching': True,
+ }, {
+ 'url': 'https://www.mall.tv/zivoty-slavnych/nadeje-vychodu-i-zapadu-jak-michail-gorbacov-zmenil-politickou-mapu-sveta-a-ziskal-za-to-nobelovu-cenu-miru',
+ 'info_dict': {
+ 'id': 'yx010y',
+ 'ext': 'mp4',
+ 'dislike_count': int,
+ 'description': 'md5:aee02bee5a8d072c6a8207b91d1905a9',
+ 'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnjdeu/thumbnails/retina.jpg',
+ 'comment_count': int,
+ 'display_id': 'md5:0ec2afa94d2e2b7091c019cef2a43a9b',
+ 'like_count': int,
+ 'duration': 752,
+ 'timestamp': 1646956800,
+ 'title': 'md5:fe79385daaf16d74c12c1ec4a26687af',
+ 'view_count': int,
+ 'upload_date': '20220311',
+ 'average_rating': 9.685714285714285,
+ }
}]
def _real_extract(self, url):
@@ -46,13 +66,12 @@ class MallTVIE(InfoExtractor):
video = self._parse_json(self._search_regex(
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
webpage, 'video object'), display_id)
- video_source = video['VideoSource']
+
video_id = self._search_regex(
- r'/([\da-z]+)/index\b', video_source, 'video id')
+ r'<input\s*id\s*=\s*player-id-name\s*[^>]+value\s*=\s*(\w+)', webpage, 'video id')
formats = self._extract_m3u8_formats(
- video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
+ video['VideoSource'], video_id, 'mp4', 'm3u8_native')
subtitles = {}
for s in (video.get('Subtitles') or {}):
@@ -72,7 +91,7 @@ class MallTVIE(InfoExtractor):
info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts({
- 'id': video_id,
+ 'id': str(video_id),
'display_id': display_id,
'title': video.get('Title'),
'description': clean_html(video.get('Description')),
diff --git a/hypervideo_dl/extractor/mangomolo.py b/hypervideo_dl/extractor/mangomolo.py
index 68ce138..efaf66f 100644
--- a/hypervideo_dl/extractor/mangomolo.py
+++ b/hypervideo_dl/extractor/mangomolo.py
@@ -1,16 +1,31 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
-from ..utils import int_or_none
+from ..utils import classproperty, int_or_none
class MangomoloBaseIE(InfoExtractor):
- _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+ _BASE_REGEX = r'(?:https?:)?//(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+ _SLUG = None
+
+ @classproperty
+ def _VALID_URL(cls):
+ return f'{cls._BASE_REGEX}{cls._SLUG}'
+
+ @classproperty
+ def _EMBED_REGEX(cls):
+ return [rf'<iframe[^>]+src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1']
+
+ def _extract_from_webpage(self, url, webpage):
+ for res in super()._extract_from_webpage(url, webpage):
+ yield {
+ **res,
+ '_type': 'url_transparent',
+ 'id': self._search_regex(self._SLUG, res['url'], 'id', group='id'),
+ 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+ }
def _get_real_id(self, page_id):
return page_id
@@ -29,7 +44,6 @@ class MangomoloBaseIE(InfoExtractor):
], webpage, 'format url')
formats = self._extract_wowza_formats(
format_url, page_id, m3u8_entry_protocol, ['smil'])
- self._sort_formats(formats)
return {
'id': page_id,
@@ -44,14 +58,15 @@ class MangomoloBaseIE(InfoExtractor):
class MangomoloVideoIE(MangomoloBaseIE):
_TYPE = 'video'
IE_NAME = 'mangomolo:' + _TYPE
- _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
+ _SLUG = r'video\?.*?\bid=(?P<id>\d+)'
+
_IS_LIVE = False
class MangomoloLiveIE(MangomoloBaseIE):
_TYPE = 'live'
IE_NAME = 'mangomolo:' + _TYPE
- _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+ _SLUG = r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
_IS_LIVE = True
def _get_real_id(self, page_id):
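
Note: the Mangomolo rewrite composes _VALID_URL per subclass from a shared base regex and a class-specific _SLUG, using yt-dlp's classproperty helper. A minimal stand-in, assuming classproperty simply evaluates the decorated function against the class (the host name here is hypothetical):

    class classproperty:
        def __init__(self, func):
            self.func = func

        def __get__(self, obj, cls):
            return self.func(cls)

    class Base:
        _BASE_REGEX = r'(?:https?:)?//player\.example\.com/v1/'
        _SLUG = None

        @classproperty
        def _VALID_URL(cls):
            return f'{cls._BASE_REGEX}{cls._SLUG}'

    class Video(Base):
        _SLUG = r'video\?.*?\bid=(?P<id>\d+)'

    print(Video._VALID_URL)  # the slug is appended per subclass
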
diff --git a/hypervideo_dl/extractor/manoto.py b/hypervideo_dl/extractor/manoto.py
index d12aa5f..2792e6e 100644
--- a/hypervideo_dl/extractor/manoto.py
+++ b/hypervideo_dl/extractor/manoto.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -57,7 +54,6 @@ class ManotoTVIE(InfoExtractor):
episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id)
details = episode_json.get('details', {})
formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'series': details.get('showTitle'),
@@ -129,7 +125,6 @@ class ManotoTVLiveIE(InfoExtractor):
details = json.get('details', {})
video_url = details.get('liveUrl')
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True)
- self._sort_formats(formats)
return {
'id': video_id,
'title': 'Manoto TV Live',
diff --git a/hypervideo_dl/extractor/manyvids.py b/hypervideo_dl/extractor/manyvids.py
index bd24f88..7417453 100644
--- a/hypervideo_dl/extractor/manyvids.py
+++ b/hypervideo_dl/extractor/manyvids.py
@@ -1,11 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
+ extract_attributes,
int_or_none,
str_to_int,
+ url_or_none,
urlencode_postdata,
)
@@ -20,17 +21,20 @@ class ManyVidsIE(InfoExtractor):
'id': '133957',
'ext': 'mp4',
'title': 'everthing about me (Preview)',
+ 'uploader': 'ellyxxix',
'view_count': int,
'like_count': int,
},
}, {
# full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
- 'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
+ 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
'info_dict': {
'id': '935718',
'ext': 'mp4',
'title': 'MY FACE REVEAL',
+ 'description': 'md5:ec5901d41808b3746fed90face161612',
+ 'uploader': 'Sarah Calanthe',
'view_count': int,
'like_count': int,
},
@@ -39,17 +43,50 @@ class ManyVidsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
+ try:
+ webpage = self._download_webpage(real_url, video_id)
+ except Exception:
+ # probably useless fallback
+ webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'video URL', group='url')
+ info = self._search_regex(
+ r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
+ webpage, 'meta details', default='')
+ info = extract_attributes(info)
- title = self._html_search_regex(
- (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
- r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
- webpage, 'title', default=None) or self._html_search_meta(
- 'twitter:title', webpage, 'title', fatal=True)
+ player = self._search_regex(
+ r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
+ webpage, 'player details', default='')
+ player = extract_attributes(player)
+
+ video_urls_and_ids = (
+ (info.get('data-meta-video'), 'video'),
+ (player.get('data-video-transcoded'), 'transcoded'),
+ (player.get('data-video-filepath'), 'filepath'),
+ (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
+ )
+
+ def txt_or_none(s, default=None):
+ return (s.strip() or default) if isinstance(s, str) else default
+
+ uploader = txt_or_none(info.get('data-meta-author'))
+
+ def mung_title(s):
+ if uploader:
+ s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
+ return txt_or_none(s)
+
+ title = (
+ mung_title(info.get('data-meta-title'))
+ or self._html_search_regex(
+ (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+ r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+ webpage, 'title', default=None)
+ or self._html_search_meta(
+ 'twitter:title', webpage, 'title', fatal=True))
+
+ title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
@@ -62,7 +99,8 @@ class ManyVidsIE(InfoExtractor):
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
- video_id, fatal=False, data=urlencode_postdata({
+ video_id, note='Setting format cookies', fatal=False,
+ data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
@@ -70,24 +108,54 @@ class ManyVidsIE(InfoExtractor):
'X-Requested-With': 'XMLHttpRequest'
})
- if determine_ext(video_url) == 'm3u8':
- formats = self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- else:
- formats = [{'url': video_url}]
+ formats = []
+ for v_url, fmt in video_urls_and_ids:
+ v_url = url_or_none(v_url)
+ if not v_url:
+ continue
+ if determine_ext(v_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls'))
+ else:
+ formats.append({
+ 'url': v_url,
+ 'format_id': fmt,
+ })
+
+ self._remove_duplicate_formats(formats)
+
+ for f in formats:
+ if f.get('height') is None:
+ f['height'] = int_or_none(
+ self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
+ if '/preview/' in f['url']:
+ f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
+ f['preference'] = -10
+ if 'transcoded' in f['format_id']:
+ f['preference'] = f.get('preference', -1) - 1
+
+ def get_likes():
+ likes = self._search_regex(
+ r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
+ webpage, 'likes', default='')
+ likes = extract_attributes(likes)
+ return int_or_none(likes.get('data-likes'))
- like_count = int_or_none(self._search_regex(
- r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
- view_count = str_to_int(self._html_search_regex(
- r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
- 'view count', default=None))
+ def get_views():
+ return str_to_int(self._html_search_regex(
+ r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
+ webpage, 'view count', default=None))
return {
'id': video_id,
'title': title,
- 'view_count': view_count,
- 'like_count': like_count,
'formats': formats,
- 'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'),
+ 'description': txt_or_none(info.get('data-meta-description')),
+ 'uploader': txt_or_none(info.get('data-meta-author')),
+ 'thumbnail': (
+ url_or_none(info.get('data-meta-image'))
+ or url_or_none(player.get('data-video-screenshot'))),
+ 'view_count': get_views(),
+ 'like_count': get_likes(),
}
diff --git a/hypervideo_dl/extractor/maoritv.py b/hypervideo_dl/extractor/maoritv.py
index 0d23fec..67780ea 100644
--- a/hypervideo_dl/extractor/maoritv.py
+++ b/hypervideo_dl/extractor/maoritv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/markiza.py b/hypervideo_dl/extractor/markiza.py
index def960a..53ed791 100644
--- a/hypervideo_dl/extractor/markiza.py
+++ b/hypervideo_dl/extractor/markiza.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/massengeschmacktv.py b/hypervideo_dl/extractor/massengeschmacktv.py
index b381d31..7dacb43 100644
--- a/hypervideo_dl/extractor/massengeschmacktv.py
+++ b/hypervideo_dl/extractor/massengeschmacktv.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -67,8 +65,6 @@ class MassengeschmackTVIE(InfoExtractor):
'vcodec': 'none' if format_id.startswith('Audio') else None,
})
- self._sort_formats(formats)
-
return {
'id': episode,
'title': title,
diff --git a/hypervideo_dl/extractor/masters.py b/hypervideo_dl/extractor/masters.py
new file mode 100644
index 0000000..716f1c9
--- /dev/null
+++ b/hypervideo_dl/extractor/masters.py
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..utils import (
+ traverse_obj,
+ unified_strdate,
+)
+
+
+class MastersIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?masters\.com/en_US/watch/(?P<date>\d{4}-\d{2}-\d{2})/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.masters.com/en_US/watch/2022-04-07/16493755593805191/sungjae_im_thursday_interview_2022.html',
+ 'info_dict': {
+ 'id': '16493755593805191',
+ 'ext': 'mp4',
+ 'title': 'Sungjae Im: Thursday Interview 2022',
+ 'upload_date': '20220407',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, upload_date = self._match_valid_url(url).group('id', 'date')
+ content_resp = self._download_json(
+ f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en',
+ video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4')
+
+ thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()]
+
+ return {
+ 'id': video_id,
+ 'title': content_resp.get('title'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'upload_date': unified_strdate(upload_date),
+ 'thumbnails': thumbnails,
+ }
diff --git a/hypervideo_dl/extractor/matchtv.py b/hypervideo_dl/extractor/matchtv.py
index e003b8d..a67fa9f 100644
--- a/hypervideo_dl/extractor/matchtv.py
+++ b/hypervideo_dl/extractor/matchtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
from .common import InfoExtractor
@@ -46,7 +43,6 @@ class MatchTVIE(InfoExtractor):
})['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
'title': 'Матч ТВ - Прямой эфир',
diff --git a/hypervideo_dl/extractor/mdr.py b/hypervideo_dl/extractor/mdr.py
index 3ca174c..49f5b49 100644
--- a/hypervideo_dl/extractor/mdr.py
+++ b/hypervideo_dl/extractor/mdr.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
@@ -165,8 +162,6 @@ class MDRIE(InfoExtractor):
formats.append(f)
- self._sort_formats(formats)
-
description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
timestamp = parse_iso8601(
xpath_text(
diff --git a/hypervideo_dl/extractor/medaltv.py b/hypervideo_dl/extractor/medaltv.py
index 59cc307..82be823 100644
--- a/hypervideo_dl/extractor/medaltv.py
+++ b/hypervideo_dl/extractor/medaltv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -11,15 +8,33 @@ from ..utils import (
float_or_none,
int_or_none,
str_or_none,
- try_get,
+ traverse_obj,
)
class MedalTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
- 'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
+ 'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
+ 'md5': '6930f8972914b6b9fdc2bb3918098ba0',
+ 'info_dict': {
+ 'id': 'jTBFnLKdLy15K',
+ 'ext': 'mp4',
+ 'title': "Mornu's clutch",
+ 'description': '',
+ 'uploader': 'Aciel',
+ 'timestamp': 1651628243,
+ 'upload_date': '20220504',
+ 'uploader_id': '19335460',
+ 'uploader_url': 'https://medal.tv/users/19335460',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 13,
+ }
+ }, {
+ 'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
+ 'md5': '3d19d426fe0b2d91c26e412684e66a06',
'info_dict': {
'id': '2mA60jWAGQCBH',
'ext': 'mp4',
@@ -29,9 +44,15 @@ class MedalTVIE(InfoExtractor):
'timestamp': 1603165266,
'upload_date': '20201020',
'uploader_id': '10619174',
+ 'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
+ 'uploader_url': 'https://medal.tv/users/10619174',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 23,
}
}, {
- 'url': 'https://medal.tv/clips/2um24TWdty0NA',
+ 'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
'info_dict': {
'id': '2um24TWdty0NA',
@@ -42,25 +63,42 @@ class MedalTVIE(InfoExtractor):
'timestamp': 1605580939,
'upload_date': '20201117',
'uploader_id': '5156321',
+ 'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
+ 'uploader_url': 'https://medal.tv/users/5156321',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'duration': 9,
}
}, {
- 'url': 'https://medal.tv/clips/37rMeFpryCC-9',
+ 'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
'only_matching': True,
}, {
- 'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
+ 'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
+ path = self._match_valid_url(url).group('path')
+
webpage = self._download_webpage(url, video_id)
- hydration_data = self._parse_json(self._search_regex(
- r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
- webpage, 'hydration data', default='{}'), video_id)
+ next_data = self._search_json(
+ '<script[^>]*__NEXT_DATA__[^>]*>', webpage,
+ 'next data', video_id, end_pattern='</script>', fatal=False)
- clip = try_get(
- hydration_data, lambda x: x['clips'][video_id], dict) or {}
+        build_id = (next_data or {}).get('buildId')
+ if not build_id:
+ raise ExtractorError(
+ 'Could not find build ID.', video_id=video_id)
+
+ locale = next_data.get('locale', 'en')
+
+ api_response = self._download_json(
+ f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id)
+
+ clip = traverse_obj(api_response, ('pageProps', 'clip')) or {}
if not clip:
raise ExtractorError(
'Could not find video information.', video_id=video_id)
@@ -112,14 +150,11 @@ class MedalTVIE(InfoExtractor):
'An unknown error occurred ({0}).'.format(error),
video_id=video_id)
- self._sort_formats(formats)
-
# Necessary because the id of the author is not known in advance.
# Won't raise an issue if no profile can be found as this is optional.
- author = try_get(
- hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
- author_id = str_or_none(author.get('id'))
- author_url = format_field(author_id, template='https://medal.tv/users/%s')
+ author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
+ author_id = str_or_none(author.get('userId'))
+ author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
return {
'id': video_id,
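
The medal.tv rewrite above replaces scraping of an inline hydrationData blob with the standard Next.js data route: read buildId (and locale) from the __NEXT_DATA__ script tag, then fetch the page props as JSON. A rough stdlib sketch of the same two-step fetch, with error handling and request headers omitted:

    import json
    import re
    import urllib.request

    def medal_page_props(page_url, path, video_id):
        html = urllib.request.urlopen(page_url).read().decode()
        # __NEXT_DATA__ carries the build ID that names the /_next/data/ route
        next_data = json.loads(re.search(
            r'<script[^>]*__NEXT_DATA__[^>]*>(.+?)</script>', html, re.DOTALL).group(1))
        build_id, locale = next_data['buildId'], next_data.get('locale', 'en')
        data_url = f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json'
        return json.loads(urllib.request.urlopen(data_url).read())['pageProps']
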
diff --git a/hypervideo_dl/extractor/mediaite.py b/hypervideo_dl/extractor/mediaite.py
index b670f0d..0f9079b 100644
--- a/hypervideo_dl/extractor/mediaite.py
+++ b/hypervideo_dl/extractor/mediaite.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/mediaklikk.py b/hypervideo_dl/extractor/mediaklikk.py
index 18ff3be..4636508 100644
--- a/hypervideo_dl/extractor/mediaklikk.py
+++ b/hypervideo_dl/extractor/mediaklikk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from ..utils import (
unified_strdate
)
@@ -92,7 +89,6 @@ class MediaKlikkIE(InfoExtractor):
formats = self._extract_wowza_formats(
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/medialaan.py b/hypervideo_dl/extractor/medialaan.py
index 788acf7..bce20dc 100644
--- a/hypervideo_dl/extractor/medialaan.py
+++ b/hypervideo_dl/extractor/medialaan.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -71,8 +69,8 @@ class MedialaanIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
entries = []
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
mychannels_id = extract_attributes(element).get('data-mychannels-id')
@@ -102,7 +100,6 @@ class MedialaanIE(InfoExtractor):
'ext': ext,
'url': src,
})
- self._sort_formats(formats)
return {
'id': production_id,
diff --git a/hypervideo_dl/extractor/mediaset.py b/hypervideo_dl/extractor/mediaset.py
index d6b456c..61bdb2a 100644
--- a/hypervideo_dl/extractor/mediaset.py
+++ b/hypervideo_dl/extractor/mediaset.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import re
@@ -23,10 +20,10 @@ class MediasetIE(ThePlatformBaseIE):
(?:
mediaset:|
https?://
- (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (?:\w+\.)+mediaset\.it/
(?:
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
- player/index\.html\?.*?\bprogramGuid=
+ player/(?:v\d+/)?index\.html\?.*?\bprogramGuid=
)
)(?P<id>[0-9A-Z]{16,})
'''
@@ -145,6 +142,10 @@ class MediasetIE(ThePlatformBaseIE):
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
'only_matching': True,
}, {
+ # embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/)
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/',
+ 'only_matching': True,
+ }, {
'url': 'mediaset:FAFU000000665924',
'only_matching': True,
}, {
@@ -162,36 +163,36 @@ class MediasetIE(ThePlatformBaseIE):
}, {
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
'only_matching': True,
+ }, {
+ 'url': 'https://mediasetinfinity.mediaset.it/video/braveandbeautiful/episodio-113_F310948005000402',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323',
+ 'only_matching': True,
}]
- @staticmethod
- def _extract_urls(ie, webpage):
- def _qs(url):
- return parse_qs(url)
-
+ def _extract_from_webpage(self, url, webpage):
def _program_guid(qs):
return qs.get('programGuid', [None])[0]
- entries = []
for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
webpage):
embed_url = mobj.group('url')
- embed_qs = _qs(embed_url)
+ embed_qs = parse_qs(embed_url)
program_guid = _program_guid(embed_qs)
if program_guid:
- entries.append(embed_url)
+ yield self.url_result(embed_url)
continue
+
video_id = embed_qs.get('id', [None])[0]
if not video_id:
continue
- urlh = ie._request_webpage(
- embed_url, video_id, note='Following embed URL redirect')
+ urlh = self._request_webpage(embed_url, video_id, note='Following embed URL redirect')
embed_url = urlh.geturl()
- program_guid = _program_guid(_qs(embed_url))
+ program_guid = _program_guid(parse_qs(embed_url))
if program_guid:
- entries.append(embed_url)
- return entries
+ yield self.url_result(embed_url)
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
for video in smil.findall(self._xpath_ns('.//video', namespace)):
@@ -246,8 +247,6 @@ class MediasetIE(ThePlatformBaseIE):
if (first_e or geo_e) and not formats:
raise geo_e or first_e
- self._sort_formats(formats)
-
feed_data = self._download_json(
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/' + guid,
guid, fatal=False)
@@ -285,11 +284,11 @@ class MediasetIE(ThePlatformBaseIE):
return info
-class MediasetShowIE(MediasetIE):
+class MediasetShowIE(MediasetIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'''(?x)
(?:
https?://
- (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+                        (?:\w+\.)+mediaset\.it/
(?:
(?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
(?:[a-z-]+)_SE(?P<id>\d{12})
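
This Mediaset hunk shows the embed-discovery migration that recurs throughout the commit: static _extract_urls() helpers that built and returned a list become _extract_embed_urls/_extract_from_webpage generators that yield lazily (here wrapping each hit in self.url_result). Schematically, with a toy pattern rather than the real InfoExtractor plumbing:

    import re

    class OldStyle:
        @staticmethod
        def _extract_urls(webpage):  # pre-commit convention: eager list
            return re.findall(r'<iframe src="([^"]+)"', webpage)

    class NewStyle:
        _EMBED_REGEX = [r'<iframe src="(?P<url>[^"]+)"']

        @classmethod
        def _extract_embed_urls(cls, url, webpage):  # new convention: lazy generator
            for pattern in cls._EMBED_REGEX:
                for mobj in re.finditer(pattern, webpage):
                    yield mobj.group('url')
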
diff --git a/hypervideo_dl/extractor/mediasite.py b/hypervideo_dl/extractor/mediasite.py
index fbf9223..fe549c4 100644
--- a/hypervideo_dl/extractor/mediasite.py
+++ b/hypervideo_dl/extractor/mediasite.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import json
@@ -16,7 +13,7 @@ from ..utils import (
str_or_none,
try_call,
try_get,
- unescapeHTML,
+ smuggle_url,
unsmuggle_url,
url_or_none,
urljoin,
@@ -28,6 +25,7 @@ _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0
class MediasiteIE(InfoExtractor):
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/[^/#?]+/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
+ _EMBED_REGEX = [r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE]
_TESTS = [
{
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
@@ -115,13 +113,10 @@ class MediasiteIE(InfoExtractor):
5: 'video3',
}
- @staticmethod
- def _extract_urls(webpage):
- return [
- unescapeHTML(mobj.group('url'))
- for mobj in re.finditer(
- r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
- webpage)]
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield smuggle_url(embed_url, {'UrlReferrer': url})
def __extract_slides(self, *, stream_id, snum, Stream, duration, images):
slide_base_url = Stream['SlideBaseUrl']
@@ -269,8 +264,6 @@ class MediasiteIE(InfoExtractor):
})
formats.extend(stream_formats)
- self._sort_formats(formats)
-
# XXX: Presentation['Presenters']
# XXX: Presentation['Transcript']
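
Mediasite's new _extract_embed_urls smuggles the embedding page into each result as 'UrlReferrer', which the extractor later recovers with unsmuggle_url. The pair of helpers just round-trips JSON through the URL fragment; a self-contained approximation (the real helpers live in hypervideo_dl/utils.py):

    import json
    import urllib.parse

    def smuggle_url(url, data):
        # pack extractor-private data into the fragment as urlencoded JSON
        return url + '#' + urllib.parse.urlencode({'__youtubedl_smuggle': json.dumps(data)})

    def unsmuggle_url(smug_url, default=None):
        if '#__youtubedl_smuggle' not in smug_url:
            return smug_url, default
        url, _, fragment = smug_url.partition('#')
        return url, json.loads(urllib.parse.parse_qs(fragment)['__youtubedl_smuggle'][0])

    url, data = unsmuggle_url(smuggle_url(
        'https://host.example/Mediasite/Play/0123456789abcdef0123456789abcdef',
        {'UrlReferrer': 'https://example.edu'}))
    assert data['UrlReferrer'] == 'https://example.edu'
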
diff --git a/hypervideo_dl/extractor/mediaworksnz.py b/hypervideo_dl/extractor/mediaworksnz.py
new file mode 100644
index 0000000..62e37d2
--- /dev/null
+++ b/hypervideo_dl/extractor/mediaworksnz.py
@@ -0,0 +1,103 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ bug_reports_message,
+ float_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class MediaWorksNZVODIE(InfoExtractor):
+ _VALID_URL_BASE_RE = r'https?://vodupload-api\.mediaworks\.nz/library/asset/published/'
+ _VALID_URL_ID_RE = r'(?P<id>[A-Za-z0-9-]+)'
+ _VALID_URL = rf'{_VALID_URL_BASE_RE}{_VALID_URL_ID_RE}'
+ _TESTS = [{
+ 'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID00359',
+ 'info_dict': {
+ 'id': 'VID00359',
+ 'ext': 'mp4',
+ 'title': 'GRG Jacinda Ardern safe drug testing 1920x1080',
+ 'description': 'md5:d4d7dc366742e86d8130b257dcb520ba',
+ 'duration': 142.76,
+ 'timestamp': 1604268608,
+ 'upload_date': '20201101',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'channel': 'George FM'
+ }
+ }, {
+ # has audio-only format
+ 'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID02627',
+ 'info_dict': {
+ 'id': 'VID02627',
+ 'ext': 'mp3',
+ 'title': 'Tova O\'Brien meets Ukraine President Volodymyr Zelensky',
+ 'channel': 'Today FM',
+ 'description': 'Watch in full the much anticipated interview of Volodymyr Zelensky',
+ 'duration': 2061.16,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20220822',
+ 'timestamp': 1661152289,
+ },
+ 'params': {'format': 'ba[ext=mp3]'}
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.rova.nz/home/podcasts/socrates-walks-into-a-bar/the-trolley-problem---episode-1.html',
+ 'info_dict': {
+ 'id': 'VID02494',
+ 'ext': 'mp4',
+ 'title': 'The Trolley Problem',
+ 'duration': 2843.56,
+ 'channel': 'Other',
+ 'timestamp': 1658356489,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Socrates Walks Into A Bar Podcast Episode 1',
+ 'upload_date': '20220720',
+ }
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for mobj in re.finditer(
+ rf'''(?x)<div\s+\bid=["']Player-Attributes-JWID[^>]+\b
+ data-request-url=["']{cls._VALID_URL_BASE_RE}["'][^>]+\b
+ data-asset-id=["']{cls._VALID_URL_ID_RE}["']''', webpage
+ ):
+ yield f'https://vodupload-api.mediaworks.nz/library/asset/published/{mobj.group("id")}'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ asset = self._download_json(url, video_id)['asset']
+
+ if asset.get('drm') not in ('NonDRM', None):
+ self.report_drm(video_id)
+
+ content_type = asset.get('type')
+ if content_type and content_type != 'video':
+ self.report_warning(f'Unknown content type: {content_type}' + bug_reports_message(), video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['streamingUrl'], video_id)
+
+ audio_streaming_url = traverse_obj(
+ asset, 'palyoutPathAudio', 'playoutpathaudio', expected_type=str)
+ if audio_streaming_url:
+ audio_formats = self._extract_m3u8_formats(audio_streaming_url, video_id, fatal=False, ext='mp3')
+ for audio_format in audio_formats:
+ # all the audio streams appear to be aac
+ audio_format.setdefault('vcodec', 'none')
+ audio_format.setdefault('acodec', 'aac')
+ formats.append(audio_format)
+
+ return {
+ 'id': video_id,
+ 'title': asset.get('title'),
+ 'description': asset.get('description'),
+ 'duration': float_or_none(asset.get('duration')),
+ 'timestamp': unified_timestamp(asset.get('dateadded')),
+ 'channel': asset.get('brand'),
+ 'thumbnails': [{'url': thumbnail_url} for thumbnail_url in asset.get('thumbnails') or []],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
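
Note the deliberate double lookup above: the upstream API itself misspells the audio key, so traverse_obj is asked for 'palyoutPathAudio' (sic) before the corrected 'playoutpathaudio', and the first string value wins. Reduced to plain Python, the lookup behaves like this (payload is hypothetical):

    def first_str(mapping, *keys):
        # traverse_obj(asset, key1, key2, expected_type=str), approximately
        for key in keys:
            value = mapping.get(key)
            if isinstance(value, str):
                return value
        return None

    asset = {'palyoutPathAudio': 'https://example.invalid/audio.m3u8'}  # sic, upstream typo
    assert first_str(asset, 'palyoutPathAudio', 'playoutpathaudio') == 'https://example.invalid/audio.m3u8'
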
diff --git a/hypervideo_dl/extractor/medici.py b/hypervideo_dl/extractor/medici.py
index cd91023..328ccd2 100644
--- a/hypervideo_dl/extractor/medici.py
+++ b/hypervideo_dl/extractor/medici.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
unified_strdate,
diff --git a/hypervideo_dl/extractor/megaphone.py b/hypervideo_dl/extractor/megaphone.py
index 5bafa6c..af80523 100644
--- a/hypervideo_dl/extractor/megaphone.py
+++ b/hypervideo_dl/extractor/megaphone.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import js_to_json
@@ -11,6 +6,7 @@ class MegaphoneIE(InfoExtractor):
IE_NAME = 'megaphone.fm'
IE_DESC = 'megaphone.fm embedded players'
_VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
_TEST = {
'url': 'https://player.megaphone.fm/GLT9749789991?"',
'md5': '4816a0de523eb3e972dc0dda2c191f96',
@@ -48,8 +44,3 @@ class MegaphoneIE(InfoExtractor):
'duration': episode_data['duration'],
'formats': formats,
}
-
- @classmethod
- def _extract_urls(cls, webpage):
- return [m[0] for m in re.findall(
- r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]
diff --git a/hypervideo_dl/extractor/megatvcom.py b/hypervideo_dl/extractor/megatvcom.py
index 0d6793a..2f3f11f 100644
--- a/hypervideo_dl/extractor/megatvcom.py
+++ b/hypervideo_dl/extractor/megatvcom.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -90,7 +87,6 @@ class MegaTVComIE(MegaTVComBaseIE):
formats, subs = [{'url': source}], {}
if player_attrs.get('subs'):
self._merge_subtitles({'und': [{'url': player_attrs['subs']}]}, target=subs)
- self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
@@ -107,7 +103,7 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
IE_NAME = 'megatvcom:embed'
IE_DESC = 'megatv.com embedded videos'
_VALID_URL = r'(?:https?:)?//(?:www\.)?megatv\.com/embed/?\?p=(?P<id>\d+)'
- _EMBED_RE = re.compile(rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''')
+ _EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''']
_TESTS = [{
'url': 'https://www.megatv.com/embed/?p=2020520979',
@@ -137,11 +133,6 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- for mobj in cls._EMBED_RE.finditer(webpage):
- yield unescapeHTML(mobj.group('url'))
-
def _match_canonical_url(self, webpage):
LINK_RE = r'''(?x)
<link(?:
diff --git a/hypervideo_dl/extractor/meipai.py b/hypervideo_dl/extractor/meipai.py
index 2445b8b..1a6f3cd 100644
--- a/hypervideo_dl/extractor/meipai.py
+++ b/hypervideo_dl/extractor/meipai.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -51,9 +48,7 @@ class MeipaiIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(
- webpage, default=None) or self._html_search_regex(
- r'<title[^>]*>([^<]+)</title>', webpage, 'title')
+ title = self._generic_title('', webpage)
formats = []
diff --git a/hypervideo_dl/extractor/melonvod.py b/hypervideo_dl/extractor/melonvod.py
index bd8cf13..1d3fff8 100644
--- a/hypervideo_dl/extractor/melonvod.py
+++ b/hypervideo_dl/extractor/melonvod.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -47,7 +44,6 @@ class MelonVODIE(InfoExtractor):
formats = self._extract_m3u8_formats(
stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
artist_list = play_info.get('artistList')
artist = None
diff --git a/hypervideo_dl/extractor/meta.py b/hypervideo_dl/extractor/meta.py
index cdb46e1..7c11e60 100644
--- a/hypervideo_dl/extractor/meta.py
+++ b/hypervideo_dl/extractor/meta.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .pladform import PladformIE
from ..utils import (
diff --git a/hypervideo_dl/extractor/metacafe.py b/hypervideo_dl/extractor/metacafe.py
index 7b2d4a0..d7f5def 100644
--- a/hypervideo_dl/extractor/metacafe.py
+++ b/hypervideo_dl/extractor/metacafe.py
@@ -1,19 +1,14 @@
-from __future__ import unicode_literals
-
import json
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_parse_qs, compat_urllib_parse_unquote
from ..utils import (
- determine_ext,
ExtractorError,
- int_or_none,
+ determine_ext,
get_element_by_attribute,
+ int_or_none,
mimetype2ext,
)
@@ -145,7 +140,7 @@ class MetacafeIE(InfoExtractor):
headers = {
# Disable family filter
- 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
+ 'Cookie': 'user=%s; ' % urllib.parse.quote(json.dumps({'ffilter': False}))
}
# AnyClip videos require the flashversion cookie so that we get the link
@@ -272,7 +267,6 @@ class MetacafeIE(InfoExtractor):
'url': video_url,
'ext': video_ext,
}]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/metacritic.py b/hypervideo_dl/extractor/metacritic.py
index 1424288..1441054 100644
--- a/hypervideo_dl/extractor/metacritic.py
+++ b/hypervideo_dl/extractor/metacritic.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -51,7 +49,6 @@ class MetacriticIE(InfoExtractor):
'format_id': rate_str,
'tbr': int(rate_str),
})
- self._sort_formats(formats)
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
webpage, 'description', flags=re.DOTALL)
diff --git a/hypervideo_dl/extractor/mgoon.py b/hypervideo_dl/extractor/mgoon.py
index 184c311..2388a71 100644
--- a/hypervideo_dl/extractor/mgoon.py
+++ b/hypervideo_dl/extractor/mgoon.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -72,7 +68,6 @@ class MgoonIE(InfoExtractor):
'ext': fmt['format'],
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/mgtv.py b/hypervideo_dl/extractor/mgtv.py
index 4ac70ea..edc92b3 100644
--- a/hypervideo_dl/extractor/mgtv.py
+++ b/hypervideo_dl/extractor/mgtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import time
import uuid
@@ -70,7 +67,7 @@ class MGTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
tk2 = base64.urlsafe_b64encode(
- f'did={compat_str(uuid.uuid4()).encode()}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
+ f'did={str(uuid.uuid4())}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
try:
api_data = self._download_json(
'https://pcweb.api.mgtv.com/player/video', video_id, query={
@@ -120,7 +117,6 @@ class MGTVIE(InfoExtractor):
},
'format_note': stream.get('name'),
})
- self._sort_formats(formats)
return {
'id': video_id,
@@ -140,14 +136,15 @@ class MGTVIE(InfoExtractor):
url_sub = sub.get('url')
if not url_sub:
continue
- locale = sub.get('captionCountrySimpleName')
+ locale = sub.get('captionSimpleName') or 'en'
sub = self._download_json(f'{domain}{url_sub}', video_id, fatal=False,
note=f'Download subtitle for locale {sub.get("name")} ({locale})') or {}
sub_url = url_or_none(sub.get('info'))
if not sub_url:
continue
- subtitles.setdefault(locale or 'en', []).append({
+ subtitles.setdefault(locale.lower(), []).append({
'url': sub_url,
+ 'name': sub.get('name'),
'ext': 'srt'
})
return subtitles
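
The MGTV tk2 change is easy to misread: the old code interpolated compat_str(...).encode() into an f-string, embedding a literal b'...' repr inside the token; the new code keeps the UUID as text and encodes the assembled string once. The token itself is just the reversed urlsafe base64 of the did/pno/ver/clit string:

    import base64
    import time
    import uuid

    did = str(uuid.uuid4())
    raw = f'did={did}|pno=1030|ver=0.3.0301|clit={int(time.time())}'
    tk2 = base64.urlsafe_b64encode(raw.encode())[::-1]  # byte-reversed, as in the diff
    # round-trip check: un-reverse, then decode
    assert base64.urlsafe_b64decode(tk2[::-1]).decode() == raw
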
diff --git a/hypervideo_dl/extractor/miaopai.py b/hypervideo_dl/extractor/miaopai.py
index cf0610b..329ce36 100644
--- a/hypervideo_dl/extractor/miaopai.py
+++ b/hypervideo_dl/extractor/miaopai.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/microsoftembed.py b/hypervideo_dl/extractor/microsoftembed.py
new file mode 100644
index 0000000..f71ab3e
--- /dev/null
+++ b/hypervideo_dl/extractor/microsoftembed.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj, unified_timestamp
+
+
+class MicrosoftEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?microsoft\.com/(?:[^/]+/)?videoplayer/embed/(?P<id>[a-z0-9A-Z]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.microsoft.com/en-us/videoplayer/embed/RWL07e',
+ 'md5': 'eb0ae9007f9b305f9acd0a03e74cb1a9',
+ 'info_dict': {
+ 'id': 'RWL07e',
+ 'title': 'Microsoft for Public Health and Social Services',
+ 'ext': 'mp4',
+ 'thumbnail': 'http://img-prod-cms-rt-microsoft-com.akamaized.net/cms/api/am/imageFileData/RWL7Ju?ver=cae5',
+ 'age_limit': 0,
+ 'timestamp': 1631658316,
+ 'upload_date': '20210914'
+ }
+ }]
+ _API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ metadata = self._download_json(self._API_URL + video_id, video_id)
+
+ formats = []
+ for source_type, source in metadata['streams'].items():
+ if source_type == 'smooth_Streaming':
+ formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss'))
+ elif source_type == 'apple_HTTP_Live_Streaming':
+ formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4'))
+ elif source_type == 'mPEG_DASH':
+ formats.extend(self._extract_mpd_formats(source['url'], video_id))
+ else:
+ formats.append({
+ 'format_id': source_type,
+ 'url': source['url'],
+ 'height': source.get('heightPixels'),
+ 'width': source.get('widthPixels'),
+ })
+
+ subtitles = {
+ lang: [{
+ 'url': data.get('url'),
+ 'ext': 'vtt',
+ }] for lang, data in traverse_obj(metadata, 'captions', default={}).items()
+ }
+
+ thumbnails = [{
+ 'url': thumb.get('url'),
+ 'width': thumb.get('width') or None,
+ 'height': thumb.get('height') or None,
+ } for thumb in traverse_obj(metadata, ('snippet', 'thumbnails', ...))]
+ self._remove_duplicate_formats(thumbnails)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(metadata, ('snippet', 'title')),
+ 'timestamp': unified_timestamp(traverse_obj(metadata, ('snippet', 'activeStartDate'))),
+ 'age_limit': int_or_none(traverse_obj(metadata, ('snippet', 'minimumAge'))) or 0,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ }
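
MicrosoftEmbedIE reuses _remove_duplicate_formats on its thumbnail list; the helper only inspects each dict's 'url', so it works for any list of URL-keyed dicts and mutates the list in place. Approximately:

    def remove_duplicate_urls(entries):
        # keep the first dict per URL, editing the list in place
        seen = set()
        entries[:] = [e for e in entries if e['url'] not in seen and not seen.add(e['url'])]

    thumbs = [{'url': 'a.jpg'}, {'url': 'a.jpg'}, {'url': 'b.jpg'}]
    remove_duplicate_urls(thumbs)
    assert thumbs == [{'url': 'a.jpg'}, {'url': 'b.jpg'}]
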
diff --git a/hypervideo_dl/extractor/microsoftstream.py b/hypervideo_dl/extractor/microsoftstream.py
index 4d5a9df..9b50996 100644
--- a/hypervideo_dl/extractor/microsoftstream.py
+++ b/hypervideo_dl/extractor/microsoftstream.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from base64 import b64decode
from .common import InfoExtractor
@@ -104,7 +101,6 @@ class MicrosoftStreamIE(InfoExtractor):
playlist['playbackUrl'], video_id, ism_id='mss',
fatal=False, headers=headers))
formats = [merge_dicts(f, {'language': language}) for f in formats]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/microsoftvirtualacademy.py b/hypervideo_dl/extractor/microsoftvirtualacademy.py
index 46abd2a..b759b18 100644
--- a/hypervideo_dl/extractor/microsoftvirtualacademy.py
+++ b/hypervideo_dl/extractor/microsoftvirtualacademy.py
@@ -1,11 +1,6 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..compat import (
- compat_xpath,
-)
from ..utils import (
int_or_none,
parse_duration,
@@ -70,9 +65,9 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
formats = []
- for sources in settings.findall(compat_xpath('.//MediaSources')):
+ for sources in settings.findall('.//MediaSources'):
sources_type = sources.get('videoType')
- for source in sources.findall(compat_xpath('./MediaSource')):
+ for source in sources.findall('./MediaSource'):
video_url = source.text
if not video_url or not video_url.startswith('http'):
continue
@@ -98,10 +93,9 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
'acodec': acodec,
'vcodec': vcodec,
})
- self._sort_formats(formats)
subtitles = {}
- for source in settings.findall(compat_xpath('.//MarkerResourceSource')):
+ for source in settings.findall('.//MarkerResourceSource'):
subtitle_url = source.text
if not subtitle_url:
continue
diff --git a/hypervideo_dl/extractor/mildom.py b/hypervideo_dl/extractor/mildom.py
index 5f2df29..f64d575 100644
--- a/hypervideo_dl/extractor/mildom.py
+++ b/hypervideo_dl/extractor/mildom.py
@@ -1,8 +1,6 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import json
+import uuid
from .common import InfoExtractor
from ..utils import (
@@ -11,7 +9,6 @@ from ..utils import (
ExtractorError,
float_or_none,
OnDemandPagedList,
- random_uuidv4,
traverse_obj,
)
@@ -21,7 +18,7 @@ class MildomBaseIE(InfoExtractor):
def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
if not self._GUEST_ID:
- self._GUEST_ID = f'pc-gp-{random_uuidv4()}'
+ self._GUEST_ID = f'pc-gp-{str(uuid.uuid4())}'
content = self._download_json(
url, video_id, note=note, data=json.dumps(body).encode() if body else None,
@@ -77,8 +74,6 @@ class MildomIE(MildomBaseIE):
for fmt in formats:
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
- self._sort_formats(formats)
-
return {
'id': result_video_id,
'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
@@ -169,8 +164,6 @@ class MildomVodIE(MildomBaseIE):
'ext': 'mp4'
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
diff --git a/hypervideo_dl/extractor/minds.py b/hypervideo_dl/extractor/minds.py
index 9da0720..2fb1792 100644
--- a/hypervideo_dl/extractor/minds.py
+++ b/hypervideo_dl/extractor/minds.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -79,7 +76,7 @@ class MindsIE(MindsBaseIE):
else:
return self.url_result(entity['perma_url'])
else:
- assert(entity['subtype'] == 'video')
+ assert entity['subtype'] == 'video'
video_id = entity_id
# 1080p and webm formats available only on the sources array
video = self._call_api(
@@ -95,7 +92,6 @@ class MindsIE(MindsBaseIE):
'height': int_or_none(source.get('size')),
'url': src,
})
- self._sort_formats(formats)
entity = video.get('entity') or entity
owner = entity.get('ownerObj') or {}
@@ -121,7 +117,7 @@ class MindsIE(MindsBaseIE):
'timestamp': int_or_none(entity.get('time_created')),
'uploader': strip_or_none(owner.get('name')),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://www.minds.com/%s'),
+ 'uploader_url': format_field(uploader_id, None, 'https://www.minds.com/%s'),
'view_count': int_or_none(entity.get('play:count')),
'like_count': int_or_none(entity.get('thumbs:up:count')),
'dislike_count': int_or_none(entity.get('thumbs:down:count')),
diff --git a/hypervideo_dl/extractor/ministrygrid.py b/hypervideo_dl/extractor/ministrygrid.py
index 8ad9239..053c672 100644
--- a/hypervideo_dl/extractor/ministrygrid.py
+++ b/hypervideo_dl/extractor/ministrygrid.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/minoto.py b/hypervideo_dl/extractor/minoto.py
index 603ce94..8d18179 100644
--- a/hypervideo_dl/extractor/minoto.py
+++ b/hypervideo_dl/extractor/minoto.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -39,7 +35,6 @@ class MinotoIE(InfoExtractor):
'height': int_or_none(fmt.get('height')),
**parse_codecs(fmt.get('codecs')),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/miomio.py b/hypervideo_dl/extractor/miomio.py
index 40f72d6..a0a041e 100644
--- a/hypervideo_dl/extractor/miomio.py
+++ b/hypervideo_dl/extractor/miomio.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/mirrativ.py b/hypervideo_dl/extractor/mirrativ.py
index 2111de6..0a8ee0c 100644
--- a/hypervideo_dl/extractor/mirrativ.py
+++ b/hypervideo_dl/extractor/mirrativ.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -57,7 +55,6 @@ class MirrativIE(MirrativBaseIE):
hls_url, video_id,
ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls', live=is_live)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/mirrorcouk.py b/hypervideo_dl/extractor/mirrorcouk.py
new file mode 100644
index 0000000..7b4f95b
--- /dev/null
+++ b/hypervideo_dl/extractor/mirrorcouk.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class MirrorCoUKIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mirror\.co\.uk/[/+[\w-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-island-fans-baffled-after-27163139',
+ 'info_dict': {
+ 'id': 'voyyS7SV',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Gemma Owen enters the villa',
+ 'description': 'Love Island: Michael Owen\'s daughter Gemma Owen enters the villa.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/voyyS7SV/poster.jpg?width=720',
+ 'display_id': '27163139',
+ 'timestamp': 1654547895,
+ 'duration': 57.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/michael-jacksons-son-blankets-new-25344890',
+ 'info_dict': {
+ 'id': 'jyXpdvxp',
+ 'ext': 'mp4',
+ 'title': 'Michael Jackson’s son Bigi calls for action on climate change',
+ 'description': 'md5:d39ceaba2b7a615b4ca6557e7bc40222',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/jyXpdvxp/poster.jpg?width=720',
+ 'display_id': '25344890',
+ 'timestamp': 1635749907,
+ 'duration': 56.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/sport/football/news/antonio-conte-next-tottenham-manager-25346042',
+ 'info_dict': {
+ 'id': 'q6FkKa4p',
+ 'ext': 'mp4',
+ 'title': 'Nuno sacked by Tottenham after fifth Premier League defeat of the season',
+ 'description': 'Nuno Espirito Santo has been sacked as Tottenham boss after only four months in charge.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/q6FkKa4p/poster.jpg?width=720',
+ 'display_id': '25346042',
+ 'timestamp': 1635763157,
+ 'duration': 40.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/johnny-depp-splashes-50k-curry-27160737',
+ 'info_dict': {
+ 'id': 'IT0oa1nH',
+ 'ext': 'mp4',
+ 'title': 'Johnny Depp Leaves The Grand Hotel in Birmingham',
+ 'description': 'Johnny Depp Leaves The Grand Hotel in Birmingham.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/IT0oa1nH/poster.jpg?width=720',
+ 'display_id': '27160737',
+ 'timestamp': 1654524120,
+ 'duration': 65.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-islands-liam-could-first-27162602',
+ 'info_dict': {
+ 'id': 'EaPr5Z2j',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Davide reveals plot twist after receiving text',
+ 'description': 'Love Island: Davide reveals plot twist after receiving text',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/EaPr5Z2j/poster.jpg?width=720',
+ 'display_id': '27162602',
+ 'timestamp': 1654552597,
+ 'duration': 23.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/news/uk-news/william-kate-sent-message-george-27160572',
+ 'info_dict': {
+ 'id': 'ygtceXIu',
+ 'ext': 'mp4',
+ 'title': 'Prince William and Kate arrive in Wales with George and Charlotte',
+ 'description': 'Prince William and Kate Middleton arrive in Wales with children Prince George and Princess Charlotte.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/ygtceXIu/poster.jpg?width=720',
+ 'display_id': '27160572',
+ 'timestamp': 1654349678,
+ 'duration': 106.0,
+ 'upload_date': '20220604',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data = self._search_json(r'div\s+class="json-placeholder"\s+data-json="',
+ webpage, 'data', display_id, transform_source=unescapeHTML)['videoData']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'jwplatform:{data["videoId"]}',
+ 'ie_key': 'JWPlatform',
+ 'display_id': display_id,
+ }
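
The Mirror's metadata sits in an HTML attribute, so the JSON arrives entity-escaped (&quot; in place of double quotes); that is why _search_json above is passed transform_source=unescapeHTML. A stdlib illustration with a hypothetical placeholder div:

    import html
    import json
    import re

    page = '<div class="json-placeholder" data-json="{&quot;videoData&quot;: {&quot;videoId&quot;: &quot;voyyS7SV&quot;}}"></div>'
    raw = re.search(r'data-json="([^"]*)"', page).group(1)
    data = json.loads(html.unescape(raw))  # unescape entities before parsing
    assert data['videoData']['videoId'] == 'voyyS7SV'
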
diff --git a/hypervideo_dl/extractor/mit.py b/hypervideo_dl/extractor/mit.py
index 60e4569..38cc0c2 100644
--- a/hypervideo_dl/extractor/mit.py
+++ b/hypervideo_dl/extractor/mit.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
import json
diff --git a/hypervideo_dl/extractor/mitele.py b/hypervideo_dl/extractor/mitele.py
index b593723..ea29986 100644
--- a/hypervideo_dl/extractor/mitele.py
+++ b/hypervideo_dl/extractor/mitele.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .telecinco import TelecincoIE
from ..utils import (
int_or_none,
@@ -8,7 +5,7 @@ from ..utils import (
)
-class MiTeleIE(TelecincoIE):
+class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
diff --git a/hypervideo_dl/extractor/mixch.py b/hypervideo_dl/extractor/mixch.py
index 31f450d..3f430a7 100644
--- a/hypervideo_dl/extractor/mixch.py
+++ b/hypervideo_dl/extractor/mixch.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/mixcloud.py b/hypervideo_dl/extractor/mixcloud.py
index c2dd078..fb5a08c 100644
--- a/hypervideo_dl/extractor/mixcloud.py
+++ b/hypervideo_dl/extractor/mixcloud.py
@@ -1,15 +1,11 @@
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
- compat_chr,
compat_ord,
compat_str,
compat_urllib_parse_unquote,
- compat_zip
)
from ..utils import (
ExtractorError,
@@ -75,8 +71,8 @@ class MixcloudIE(MixcloudBaseIE):
def _decrypt_xor_cipher(key, ciphertext):
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
return ''.join([
- compat_chr(compat_ord(ch) ^ compat_ord(k))
- for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
+ chr(compat_ord(ch) ^ compat_ord(k))
+ for ch, k in zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url):
username, slug = self._match_valid_url(url).groups()
@@ -163,6 +159,7 @@ class MixcloudIE(MixcloudBaseIE):
formats.append({
'format_id': 'http',
'url': decrypted,
+ 'vcodec': 'none',
'downloader_options': {
# Mixcloud starts throttling at >~5M
'http_chunk_size': 5242880,
@@ -172,8 +169,6 @@ class MixcloudIE(MixcloudBaseIE):
if not formats and cloudcast.get('isExclusive'):
self.raise_login_required(metadata_available=True)
- self._sort_formats(formats)
-
comments = []
for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
node = edge.get('node') or {}
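
Mixcloud's stream URLs are XOR-"encrypted" against a fixed key; since XOR is its own inverse, _decrypt_xor_cipher both encrypts and decrypts, and the hunk above merely swaps compat_chr/compat_zip for the builtins. Self-contained, with a made-up key:

    import itertools

    def xor_cipher(key, text):
        # XOR each character against the cycled key; applying twice round-trips
        return ''.join(chr(ord(ch) ^ ord(k)) for ch, k in zip(text, itertools.cycle(key)))

    key = 'not-the-real-key'  # placeholder; the real key ships in mixcloud.py
    scrambled = xor_cipher(key, 'https://example.invalid/stream.m3u8')
    assert xor_cipher(key, scrambled) == 'https://example.invalid/stream.m3u8'
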
diff --git a/hypervideo_dl/extractor/mlb.py b/hypervideo_dl/extractor/mlb.py
index b69301d..72057dc 100644
--- a/hypervideo_dl/extractor/mlb.py
+++ b/hypervideo_dl/extractor/mlb.py
@@ -1,13 +1,15 @@
-from __future__ import unicode_literals
-
import re
+import urllib.parse
+import uuid
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
+ join_nonempty,
parse_duration,
parse_iso8601,
+ traverse_obj,
try_get,
)
@@ -52,7 +54,6 @@ class MLBBaseIE(InfoExtractor):
'width': int(mobj.group(1)),
})
formats.append(f)
- self._sort_formats(formats)
thumbnails = []
for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
@@ -94,6 +95,10 @@ class MLBIE(MLBBaseIE):
(?P<id>\d+)
)
'''
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+ r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
+ ]
_TESTS = [
{
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
@@ -265,3 +270,112 @@ class MLBVideoIE(MLBBaseIE):
}
}''' % display_id,
})['data']['mediaPlayback'][0]
+
+
+class MLBTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
+ _NETRC_MACHINE = 'mlb'
+
+ _TESTS = [{
+ 'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
+ 'info_dict': {
+ 'id': '661581',
+ 'ext': 'mp4',
+ 'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _access_token = None
+
+ def _real_initialize(self):
+ if not self._access_token:
+ self.raise_login_required(
+ 'All videos are only available to registered users', method='password')
+
+ def _perform_login(self, username, password):
+ data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
+ access_token = self._download_json(
+ 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
+ headers={
+ 'User-Agent': 'okhttp/3.12.1',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=data.encode())['access_token']
+
+ entitlement = self._download_webpage(
+ f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={str(uuid.uuid4())}', None,
+ headers={
+ 'User-Agent': 'okhttp/3.12.1',
+ 'Authorization': f'Bearer {access_token}'
+ })
+
+ data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
+ self._access_token = self._download_json(
+ 'https://us.edge.bamgrid.com/token', None,
+ headers={
+ 'Accept': 'application/json',
+ 'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=data.encode())['access_token']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ airings = self._download_json(
+ f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
+ video_id)['data']['Airings']
+
+ formats, subtitles = [], {}
+ for airing in airings:
+ m3u8_url = self._download_json(
+ airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
+ headers={
+ 'Authorization': self._access_token,
+ 'Accept': 'application/vnd.media-service+json; version=2'
+ })['stream']['complete']
+ f, s = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage')))
+ formats.extend(f)
+ self._merge_subtitles(s, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
+ 'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'http_headers': {'Authorization': f'Bearer {self._access_token}'},
+ }
+
+
+class MLBArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.mlb\.com/news/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.mlb.com/news/manny-machado-robs-guillermo-heredia-reacts',
+ 'info_dict': {
+ 'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
+ 'title': 'Machado\'s grab draws hilarious irate reaction',
+ 'modified_timestamp': 1650130737,
+ 'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
+ 'modified_date': '20220416',
+ },
+ 'playlist_count': 2,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']
+
+ content_data_id = traverse_obj(
+ apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)
+
+ content_real_info = apollo_cache_json[content_data_id]
+
+ return self.playlist_from_matches(
+ traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
+ getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
+ ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
+ title=self._html_search_meta('og:title', webpage),
+ description=content_real_info.get('summary'),
+ modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))
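
MLBTVIE tags each airing's formats with join_nonempty(feedType, feedLanguage), so a missing piece never leaves a dangling delimiter ('HOME-en' when both are present, just 'en' when feedType is absent). The helper is roughly:

    def join_nonempty(*values, delim='-'):
        # yt-dlp-style: drop falsy parts, join whatever remains
        return delim.join(str(v) for v in values if v)

    assert join_nonempty('HOME', 'en') == 'HOME-en'
    assert join_nonempty(None, 'en') == 'en'
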
diff --git a/hypervideo_dl/extractor/mlssoccer.py b/hypervideo_dl/extractor/mlssoccer.py
index 1d6d4b8..9383f13 100644
--- a/hypervideo_dl/extractor/mlssoccer.py
+++ b/hypervideo_dl/extractor/mlssoccer.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/mnet.py b/hypervideo_dl/extractor/mnet.py
index 0e26ca1..98bab2e 100644
--- a/hypervideo_dl/extractor/mnet.py
+++ b/hypervideo_dl/extractor/mnet.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -62,7 +59,6 @@ class MnetIE(InfoExtractor):
m3u8_url += '?' + token
formats = self._extract_wowza_formats(
m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])
- self._sort_formats(formats)
description = info.get('ment')
duration = parse_duration(info.get('time'))
diff --git a/hypervideo_dl/extractor/mocha.py b/hypervideo_dl/extractor/mocha.py
new file mode 100644
index 0000000..5f72b81
--- /dev/null
+++ b/hypervideo_dl/extractor/mocha.py
@@ -0,0 +1,64 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class MochaVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.mocha\.com\.vn/(?P<video_slug>[\w-]+)'
+ _TESTS = [{
+ 'url': 'http://video.mocha.com.vn/chuyen-meo-gia-su-tu-thong-diep-cuoc-song-v18694039',
+ 'info_dict': {
+ 'id': '18694039',
+ 'title': 'Chuyện mèo giả sư tử | Thông điệp cuộc sống',
+ 'ext': 'mp4',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'display_id': 'chuyen-meo-gia-su-tu-thong-diep-cuoc-song',
+ 'thumbnail': 'http://mcvideomd1fr.keeng.net/playnow/images/20220505/ad0a055d-2f69-42ca-b888-4790041fe6bc_640x480.jpg',
+ 'description': '',
+ 'duration': 70,
+ 'timestamp': 1652254203,
+ 'upload_date': '20220511',
+ 'comment_count': int,
+ 'categories': ['Kids']
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_slug = self._match_valid_url(url).group('video_slug')
+ json_data = self._download_json(
+ 'http://apivideo.mocha.com.vn:8081/onMediaBackendBiz/mochavideo/getVideoDetail',
+ video_slug, query={'url': url, 'token': ''})['data']['videoDetail']
+ video_id = str(json_data['id'])
+ video_urls = (json_data.get('list_resolution') or []) + [json_data.get('original_path')]
+
+ formats, subtitles = [], {}
+ for video in video_urls:
+ if isinstance(video, str):
+                formats.append({'url': video, 'ext': 'mp4'})
+ else:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video.get('video_path'), video_id, ext='mp4')
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'display_id': json_data.get('slug') or video_slug,
+ 'title': json_data.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': json_data.get('description'),
+ 'duration': json_data.get('durationS'),
+ 'view_count': json_data.get('total_view'),
+ 'like_count': json_data.get('total_like'),
+ 'dislike_count': json_data.get('total_unlike'),
+ 'thumbnail': json_data.get('image_path_thumb'),
+ 'timestamp': int_or_none(json_data.get('publish_time'), scale=1000),
+ 'is_live': json_data.get('isLive'),
+            'channel': traverse_obj(json_data, ('channels', 0, 'name')),
+            'channel_id': traverse_obj(json_data, ('channels', 0, 'id')),
+            'channel_follower_count': traverse_obj(json_data, ('channels', 0, 'numfollow')),
+ 'categories': traverse_obj(json_data, ('categories', ..., 'categoryname')),
+ 'comment_count': json_data.get('total_comment'),
+ }
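
Mocha reports publish_time in epoch milliseconds, hence int_or_none(..., scale=1000) above. The helper's tolerant coercion plus scaling is roughly:

    def int_or_none(value, scale=1):
        # coerce to int, dividing by scale; swallow bad inputs instead of raising
        if value is None:
            return None
        try:
            return int(value) // scale
        except (TypeError, ValueError):
            return None

    assert int_or_none('1652254203000', scale=1000) == 1652254203
    assert int_or_none('n/a') is None
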
diff --git a/hypervideo_dl/extractor/moevideo.py b/hypervideo_dl/extractor/moevideo.py
index a3f1b38..fda08ca 100644
--- a/hypervideo_dl/extractor/moevideo.py
+++ b/hypervideo_dl/extractor/moevideo.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/mofosex.py b/hypervideo_dl/extractor/mofosex.py
index 5234cac..9cb6980 100644
--- a/hypervideo_dl/extractor/mofosex.py
+++ b/hypervideo_dl/extractor/mofosex.py
@@ -1,7 +1,3 @@
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -11,7 +7,7 @@ from ..utils import (
from .keezmovies import KeezMoviesIE
-class MofosexIE(KeezMoviesIE):
+class MofosexIE(KeezMoviesIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
_TESTS = [{
'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
@@ -61,17 +57,12 @@ class MofosexIE(KeezMoviesIE):
class MofosexEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)']
_TESTS = [{
'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
diff --git a/hypervideo_dl/extractor/mojvideo.py b/hypervideo_dl/extractor/mojvideo.py
index 16d9405..d47ad07 100644
--- a/hypervideo_dl/extractor/mojvideo.py
+++ b/hypervideo_dl/extractor/mojvideo.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/morningstar.py b/hypervideo_dl/extractor/morningstar.py
index 71a22a6..e9fcfe3 100644
--- a/hypervideo_dl/extractor/morningstar.py
+++ b/hypervideo_dl/extractor/morningstar.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/motherless.py b/hypervideo_dl/extractor/motherless.py
index 111c7c5..c24ef9b 100644
--- a/hypervideo_dl/extractor/motherless.py
+++ b/hypervideo_dl/extractor/motherless.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import datetime
import re
@@ -71,7 +69,7 @@ class MotherlessIE(InfoExtractor):
'title': 'a/ Hot Teens',
'categories': list,
'upload_date': '20210104',
- 'uploader_id': 'yonbiw',
+ 'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
@@ -125,11 +123,12 @@ class MotherlessIE(InfoExtractor):
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
- comment_count = webpage.count('class="media-comment-contents"')
+ comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex(
- (r'"media-meta-member">\s+<a href="/m/([^"]+)"',
- r'<span\b[^>]+\bclass="username">([^<]+)</span>'),
+ (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
+ r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
webpage, 'uploader_id', fatal=False)
+
categories = self._html_search_meta('keywords', webpage, default=None)
if categories:
categories = [cat.strip() for cat in categories.split(',')]
@@ -219,23 +218,23 @@ class MotherlessGroupIE(InfoExtractor):
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
- page_count = self._int(self._search_regex(
- r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">',
- webpage, 'page_count', default=0), 'page_count')
+ page_count = str_to_int(self._search_regex(
+ r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
+ webpage, 'page_count', default=0))
if not page_count:
message = self._search_regex(
- r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*',
+ r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
webpage, 'error_msg', default=None) or 'This group has no videos.'
self.report_warning(message, group_id)
+ page_count = 1
PAGE_SIZE = 80
def _get_page(idx):
- if not page_count:
- return
- webpage = self._download_webpage(
- page_url, group_id, query={'page': idx + 1},
- note='Downloading page %d/%d' % (idx + 1, page_count)
- )
+ if idx > 0:
+ webpage = self._download_webpage(
+ page_url, group_id, query={'page': idx + 1},
+ note='Downloading page %d/%d' % (idx + 1, page_count)
+ )
for entry in self._extract_entries(webpage, url):
yield entry
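
Motherless only publishes relative ages ('3d ago'), so upload_date is reconstructed by subtracting a timedelta keyed on the unit suffix, as in the kwargs line above. A compact stdlib equivalent (the unit map is assumed to mirror the extractor's _AGO_UNITS):

    import datetime

    _AGO_UNITS = {'h': 'hours', 'd': 'days'}  # assumed mapping, per the extractor's usage

    def upload_date_from_ago(uploaded_ago):
        # e.g. '3d' -> UTC today minus three days, formatted YYYYMMDD
        kwargs = {_AGO_UNITS[uploaded_ago[-1]]: int(uploaded_ago[:-1])}
        return (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')

    print(upload_date_from_ago('3d'))
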
diff --git a/hypervideo_dl/extractor/motorsport.py b/hypervideo_dl/extractor/motorsport.py
index c9d1ab6..efb087d 100644
--- a/hypervideo_dl/extractor/motorsport.py
+++ b/hypervideo_dl/extractor/motorsport.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
@@ -34,8 +31,13 @@ class MotorsportIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
iframe_path = self._html_search_regex(
- r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
- 'iframe path')
+ r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage, 'iframe path', default=None)
+
+ if iframe_path is None:
+ iframe_path = self._html_search_regex(
+ r'<iframe [^>]*\bsrc="(https://motorsport\.tv/embed/[^"]+)', webpage, 'embed iframe path')
+ return self.url_result(iframe_path)
+
iframe = self._download_webpage(
compat_urlparse.urljoin(url, iframe_path), display_id,
'Downloading iframe')
diff --git a/hypervideo_dl/extractor/movieclips.py b/hypervideo_dl/extractor/movieclips.py
index 5453da1..4777f44 100644
--- a/hypervideo_dl/extractor/movieclips.py
+++ b/hypervideo_dl/extractor/movieclips.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
smuggle_url,
diff --git a/hypervideo_dl/extractor/moviepilot.py b/hypervideo_dl/extractor/moviepilot.py
new file mode 100644
index 0000000..ca54156
--- /dev/null
+++ b/hypervideo_dl/extractor/moviepilot.py
@@ -0,0 +1,112 @@
+import re
+
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from ..utils import (
+    parse_iso8601,
+    try_get,
+)
+
+
+class MoviepilotIE(InfoExtractor):
+    IE_NAME = 'moviepilot'
+    IE_DESC = 'Moviepilot trailer'
+ _VALID_URL = r'https?://(?:www\.)?moviepilot\.de/movies/(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.moviepilot.de/movies/interstellar-2/',
+ 'info_dict': {
+ 'id': 'x7xdut5',
+ 'display_id': 'interstellar-2',
+ 'ext': 'mp4',
+ 'title': 'Interstellar',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaXev1VvzitVZMFsR/x720',
+ 'timestamp': 1400491705,
+ 'description': 'md5:7dfc5c1758e7322a7346934f1f0c489c',
+ 'uploader': 'Moviepilot',
+ 'like_count': int,
+ 'view_count': int,
+ 'uploader_id': 'x6nd9k',
+ 'upload_date': '20140519',
+ 'duration': 140,
+ 'age_limit': 0,
+ 'tags': ['Alle Trailer', 'Movie', 'Third Party'],
+ },
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/interstellar-2/trailer',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/interstellar-2/kinoprogramm/berlin',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/queen-slim/trailer',
+ 'info_dict': {
+ 'id': 'x7xj6o7',
+ 'display_id': 'queen-slim',
+ 'title': 'Queen & Slim',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SbUM71WtomSjVmI_q/x720',
+ 'timestamp': 1571838685,
+ 'description': 'md5:73058bcd030aa12d991e4280d65fbebe',
+ 'uploader': 'Moviepilot',
+ 'like_count': int,
+ 'view_count': int,
+ 'uploader_id': 'x6nd9k',
+ 'upload_date': '20191023',
+ 'duration': 138,
+ 'age_limit': 0,
+ 'tags': ['Movie', 'Verleih', 'Neue Trailer'],
+ },
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/der-geiger-von-florenz/trailer',
+ 'info_dict': {
+ 'id': 'der-geiger-von-florenz',
+ 'title': 'Der Geiger von Florenz',
+ 'ext': 'mp4',
+ },
+ 'skip': 'No trailer for this movie.',
+ }, {
+ 'url': 'https://www.moviepilot.de/movies/muellers-buero/',
+ 'info_dict': {
+ 'id': 'x7xcw1i',
+ 'display_id': 'muellers-buero',
+ 'title': 'Müllers Büro',
+ 'ext': 'mp4',
+ 'description': 'md5:57501251c05cdc61ca314b7633e0312e',
+ 'timestamp': 1287584475,
+ 'age_limit': 0,
+ 'duration': 82,
+ 'upload_date': '20101020',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1WfAm1d6maq_/x720',
+ 'uploader': 'Moviepilot',
+ 'like_count': int,
+ 'view_count': int,
+ 'tags': ['Alle Trailer', 'Movie', 'Verleih'],
+ 'uploader_id': 'x6nd9k',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(f'https://www.moviepilot.de/movies/{video_id}/trailer', video_id)
+
+ duration = try_get(
+ re.match(r'P(?P<hours>\d+)H(?P<mins>\d+)M(?P<secs>\d+)S',
+ self._html_search_meta('duration', webpage, fatal=False) or ''),
+ lambda mobj: sum(float(x) * y for x, y in zip(mobj.groups(), (3600, 60, 1))))
+ # _html_search_meta is not used since we don't want name=description to match
+ description = self._html_search_regex(
+ '<meta[^>]+itemprop="description"[^>]+content="([^>"]+)"', webpage, 'description', fatal=False)
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': DailymotionIE.ie_key(),
+ 'display_id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'url': self._html_search_meta('embedURL', webpage),
+ 'thumbnail': self._html_search_meta('thumbnailURL', webpage),
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage), delimiter=' ')
+ }
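
Note: the duration handling in `_real_extract` above folds a `PxHyMzS`-style string into seconds via a regex and a zip over unit weights. A minimal standalone sketch of the same conversion (the helper name is ours, not part of the extractor):

import re

def parse_phms_duration(value):
    # Same pattern as above: e.g. 'P0H2M20S' -> 140.0 seconds
    mobj = re.match(r'P(?P<hours>\d+)H(?P<mins>\d+)M(?P<secs>\d+)S', value or '')
    if not mobj:
        return None
    return sum(float(x) * y for x, y in zip(mobj.groups(), (3600, 60, 1)))

assert parse_phms_duration('P0H2M20S') == 140.0
assert parse_phms_duration('') is None
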
diff --git a/hypervideo_dl/extractor/moview.py b/hypervideo_dl/extractor/moview.py
new file mode 100644
index 0000000..678b2eb
--- /dev/null
+++ b/hypervideo_dl/extractor/moview.py
@@ -0,0 +1,43 @@
+from .jixie import JixieBaseIE
+
+
+class MoviewPlayIE(JixieBaseIE):
+ _VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
+ _TESTS = [
+ {
+ # drm hls, only use direct link
+ 'url': 'https://www.moview.id/play/174/Candy-Monster',
+ 'info_dict': {
+ 'id': '146182',
+ 'ext': 'mp4',
+ 'display_id': 'Candy-Monster',
+ 'uploader_id': 'Mo165qXUUf',
+ 'duration': 528.2,
+ 'title': 'Candy Monster',
+ 'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
+ 'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
+ }
+ }, {
+ # non-drm hls
+ 'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
+ 'info_dict': {
+ 'id': '28210',
+ 'ext': 'mp4',
+ 'duration': 2595.666667,
+ 'display_id': 'Paris-Van-Java-Episode-16',
+ 'uploader_id': 'Mo165qXUUf',
+ 'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
+ 'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
+ 'title': 'Paris Van Java Episode 16',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'video_id\s*=\s*"(?P<video_id>[^"]+)', webpage, 'video_id')
+
+ return self._extract_data_from_jixie_id(display_id, video_id, webpage)
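
Note: the `video_id` regex above deliberately leaves the closing quote out of the capture group, matching everything after the opening quote. A quick standalone check of the pattern (sample snippet invented):

import re

snippet = 'var video_id = "28210"; var channel_id = "1003";'
video_id = re.search(r'video_id\s*=\s*"(?P<video_id>[^"]+)', snippet).group('video_id')
assert video_id == '28210'
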
diff --git a/hypervideo_dl/extractor/moviezine.py b/hypervideo_dl/extractor/moviezine.py
index 730da4b..cffcdcf 100644
--- a/hypervideo_dl/extractor/moviezine.py
+++ b/hypervideo_dl/extractor/moviezine.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
@@ -33,8 +29,6 @@ class MoviezineIE(InfoExtractor):
'ext': 'mp4',
}]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
diff --git a/hypervideo_dl/extractor/movingimage.py b/hypervideo_dl/extractor/movingimage.py
index 4f62d62..cdd8ba4 100644
--- a/hypervideo_dl/extractor/movingimage.py
+++ b/hypervideo_dl/extractor/movingimage.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
diff --git a/hypervideo_dl/extractor/msn.py b/hypervideo_dl/extractor/msn.py
index f34e210..f91c53e 100644
--- a/hypervideo_dl/extractor/msn.py
+++ b/hypervideo_dl/extractor/msn.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -134,7 +131,6 @@ class MSNIE(InfoExtractor):
'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
'quality': 1 if format_id == '1001' else None,
})
- self._sort_formats(formats)
subtitles = {}
for file_ in video.get('files', []):
diff --git a/hypervideo_dl/extractor/mtv.py b/hypervideo_dl/extractor/mtv.py
index be5de0a..d91be62 100644
--- a/hypervideo_dl/extractor/mtv.py
+++ b/hypervideo_dl/extractor/mtv.py
@@ -1,13 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_xpath,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
find_xpath_attr,
@@ -108,8 +102,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
}])
except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.')
- if formats:
- self._sort_formats(formats)
return formats
def _extract_subtitles(self, mdoc, mtvn_id):
@@ -167,9 +159,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:video_title')
if title_el is None:
- title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
+ title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
if title_el is None:
- title_el = itemdoc.find(compat_xpath('.//title'))
+ title_el = itemdoc.find('.//title')
if title_el.text is None:
title_el = None
@@ -208,8 +200,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
if not formats:
return None
- self._sort_formats(formats)
-
return {
'title': title,
'formats': formats,
@@ -337,6 +327,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvservices:embedded'
_VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1']
_TEST = {
# From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
@@ -352,13 +343,6 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
},
}
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _get_feed_url(self, uri, url=None):
video_id = self._id_from_uri(uri)
config = self._download_json(
@@ -548,7 +532,7 @@ class MTVItaliaIE(MTVServicesInfoExtractor):
}
-class MTVItaliaProgrammaIE(MTVItaliaIE):
+class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'mtv.it:programma'
_VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
_TESTS = [{
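
Note: the static `_extract_url` helpers being removed throughout this patch (here and in myvi.py, nbc.py below) are superseded by declarative `_EMBED_REGEX` lists that the extractor framework scans on its own. Roughly, the framework does something like the following (a simplified sketch, not the actual implementation in common.py):

import re

_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1']

def extract_embed_urls(webpage):
    # Yield every embed URL matched by any pattern in _EMBED_REGEX
    for pattern in _EMBED_REGEX:
        for mobj in re.finditer(pattern, webpage):
            yield mobj.group('url')

page = '<iframe width="640" src="https://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906"></iframe>'
assert list(extract_embed_urls(page)) == ['https://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906']
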
diff --git a/hypervideo_dl/extractor/muenchentv.py b/hypervideo_dl/extractor/muenchentv.py
index a53929e..36a2d46 100644
--- a/hypervideo_dl/extractor/muenchentv.py
+++ b/hypervideo_dl/extractor/muenchentv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -63,7 +60,6 @@ class MuenchenTVIE(InfoExtractor):
'format_id': format_id,
'preference': -100 if '.smil' in s['file'] else 0, # Strictly inferior to all other formats?
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/murrtube.py b/hypervideo_dl/extractor/murrtube.py
index 1eb5de6..6cdbbda 100644
--- a/hypervideo_dl/extractor/murrtube.py
+++ b/hypervideo_dl/extractor/murrtube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import json
@@ -102,7 +99,7 @@ query Medium($id: ID!) {
}
-class MurrtubeUserIE(MurrtubeIE):
+class MurrtubeUserIE(MurrtubeIE): # XXX: Do not subclass from concrete IE
IE_DESC = 'Murrtube user profile'
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
_TEST = {
diff --git a/hypervideo_dl/extractor/musescore.py b/hypervideo_dl/extractor/musescore.py
index 09fadf8..289ae57 100644
--- a/hypervideo_dl/extractor/musescore.py
+++ b/hypervideo_dl/extractor/musescore.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/musicdex.py b/hypervideo_dl/extractor/musicdex.py
index 05f7220..48f2970 100644
--- a/hypervideo_dl/extractor/musicdex.py
+++ b/hypervideo_dl/extractor/musicdex.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
date_from_str,
@@ -100,7 +97,7 @@ class MusicdexAlbumIE(MusicdexBaseIE):
}
-class MusicdexPageIE(MusicdexBaseIE):
+class MusicdexPageIE(MusicdexBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
def _entries(self, id):
next_page_url = self._API_URL % id
while next_page_url:
diff --git a/hypervideo_dl/extractor/mwave.py b/hypervideo_dl/extractor/mwave.py
index a672765..efbfd9d 100644
--- a/hypervideo_dl/extractor/mwave.py
+++ b/hypervideo_dl/extractor/mwave.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -49,7 +47,6 @@ class MwaveIE(InfoExtractor):
continue
formats.extend(
self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/mxplayer.py b/hypervideo_dl/extractor/mxplayer.py
index 3c2afd8..1fdb08e 100644
--- a/hypervideo_dl/extractor/mxplayer.py
+++ b/hypervideo_dl/extractor/mxplayer.py
@@ -1,9 +1,11 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
-from ..utils import try_get
+from ..utils import (
+ int_or_none,
+ traverse_obj,
+ try_get,
+ urljoin,
+)
class MxplayerIE(InfoExtractor):
@@ -12,6 +14,7 @@ class MxplayerIE(InfoExtractor):
'url': 'https://www.mxplayer.in/show/watch-my-girlfriend-is-an-alien-hindi-dubbed/season-1/episode-1-online-9d2013d31d5835bb8400e3b3c5e7bb72',
'info_dict': {
'id': '9d2013d31d5835bb8400e3b3c5e7bb72',
+ 'display_id': 'episode-1-online',
'ext': 'mp4',
'title': 'Episode 1',
'description': 'md5:62ed43eb9fec5efde5cf3bd1040b7670',
@@ -20,7 +23,6 @@ class MxplayerIE(InfoExtractor):
'duration': 2451,
'season': 'Season 1',
'series': 'My Girlfriend Is An Alien (Hindi Dubbed)',
- 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/9d2013d31d5835bb8400e3b3c5e7bb72/en/16x9/320x180/9562f5f8df42cad09c9a9c4e69eb1567_1920x1080.webp',
'episode': 'Episode 1'
},
'params': {
@@ -31,21 +33,17 @@ class MxplayerIE(InfoExtractor):
'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true',
'info_dict': {
'id': 'b9fa28df3bfb8758874735bbd7d2655a',
+ 'display_id': 'watch-knock-knock-hindi-dubbed-movie-online',
'ext': 'mp4',
'title': 'Knock Knock (Hindi Dubbed)',
- 'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b',
- 'season_number': 0,
- 'episode_number': 0,
+ 'description': 'md5:4160f2dfc3b87c524261366f6b736329',
'duration': 5970,
- 'season': 'Season 0',
- 'series': None,
- 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/b9fa28df3bfb8758874735bbd7d2655a/en/16x9/320x180/test_pic1588676032011.webp',
- 'episode': 'Episode 0'
},
'params': {
'format': 'bv',
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
'url': 'https://www.mxplayer.in/show/watch-shaitaan/season-1/the-infamous-taxi-gang-of-meerut-online-45055d5bcff169ad48f2ad7552a83d6c',
'info_dict': {
@@ -58,26 +56,26 @@ class MxplayerIE(InfoExtractor):
'duration': 2332,
'season': 'Season 1',
'series': 'Shaitaan',
- 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/45055d5bcff169ad48f2ad7552a83d6c/en/16x9/320x180/voot_8e7d5f8d8183340869279c732c1e3a43.webp',
'episode': 'Episode 1'
},
'params': {
'format': 'best',
'skip_download': True,
},
+ 'skip': 'No longer available.'
}, {
'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb',
'info_dict': {
'id': 'd445579792b0135598ba1bc9088a84cb',
+ 'display_id': 'duh-swapna-online',
'ext': 'mp4',
'title': 'Duh Swapna',
'description': 'md5:35ff39c4bdac403c53be1e16a04192d8',
'season_number': 1,
'episode_number': 3,
'duration': 2568,
- 'season': 'Chapter 1',
+ 'season': 'Season 1',
'series': 'Aashram',
- 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/d445579792b0135598ba1bc9088a84cb/en/4x3/1600x1200/test_pic1624819307993.webp',
'episode': 'Episode 3'
},
'params': {
@@ -88,6 +86,7 @@ class MxplayerIE(InfoExtractor):
'url': 'https://www.mxplayer.in/show/watch-dangerous/season-1/chapter-1-online-5a351b4f9fb69436f6bd6ae3a1a75292',
'info_dict': {
'id': '5a351b4f9fb69436f6bd6ae3a1a75292',
+ 'display_id': 'chapter-1-online',
'ext': 'mp4',
'title': 'Chapter 1',
'description': 'md5:233886b8598bc91648ac098abe1d288f',
@@ -96,7 +95,6 @@ class MxplayerIE(InfoExtractor):
'duration': 1305,
'season': 'Season 1',
'series': 'Dangerous',
- 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/5a351b4f9fb69436f6bd6ae3a1a75292/en/4x3/1600x1200/test_pic1624706302350.webp',
'episode': 'Episode 1'
},
'params': {
@@ -110,72 +108,93 @@ class MxplayerIE(InfoExtractor):
'ext': 'mp4',
'title': 'The Attacks of 26/11',
'description': 'md5:689bacd29e97b3f31eaf519eb14127e5',
- 'season_number': 0,
- 'episode_number': 0,
'duration': 6085,
- 'season': 'Season 0',
- 'series': None,
- 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/0452f0d80226c398d63ce7e3ea40fa2d/en/16x9/320x180/00c8955dab5e5d340dbde643f9b1f6fd_1920x1080.webp',
- 'episode': 'Episode 0'
},
'params': {
'format': 'best',
'skip_download': True,
},
+ 'skip': 'No longer available. Cannot be played in a browser'
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-kitne-door-kitne-paas-movie-online-a9e9c76c566205955f70d8b2cb88a6a2',
+ 'info_dict': {
+ 'id': 'a9e9c76c566205955f70d8b2cb88a6a2',
+ 'display_id': 'watch-kitne-door-kitne-paas-movie-online',
+ 'title': 'Kitne Door Kitne Paas',
+ 'duration': 8458,
+ 'ext': 'mp4',
+ 'description': 'md5:fb825f3c542513088024dcafef0921b4',
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-ek-thi-begum-hindi/season-2/game-of-power-online-5e5305c28f1409847cdc4520b6ad77cf',
+ 'info_dict': {
+ 'id': '5e5305c28f1409847cdc4520b6ad77cf',
+ 'display_id': 'game-of-power-online',
+ 'title': 'Game Of Power',
+ 'duration': 1845,
+ 'ext': 'mp4',
+ 'description': 'md5:1d0948d2a5312d7013792d53542407f9',
+ 'series': 'Ek Thi Begum (Hindi)',
+ 'season': 'Season 2',
+ 'season_number': 2,
+ 'episode': 'Episode 2',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-deewane-huye-paagal-movie-online-4f9175c40a11c3994182a65afdd37ec6?watch=true',
+ 'info_dict': {
+ 'id': '4f9175c40a11c3994182a65afdd37ec6',
+ 'display_id': 'watch-deewane-huye-paagal-movie-online',
+ 'title': 'Deewane Huye Paagal',
+ 'duration': 9037,
+ 'ext': 'mp4',
+ 'description': 'md5:d17bd5c651016c4ed2e6f8a4ace15534',
+ },
+ 'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
- type, display_id, video_id = self._match_valid_url(url).groups()
- type = 'movie_film' if type == 'movie' else 'tvshow_episode'
- API_URL = 'https://androidapi.mxplay.com/v1/detail/'
- headers = {
- 'X-Av-Code': '23',
- 'X-Country': 'IN',
- 'X-Platform': 'android',
- 'X-App-Version': '1370001318',
- 'X-Resolution': '3840x2160',
- }
- data_json = self._download_json(f'{API_URL}{type}/{video_id}', display_id, headers=headers)['profile']
+ video_type, display_id, video_id = self._match_valid_url(url).group('type', 'display_id', 'id')
+ if 'show' in video_type:
+ video_type = 'episode'
- season, series = None, None
- for dct in data_json.get('levelInfos', []):
- if dct.get('type') == 'tvshow_season':
- season = dct.get('name')
- elif dct.get('type') == 'tvshow_show':
- series = dct.get('name')
- thumbnails = []
- for thumb in data_json.get('poster', []):
- thumbnails.append({
- 'url': thumb.get('url'),
- 'width': thumb.get('width'),
- 'height': thumb.get('height'),
- })
+ data_json = self._download_json(
+ f'https://api.mxplay.com/v1/web/detail/video?type={video_type}&id={video_id}', display_id)
- formats = []
- subtitles = {}
- for dct in data_json.get('playInfo', []):
- if dct.get('extension') == 'mpd':
- frmt, subs = self._extract_mpd_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False)
- formats.extend(frmt)
- subtitles = self._merge_subtitles(subtitles, subs)
- elif dct.get('extension') == 'm3u8':
- frmt, subs = self._extract_m3u8_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False)
- formats.extend(frmt)
- subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
+ formats, subtitles = [], {}
+ m3u8_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
+ data_json, ('stream', (('thirdParty', 'hlsUrl'), ('hls', 'high'))), get_all=False))
+ if m3u8_url:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, display_id, 'mp4', fatal=False)
+ mpd_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
+ data_json, ('stream', (('thirdParty', 'dashUrl'), ('dash', 'high'))), get_all=False))
+ if mpd_url:
+ fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, display_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ season = traverse_obj(data_json, ('container', 'title'))
return {
'id': video_id,
+ 'title': data_json.get('title'),
+ 'formats': formats,
+ 'subtitles': subtitles,
'display_id': display_id,
- 'title': data_json.get('name') or display_id,
- 'description': data_json.get('description'),
- 'season_number': data_json.get('seasonNum'),
- 'episode_number': data_json.get('episodeNum'),
'duration': data_json.get('duration'),
+ 'series': traverse_obj(data_json, ('container', 'container', 'title')),
+ 'description': data_json.get('description'),
'season': season,
- 'series': series,
- 'thumbnails': thumbnails,
- 'formats': formats,
- 'subtitles': subtitles,
+ 'season_number': int_or_none(
+ self._search_regex(r'Season (\d+)', season, 'Season Number', default=None)),
+ 'episode_number': data_json.get('sequence') or None,
}
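
Note: the rewritten stream selection above relies on `traverse_obj` branching: each inner tuple is an alternative path, and `get_all=False` returns the first non-empty hit. A small sketch, assuming the `hypervideo_dl` package is importable and using invented sample data:

from urllib.parse import urljoin

from hypervideo_dl.utils import traverse_obj  # assumption: package is installed

data_json = {  # stand-in for the detail API response
    'stream': {
        'thirdParty': {'hlsUrl': None},          # tried first, skipped (None)
        'hls': {'high': 'hls/video/high.m3u8'},  # first non-empty hit wins
    },
}
m3u8_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
    data_json, ('stream', (('thirdParty', 'hlsUrl'), ('hls', 'high'))), get_all=False))
assert m3u8_url == 'https://llvod.mxplay.com/hls/video/high.m3u8'
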
diff --git a/hypervideo_dl/extractor/mychannels.py b/hypervideo_dl/extractor/mychannels.py
index d820d4e..8a70c1f 100644
--- a/hypervideo_dl/extractor/mychannels.py
+++ b/hypervideo_dl/extractor/mychannels.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/myspace.py b/hypervideo_dl/extractor/myspace.py
index 4227d42..3451098 100644
--- a/hypervideo_dl/extractor/myspace.py
+++ b/hypervideo_dl/extractor/myspace.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -125,7 +122,6 @@ class MySpaceIE(InfoExtractor):
else:
raise ExtractorError(
'Found song but don\'t know how to download it')
- self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
@@ -143,7 +139,6 @@ class MySpaceIE(InfoExtractor):
video.get('streamUrl'), video.get('hlsStreamUrl'),
video.get('mp4StreamUrl'), int_or_none(video.get('width')),
int_or_none(video.get('height')))
- self._sort_formats(formats)
return {
'id': video_id,
'title': video['title'],
diff --git a/hypervideo_dl/extractor/myspass.py b/hypervideo_dl/extractor/myspass.py
index 1775d5f..28ac982 100644
--- a/hypervideo_dl/extractor/myspass.py
+++ b/hypervideo_dl/extractor/myspass.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/myvi.py b/hypervideo_dl/extractor/myvi.py
index 75d2863..df7200b 100644
--- a/hypervideo_dl/extractor/myvi.py
+++ b/hypervideo_dl/extractor/myvi.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from .vimple import SprutoBaseIE
@@ -29,6 +24,7 @@ class MyviIE(SprutoBaseIE):
)
(?P<id>[\da-zA-Z_-]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1']
_TESTS = [{
'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
@@ -59,13 +55,6 @@ class MyviIE(SprutoBaseIE):
'only_matching': True,
}]
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/hypervideo_dl/extractor/myvideoge.py b/hypervideo_dl/extractor/myvideoge.py
index 0a1d7d0..513d4cb 100644
--- a/hypervideo_dl/extractor/myvideoge.py
+++ b/hypervideo_dl/extractor/myvideoge.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import js_to_json
diff --git a/hypervideo_dl/extractor/myvidster.py b/hypervideo_dl/extractor/myvidster.py
index 2117d30..c91f294 100644
--- a/hypervideo_dl/extractor/myvidster.py
+++ b/hypervideo_dl/extractor/myvidster.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/n1.py b/hypervideo_dl/extractor/n1.py
index fdb7f32..55345f3 100644
--- a/hypervideo_dl/extractor/n1.py
+++ b/hypervideo_dl/extractor/n1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -27,8 +24,6 @@ class N1InfoAssetIE(InfoExtractor):
formats = self._extract_m3u8_formats(
url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_id,
diff --git a/hypervideo_dl/extractor/nate.py b/hypervideo_dl/extractor/nate.py
index 072faf6..5e74caa 100644
--- a/hypervideo_dl/extractor/nate.py
+++ b/hypervideo_dl/extractor/nate.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
@@ -71,7 +68,6 @@ class NateIE(InfoExtractor):
'height': self._QUALITY.get(f_url[-2:]),
'quality': int_or_none(f_url[-2:]),
} for f_url in video_data.get('smcUriList') or []]
- self._sort_formats(formats)
return {
'id': id,
'title': video_data.get('clipTitle'),
diff --git a/hypervideo_dl/extractor/nationalgeographic.py b/hypervideo_dl/extractor/nationalgeographic.py
index ee12e2b..ad525c2 100644
--- a/hypervideo_dl/extractor/nationalgeographic.py
+++ b/hypervideo_dl/extractor/nationalgeographic.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .fox import FOXIE
from ..utils import (
@@ -61,7 +59,7 @@ class NationalGeographicVideoIE(InfoExtractor):
}
-class NationalGeographicTVIE(FOXIE):
+class NationalGeographicTVIE(FOXIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
_TESTS = [{
'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
diff --git a/hypervideo_dl/extractor/naver.py b/hypervideo_dl/extractor/naver.py
index a6821ba..e2e6e97 100644
--- a/hypervideo_dl/extractor/naver.py
+++ b/hypervideo_dl/extractor/naver.py
@@ -1,16 +1,19 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import itertools
import re
+from urllib.parse import urlparse, parse_qs
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
dict_get,
- ExtractorError,
int_or_none,
+ join_nonempty,
+ merge_dicts,
parse_duration,
+ traverse_obj,
try_get,
+ unified_timestamp,
update_url_query,
)
@@ -65,19 +68,16 @@ class NaverBaseIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
update_url_query(stream_url, query), video_id,
'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
- self._sort_formats(formats)
replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
def get_subs(caption_url):
if re.search(self._CAPTION_EXT_RE, caption_url):
- return [{
- 'url': replace_ext(caption_url, 'ttml'),
- }, {
- 'url': replace_ext(caption_url, 'vtt'),
- }]
- else:
- return [{'url': caption_url}]
+ return [
+ replace_ext(caption_url, 'ttml'),
+ replace_ext(caption_url, 'vtt'),
+ ]
+ return [caption_url]
automatic_captions = {}
subtitles = {}
@@ -86,7 +86,13 @@ class NaverBaseIE(InfoExtractor):
if not caption_url:
continue
sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles
- sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url))
+ lang = caption.get('locale') or join_nonempty('language', 'country', from_dict=caption) or 'und'
+ if caption.get('type') == 'fan':
+ lang += '_fan%d' % next(i for i in itertools.count(1) if f'{lang}_fan{i}' not in sub_dict)
+ sub_dict.setdefault(lang, []).extend({
+ 'url': sub_url,
+ 'name': join_nonempty('label', 'fanName', from_dict=caption, delim=' - '),
+ } for sub_url in get_subs(caption_url))
user = meta.get('user', {})
@@ -237,7 +243,6 @@ class NaverLiveIE(InfoExtractor):
quality.get('url'), video_id, 'mp4',
m3u8_id=quality.get('qualityId'), live=True
))
- self._sort_formats(formats)
return {
'id': video_id,
@@ -250,3 +255,142 @@ class NaverLiveIE(InfoExtractor):
'categories': [meta.get('categoryId')],
'is_live': True
}
+
+
+class NaverNowIE(NaverBaseIE):
+ IE_NAME = 'navernow'
+ _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>\w+)'
+ _API_URL = 'https://apis.naver.com/now_web/oldnow_web/v4'
+ _TESTS = [{
+ 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay=',
+ 'md5': 'e05854162c21c221481de16b2944a0bc',
+ 'info_dict': {
+ 'id': '4759-26331132',
+ 'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1650369600,
+ 'upload_date': '20220419',
+ 'uploader_id': 'now',
+ 'view_count': int,
+ 'uploader_url': 'https://now.naver.com/show/4759',
+ 'uploader': '아이키의 떰즈업',
+ },
+ 'params': {
+ 'noplaylist': True,
+ }
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
+ 'md5': '9f6118e398aa0f22b2152f554ea7851b',
+ 'info_dict': {
+ 'id': '4759-26601461',
+ 'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20220504',
+ 'timestamp': 1651648311,
+ 'uploader_id': 'now',
+ 'view_count': int,
+ 'uploader_url': 'https://now.naver.com/show/4759',
+ 'uploader': '아이키의 떰즈업',
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 101
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 101,
+ }, {
+ 'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 101,
+ }, {
+ 'url': 'https://now.naver.com/s/now.kihyunplay?shareReplayId=30573291#replay',
+ 'only_matching': True,
+ }]
+
+ def _extract_replay(self, show_id, replay_id):
+ vod_info = self._download_json(f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}', replay_id)
+ in_key = self._download_json(f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}/inkey', replay_id)['inKey']
+ return merge_dicts({
+ 'id': f'{show_id}-{replay_id}',
+ 'title': traverse_obj(vod_info, ('episode', 'title')),
+ 'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))),
+ 'thumbnail': vod_info.get('thumbnail_image_url'),
+ }, self._extract_video_info(replay_id, vod_info['video_id'], in_key))
+
+ def _extract_show_replays(self, show_id):
+ page_size = 15
+ page = 1
+ while True:
+ show_vod_info = self._download_json(
+ f'{self._API_URL}/vod-shows/now.{show_id}', show_id,
+ query={'page': page, 'page_size': page_size},
+ note=f'Downloading JSON vod list for show {show_id} - page {page}'
+ )['response']['result']
+ for v in show_vod_info.get('vod_list') or []:
+ yield self._extract_replay(show_id, v['id'])
+
+ if len(show_vod_info.get('vod_list') or []) < page_size:
+ break
+ page += 1
+
+ def _extract_show_highlights(self, show_id, highlight_id=None):
+ page_size = 10
+ page = 1
+ while True:
+ highlights_videos = self._download_json(
+ f'{self._API_URL}/shows/now.{show_id}/highlights/videos/', show_id,
+ query={'page': page, 'page_size': page_size},
+ note=f'Downloading JSON highlights for show {show_id} - page {page}')
+
+ for highlight in highlights_videos.get('results') or []:
+ if highlight_id and highlight.get('clip_no') != int(highlight_id):
+ continue
+ yield merge_dicts({
+ 'id': f'{show_id}-{highlight["clip_no"]}',
+ 'title': highlight.get('title'),
+ 'timestamp': unified_timestamp(highlight.get('regdate')),
+ 'thumbnail': highlight.get('thumbnail_url'),
+ }, self._extract_video_info(highlight['clip_no'], highlight['video_id'], highlight['video_inkey']))
+
+ if len(highlights_videos.get('results') or []) < page_size:
+ break
+ page += 1
+
+ def _extract_highlight(self, show_id, highlight_id):
+ try:
+ return next(self._extract_show_highlights(show_id, highlight_id))
+ except StopIteration:
+ raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}')
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ qs = parse_qs(urlparse(url).query)
+
+ if not self._yes_playlist(show_id, qs.get('shareHightlight')):
+ return self._extract_highlight(show_id, qs['shareHightlight'][0])
+ elif not self._yes_playlist(show_id, qs.get('shareReplayId')):
+ return self._extract_replay(show_id, qs['shareReplayId'][0])
+
+ show_info = self._download_json(
+ f'{self._API_URL}/shows/now.{show_id}/', show_id,
+ note=f'Downloading JSON vod list for show {show_id}')
+
+ return self.playlist_result(
+ itertools.chain(self._extract_show_replays(show_id), self._extract_show_highlights(show_id)),
+ show_id, show_info.get('title'))
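
Note: several fan-made captions can share one locale, so the subtitle handling above disambiguates them with the first free `_fanN` suffix. A standalone sketch of that uniquifier:

import itertools

def unique_fan_lang(lang, sub_dict):
    # ko -> ko_fan1; if that key is taken, ko_fan2; and so on
    return lang + '_fan%d' % next(
        i for i in itertools.count(1) if f'{lang}_fan{i}' not in sub_dict)

subtitles = {}
for url in ('https://example.invalid/a.vtt', 'https://example.invalid/b.vtt'):
    subtitles.setdefault(unique_fan_lang('ko', subtitles), []).append({'url': url})
assert sorted(subtitles) == ['ko_fan1', 'ko_fan2']
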
diff --git a/hypervideo_dl/extractor/nba.py b/hypervideo_dl/extractor/nba.py
index 359cc52..d8fc824 100644
--- a/hypervideo_dl/extractor/nba.py
+++ b/hypervideo_dl/extractor/nba.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import functools
import re
@@ -94,7 +92,6 @@ class NBAWatchBaseIE(NBACVPBaseIE):
formats.extend(cvp_info['formats'])
info = merge_dicts(info, cvp_info)
- self._sort_formats(formats)
info['formats'] = formats
return info
@@ -320,7 +317,6 @@ class NBABaseIE(NBACVPBaseIE):
subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
info = merge_dicts(info, cvp_info)
- self._sort_formats(formats)
else:
info.update(self._embed_url_result(team, video['videoId']))
diff --git a/hypervideo_dl/extractor/nbc.py b/hypervideo_dl/extractor/nbc.py
index 1094034..1ea6355 100644
--- a/hypervideo_dl/extractor/nbc.py
+++ b/hypervideo_dl/extractor/nbc.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import base64
import json
import re
@@ -9,18 +7,24 @@ from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote
from ..utils import (
+ ExtractorError,
int_or_none,
parse_age_limit,
parse_duration,
RegexNotFoundError,
smuggle_url,
+ str_or_none,
+ traverse_obj,
try_get,
+ unified_strdate,
unified_timestamp,
update_url_query,
+ url_basename,
+ variadic,
)
-class NBCIE(ThePlatformIE):
+class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
@@ -186,6 +190,7 @@ class NBCIE(ThePlatformIE):
class NBCSportsVPlayerIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+ _EMBED_REGEX = [r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE]
_TESTS = [{
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
@@ -209,13 +214,6 @@ class NBCSportsVPlayerIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- video_urls = re.search(
- r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
- if video_urls:
- return video_urls.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -307,7 +305,6 @@ class NBCSportsStreamIE(AdobePassIE):
'resourceId': base64.b64encode(resource.encode()).decode(),
}).encode())['tokenizedUrl']
formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'title': title,
@@ -317,8 +314,9 @@ class NBCSportsStreamIE(AdobePassIE):
}
-class NBCNewsIE(ThePlatformIE):
+class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1']
_TESTS = [
{
@@ -438,7 +436,6 @@ class NBCNewsIE(ThePlatformIE):
'tbr': tbr,
'ext': 'mp4',
})
- self._sort_formats(formats)
subtitles = {}
closed_captioning = video_data.get('closedCaptioning')
@@ -581,8 +578,7 @@ class NBCOlympicsStreamIE(AdobePassIE):
for f in formats:
# -http_seekable requires ffmpeg 4.3+ but it doesn't seem possible to
# download with ffmpeg without this option
- f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0']
- self._sort_formats(formats)
+ f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
return {
'id': pid,
@@ -591,3 +587,168 @@ class NBCOlympicsStreamIE(AdobePassIE):
'formats': formats,
'is_live': is_live,
}
+
+
+class NBCStationsIE(InfoExtractor):
+ _DOMAIN_RE = '|'.join(map(re.escape, (
+ 'nbcbayarea', 'nbcboston', 'nbcchicago', 'nbcconnecticut', 'nbcdfw', 'nbclosangeles',
+ 'nbcmiami', 'nbcnewyork', 'nbcphiladelphia', 'nbcsandiego', 'nbcwashington',
+ 'necn', 'telemundo52', 'telemundoarizona', 'telemundochicago', 'telemundonuevainglaterra',
+ )))
+ _VALID_URL = rf'https?://(?:www\.)?(?P<site>{_DOMAIN_RE})\.com/(?:[^/?#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
+
+ _TESTS = [{
+ 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
+ 'md5': '462041d91bd762ef5a38b7d85d6dc18f',
+ 'info_dict': {
+ 'id': '2968618',
+ 'ext': 'mp4',
+ 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
+ 'description': None,
+ 'timestamp': 1661135892,
+ 'upload_date': '20220821',
+ 'uploader': 'NBC 4',
+ 'uploader_id': 'KNBC',
+ 'channel': 'nbclosangeles',
+ },
+ }, {
+ 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
+ 'md5': '0917dcf7885be1023a9220630d415f67',
+ 'info_dict': {
+ 'id': '2247002',
+ 'ext': 'mp4',
+ 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
+ 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
+ 'timestamp': 1660886507,
+ 'upload_date': '20220819',
+ 'uploader': 'Telemundo Arizona',
+ 'uploader_id': 'KTAZ',
+ 'channel': 'telemundoarizona',
+ },
+ }]
+
+ _RESOLUTIONS = {
+ '1080': '1920',
+ '720': '1280',
+ '540': '960',
+ '360': '640',
+ '234': '416',
+ }
+
+ def _real_extract(self, url):
+ channel, video_id = self._match_valid_url(url).group('site', 'id')
+ webpage = self._download_webpage(url, video_id)
+
+ nbc_data = self._search_json(
+ r'<script>var\s*nbc\s*=', webpage, 'NBC JSON data', video_id)
+ pdk_acct = nbc_data.get('pdkAcct') or 'Yh1nAC'
+ fw_ssid = traverse_obj(nbc_data, ('video', 'fwSSID'))
+ fw_network_id = traverse_obj(nbc_data, ('video', 'fwNetworkID'), default='382114')
+
+ video_data = self._parse_json(self._html_search_regex(
+ r'data-videos="([^"]*)"', webpage, 'video data', default='{}'), video_id)
+ video_data = variadic(video_data)[0]
+ video_data.update(self._parse_json(self._html_search_regex(
+ r'data-meta="([^"]*)"', webpage, 'metadata', default='{}'), video_id))
+
+ formats = []
+
+ if video_data.get('mpx_is_livestream') == '1':
+ live = True
+ player_id = traverse_obj(
+ video_data, 'mpx_m3upid', ('video', 'meta', 'mpx_m3upid'), 'mpx_pid',
+ ('video', 'meta', 'mpx_pid'), 'pid_streaming_web_medium')
+ query = {
+ 'mbr': 'true',
+ 'assetTypes': 'LegacyRelease',
+ 'fwsitesection': fw_ssid,
+ 'fwNetworkID': fw_network_id,
+ 'pprofile': 'ots_desktop_html',
+ 'sensitive': 'false',
+ 'w': '1920',
+ 'h': '1080',
+ 'rnd': '1660303',
+ 'mode': 'LIVE',
+ 'format': 'SMIL',
+ 'tracking': 'true',
+ 'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+ 'vpaid': 'script',
+ 'schema': '2.0',
+ 'SDK': 'PDK+6.1.3',
+ }
+ info = {
+ 'title': f'{channel} livestream',
+ }
+
+ else:
+ live = False
+ player_id = traverse_obj(
+ video_data, ('video', 'meta', 'pid_streaming_web_high'), 'pid_streaming_web_high',
+ ('video', 'meta', 'mpx_pid'), 'mpx_pid')
+
+ date_string = traverse_obj(video_data, 'date_string', 'date_gmt')
+ if date_string:
+ date_string = self._search_regex(
+ r'datetime="([^"]+)"', date_string, 'date string', fatal=False)
+ else:
+ date_string = traverse_obj(
+ nbc_data, ('dataLayer', 'adobe', 'prop70'), ('dataLayer', 'adobe', 'eVar70'),
+ ('dataLayer', 'adobe', 'eVar59'))
+
+ video_url = traverse_obj(video_data, ('video', 'meta', 'mp4_url'), 'mp4_url')
+ if video_url:
+ height = url_basename(video_url).split('-')[1].split('p')[0]
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'width': int_or_none(self._RESOLUTIONS.get(height)),
+ 'height': int_or_none(height),
+ 'format_id': f'http-{height}',
+ })
+
+ query = {
+ 'mbr': 'true',
+ 'assetTypes': 'LegacyRelease',
+ 'fwsitesection': fw_ssid,
+ 'fwNetworkID': fw_network_id,
+ 'format': 'redirect',
+ 'manifest': 'm3u',
+ 'Tracking': 'true',
+ 'Embedded': 'true',
+ 'formats': 'MPEG4',
+ }
+ info = {
+ 'title': video_data.get('title') or traverse_obj(
+ nbc_data, ('dataLayer', 'contenttitle'), ('dataLayer', 'title'),
+ ('dataLayer', 'adobe', 'prop22'), ('dataLayer', 'id')),
+ 'description': traverse_obj(video_data, 'summary', 'excerpt', 'video_hero_text'),
+ 'upload_date': str_or_none(unified_strdate(date_string)),
+ 'timestamp': int_or_none(unified_timestamp(date_string)),
+ }
+
+ if not player_id:
+ raise ExtractorError(
+ 'No video player ID or livestream player ID found in webpage', expected=True)
+
+ headers = {'Origin': f'https://www.{channel}.com'}
+ manifest, urlh = self._download_webpage_handle(
+ f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
+ headers=headers, query=query, note='Downloading manifest')
+ if live:
+ manifest_url = self._search_regex(r'<video src="([^"]*)', manifest, 'manifest URL')
+ else:
+ manifest_url = urlh.geturl()
+
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', headers=headers, m3u8_id='hls',
+ fatal=live, live=live, errnote='No HLS formats found'))
+
+ return {
+ 'id': str_or_none(video_id),
+ 'channel': channel,
+ 'uploader': str_or_none(nbc_data.get('on_air_name')),
+ 'uploader_id': str_or_none(nbc_data.get('callLetters')),
+ 'formats': formats,
+ 'is_live': live,
+ **info,
+ }
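
Note: in `NBCStationsIE` above, the progressive MP4 branch reads the height out of the file name (`...-720p.mp4`) and maps it to a width via the `_RESOLUTIONS` table. A standalone sketch of that derivation (sample URL invented; it assumes the single-hyphen `name-<height>p.mp4` shape the extractor expects):

import posixpath
from urllib.parse import urlparse

_RESOLUTIONS = {'1080': '1920', '720': '1280', '540': '960', '360': '640', '234': '416'}

def http_format(video_url):
    # 'clip-720p.mp4' -> height 720, width looked up from the table
    height = posixpath.basename(urlparse(video_url).path).split('-')[1].split('p')[0]
    return {
        'url': video_url,
        'ext': 'mp4',
        'width': int(_RESOLUTIONS[height]) if height in _RESOLUTIONS else None,
        'height': int(height),
        'format_id': f'http-{height}',
    }

fmt = http_format('https://media.example.invalid/video/clip-720p.mp4')
assert (fmt['width'], fmt['height'], fmt['format_id']) == (1280, 720, 'http-720')
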
diff --git a/hypervideo_dl/extractor/ndr.py b/hypervideo_dl/extractor/ndr.py
index 1917254..41ea362 100644
--- a/hypervideo_dl/extractor/ndr.py
+++ b/hypervideo_dl/extractor/ndr.py
@@ -1,14 +1,15 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import re
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
determine_ext,
+ ExtractorError,
int_or_none,
- parse_duration,
+ merge_dicts,
+ parse_iso8601,
qualities,
try_get,
- unified_strdate,
urljoin,
)
@@ -17,120 +18,139 @@ class NDRBaseIE(InfoExtractor):
def _real_extract(self, url):
mobj = self._match_valid_url(url)
display_id = next(group for group in mobj.groups() if group)
- id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
- return self._extract_embed(webpage, display_id, id)
+ return self._extract_embed(webpage, display_id, url)
class NDRIE(NDRBaseIE):
IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
- _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
_TESTS = [{
+ # httpVideo, same content id
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+ 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
'info_dict': {
'id': 'hafengeburtstag988',
+ 'display_id': 'Party-Poette-und-Parade',
'ext': 'mp4',
'title': 'Party, Pötte und Parade',
- 'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg',
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
- 'series': None,
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20150508',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1431255671,
+ 'upload_date': '20150510',
'duration': 3498,
},
- }, {
- 'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html',
- 'only_matching': True
- }, {
- 'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html',
- 'info_dict': {
- 'id': 'kommunalwahl1296',
- 'ext': 'mp4',
- 'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik',
- 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg',
- 'description': 'md5:5c6e2ad744cef499135735a1036d7aa7',
- 'series': 'Hallo Niedersachsen',
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20210913',
- 'duration': 438,
+ 'params': {
+ 'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
- 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ # httpVideo, different content id
+ 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
+ 'md5': '1043ff203eab307f0c51702ec49e9a71',
'info_dict': {
- 'id': 'sendung1091858',
+ 'id': 'osna272',
+ 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
'ext': 'mp4',
- 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
- 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg',
- 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
- 'series': 'extra 3',
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20201111',
- 'duration': 1749,
- }
+ 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
+ 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1442059200,
+ 'upload_date': '20150912',
+ 'duration': 510,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
}, {
+ # httpAudio, same content id
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
'info_dict': {
'id': 'audio51535',
+ 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
'ext': 'mp3',
'title': 'La Valette entgeht der Hinrichtung',
- 'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg',
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
- 'upload_date': '20140729',
- 'duration': 884.0,
+ 'uploader': 'ndrinfo',
+ 'timestamp': 1631711863,
+ 'upload_date': '20210915',
+ 'duration': 884,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with subtitles
+ 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ 'info_dict': {
+ 'id': 'extra18674',
+ 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
+ 'ext': 'mp4',
+ 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
+ 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20201207',
+ 'timestamp': 1614349457,
+ 'duration': 1749,
+ 'subtitles': {
+ 'de': [{
+ 'ext': 'ttml',
+ 'url': r're:^https://www\.ndr\.de.+',
+ }],
+ },
},
- 'expected_warnings': ['unable to extract json url'],
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
+ 'only_matching': True,
}]
- def _extract_embed(self, webpage, display_id, id):
- formats = []
- base_url = 'https://www.ndr.de'
- json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
- 'json url', fatal=False)
- if json_url:
- data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
- id, fatal=False)
- info_json = data_json.get('_info', {})
- media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray'])
- for media in media_json:
- if media.get('_quality') == 'auto':
- formats.extend(self._extract_m3u8_formats(media['_stream'], id))
- subtitles = {}
- sub_url = data_json.get('_subtitleUrl')
- if sub_url:
- subtitles.setdefault('de', []).append({
- 'url': base_url + sub_url,
- })
- self._sort_formats(formats)
- return {
- 'id': id,
- 'title': info_json.get('clipTitle'),
- 'thumbnail': base_url + data_json.get('_previewImage'),
- 'description': info_json.get('clipDescription'),
- 'series': info_json.get('seriesTitle') or None,
- 'channel': info_json.get('channelTitle'),
- 'upload_date': unified_strdate(info_json.get('clipDate')),
- 'duration': data_json.get('_duration'),
- 'formats': formats,
- 'subtitles': subtitles,
- }
- else:
- json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
- '_belongsToPodcast-', '')
- data_json = self._download_json(json_url, id, fatal=False)
- return {
- 'id': id,
- 'title': data_json.get('title'),
- 'thumbnail': base_url + data_json.get('poster'),
- 'description': data_json.get('summary'),
- 'upload_date': unified_strdate(data_json.get('publicationDate')),
- 'duration': parse_duration(data_json.get('duration')),
- 'formats': [{
- 'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
- 'vcodec': 'none',
- 'ext': 'mp3',
- }],
- }
+ def _extract_embed(self, webpage, display_id, url):
+ embed_url = (
+ self._html_search_meta(
+ 'embedURL', webpage, 'embed URL',
+ default=None)
+ or self._search_regex(
+ r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=None)
+ or self._search_regex(
+ r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=''))
+ # some more work is needed if we only found a sophoraID
+ if re.match(r'^[a-z]+\d+$', embed_url):
+ # get the initial part of the URL path, e.g. /panorama/archiv/2022/
+ parsed_url = compat_urllib_parse_urlparse(url)
+ path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
+ # find tell-tale image with the actual ID
+ ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
+ # or try to use special knowledge!
+ NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
+ embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
+ if not embed_url:
+ raise ExtractorError('Unable to extract embedUrl')
+
+ description = self._search_regex(
+ r'<p[^>]+itemprop="description">([^<]+)</p>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ timestamp = parse_iso8601(
+ self._search_regex(
+ (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
+ r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
+ webpage, 'upload date', group='cont', default=None))
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'display_id': display_id,
+ 'description': description,
+ 'timestamp': timestamp,
+ }, info)
class NJoyIE(NDRBaseIE):
@@ -154,19 +174,19 @@ class NJoyIE(NDRBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpVideo, different content id
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
'md5': '417660fffa90e6df2fda19f1b40a64d8',
'info_dict': {
- 'id': 'dockville882',
+ 'id': 'livestream283',
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
- 'ext': 'mp4',
- 'title': '"Ich hab noch nie" mit Felix Jaehn',
- 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+ 'ext': 'mp3',
+ 'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
+ 'description': 'md5:681698f527b8601e511e7b79edde7d2c',
'uploader': 'njoy',
- 'upload_date': '20150822',
- 'duration': 211,
+ 'upload_date': '20210830',
},
'params': {
'skip_download': True,
@@ -176,22 +196,29 @@ class NJoyIE(NDRBaseIE):
'only_matching': True,
}]
- def _extract_embed(self, webpage, display_id, id):
+ def _extract_embed(self, webpage, display_id, url=None):
+ # find tell-tale URL with the actual ID, or ...
video_id = self._search_regex(
- r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
- description = self._search_regex(
- r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
- webpage, 'description', fatal=False)
+ (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
+ r'<iframe[^>]+id="pp_([\da-z]+)"', ),
+ webpage, 'NDR id', default=None)
+
+ description = (
+ self._html_search_meta('description', webpage)
+ or self._search_regex(
+ r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+ webpage, 'description', fatal=False))
return {
'_type': 'url_transparent',
'ie_key': 'NDREmbedBase',
'url': 'ndr:%s' % video_id,
'display_id': display_id,
'description': description,
+ 'title': display_id.replace('-', ' ').strip(),
}
-class NDREmbedBaseIE(InfoExtractor):
+class NDREmbedBaseIE(InfoExtractor): # XXX: Conventionally, concrete class names do not end in BaseIE
IE_NAME = 'ndr:embed:base'
_VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
_TESTS = [{
@@ -239,7 +266,6 @@ class NDREmbedBaseIE(InfoExtractor):
ff['vcodec'] = 'none'
ff['ext'] = ext or 'mp3'
formats.append(ff)
- self._sort_formats(formats)
config = playlist['config']
@@ -288,9 +314,9 @@ class NDREmbedBaseIE(InfoExtractor):
}
-class NDREmbedIE(NDREmbedBaseIE):
+class NDREmbedIE(NDREmbedBaseIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'ndr:embed'
- _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
_TESTS = [{
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
@@ -303,6 +329,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'upload_date': '20150907',
'duration': 132,
},
+ 'skip': 'No longer available',
}, {
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
'md5': '002085c44bae38802d94ae5802a36e78',
@@ -318,6 +345,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
'url': 'http://www.ndr.de/info/audio51535-player.html',
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
@@ -327,7 +355,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'title': 'La Valette entgeht der Hinrichtung',
'is_live': False,
'uploader': 'ndrinfo',
- 'upload_date': '20140729',
+ 'upload_date': '20210915',
'duration': 884,
},
'params': {
@@ -348,15 +376,17 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpVideoLive
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
'info_dict': {
'id': 'livestream217',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
- 'upload_date': '20150910',
+ 'upload_date': '20210409',
+ 'uploader': 'ndrtv',
},
'params': {
'skip_download': True,
@@ -382,7 +412,7 @@ class NDREmbedIE(NDREmbedBaseIE):
}]
-class NJoyEmbedIE(NDREmbedBaseIE):
+class NJoyEmbedIE(NDREmbedBaseIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'njoy:embed'
_VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
_TESTS = [{
@@ -394,9 +424,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'ext': 'mp4',
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
'is_live': False,
- 'upload_date': '20150807',
+ 'upload_date': '20200826',
'duration': 1011,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
# httpAudio
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
@@ -413,6 +444,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpAudioLive, no explicit ext
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
@@ -422,7 +454,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'uploader': 'njoy',
- 'upload_date': '20150810',
+ 'upload_date': '20210830',
},
'params': {
'skip_download': True,
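
Note: the reworked `_extract_embed` above falls back to a bare sophoraID and then hunts the page for a tell-tale asset path carrying the real content ID. A standalone sketch of that recovery (sample page invented):

import re
from urllib.parse import urlparse

def recover_ndr_id(page_url, display_id, webpage):
    # Take the path prefix before the display_id, e.g. '/fernsehen/',
    # then look for '<prefix><letters><digits>' elsewhere in the page
    prefix = re.search(r'(.+/)%s' % re.escape(display_id), urlparse(page_url).path or '')
    prefix = prefix.group(1) if prefix else ''
    mobj = re.search(r'%s([a-z]+\d+)(?!\.)\b' % re.escape(prefix), webpage)
    return mobj.group(1) if mobj else None

page_url = 'https://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html'
webpage = '<img src="/fernsehen/hafengeburtstag990-contentxl.jpg">'
assert recover_ndr_id(page_url, 'Party-Poette-und-Parade', webpage) == 'hafengeburtstag990'
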
diff --git a/hypervideo_dl/extractor/ndtv.py b/hypervideo_dl/extractor/ndtv.py
index bc3eb91..bfe52f7 100644
--- a/hypervideo_dl/extractor/ndtv.py
+++ b/hypervideo_dl/extractor/ndtv.py
@@ -1,16 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote_plus
-)
-from ..utils import (
- parse_duration,
- remove_end,
- unified_strdate,
- urljoin
-)
+from ..utils import parse_duration, remove_end, unified_strdate, urljoin
class NDTVIE(InfoExtractor):
@@ -83,7 +74,7 @@ class NDTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
# '__title' does not contain extra words such as sub-site name, "Video" etc.
- title = compat_urllib_parse_unquote_plus(
+ title = urllib.parse.unquote_plus(
self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
or self._og_search_title(webpage))
diff --git a/hypervideo_dl/extractor/nebula.py b/hypervideo_dl/extractor/nebula.py
index 77f2535..861fcb1 100644
--- a/hypervideo_dl/extractor/nebula.py
+++ b/hypervideo_dl/extractor/nebula.py
@@ -1,17 +1,13 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import json
import time
-import urllib
+import urllib.error
+import urllib.parse
-from ..utils import (
- ExtractorError,
- parse_iso8601,
- try_get,
-)
from .common import InfoExtractor
+from ..utils import ExtractorError, parse_iso8601, try_get
+
+_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
class NebulaBaseIE(InfoExtractor):
@@ -21,9 +17,8 @@ class NebulaBaseIE(InfoExtractor):
_nebula_bearer_token = None
_zype_access_token = None
- def _perform_nebula_auth(self):
- username, password = self._get_login_info()
- if not (username and password):
+ def _perform_nebula_auth(self, username, password):
+ if not username or not password:
self.raise_login_required()
data = json.dumps({'email': username, 'password': password}).encode('utf8')
@@ -54,7 +49,7 @@ class NebulaBaseIE(InfoExtractor):
return response['key']
- def _retrieve_nebula_api_token(self):
+ def _retrieve_nebula_api_token(self, username=None, password=None):
"""
Check cookie jar for valid token. Try to authenticate using credentials if no valid token
can be found in the cookie jar.
@@ -68,7 +63,7 @@ class NebulaBaseIE(InfoExtractor):
if nebula_api_token:
return nebula_api_token
- return self._perform_nebula_auth()
+ return self._perform_nebula_auth(username, password)
def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
assert method in ('GET', 'POST',)
@@ -149,18 +144,17 @@ class NebulaBaseIE(InfoExtractor):
}
def _perform_login(self, username=None, password=None):
- # FIXME: username should be passed from here to inner functions
- self._nebula_api_token = self._retrieve_nebula_api_token()
+ self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
self._nebula_bearer_token = self._fetch_nebula_bearer_token()
self._zype_access_token = self._fetch_zype_access_token()
class NebulaIE(NebulaBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
+ _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
_TESTS = [
{
'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
- 'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
+ 'md5': '14944cfee8c7beeea106320c47560efc',
'info_dict': {
'id': '5c271b40b13fd613090034fd',
'ext': 'mp4',
@@ -172,14 +166,21 @@ class NebulaIE(NebulaBaseIE):
'channel_id': 'lindsayellis',
'uploader': 'Lindsay Ellis',
'uploader_id': 'lindsayellis',
- },
- 'params': {
- 'usenetrc': True,
+ 'timestamp': 1533009600,
+ 'uploader_url': 'https://nebula.app/lindsayellis',
+ 'series': 'Lindsay Ellis',
+ 'average_rating': int,
+ 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
+ 'channel_url': 'https://nebula.app/lindsayellis',
+ 'creator': 'Lindsay Ellis',
+ 'duration': 2212,
+ 'view_count': int,
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
},
},
{
'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
- 'md5': '6d4edd14ce65720fa63aba5c583fb328',
+ 'md5': 'd05739cf6c38c09322422f696b569c23',
'info_dict': {
'id': '5e7e78171aaf320001fbd6be',
'ext': 'mp4',
@@ -191,14 +192,20 @@ class NebulaIE(NebulaBaseIE):
'channel_id': 'realengineering',
'uploader': 'Real Engineering',
'uploader_id': 'realengineering',
- },
- 'params': {
- 'usenetrc': True,
+ 'view_count': int,
+ 'series': 'Real Engineering',
+ 'average_rating': int,
+ 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
+ 'creator': 'Real Engineering',
+ 'duration': 841,
+ 'channel_url': 'https://nebula.app/realengineering',
+ 'uploader_url': 'https://nebula.app/realengineering',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
},
},
{
'url': 'https://nebula.app/videos/money-episode-1-the-draw',
- 'md5': '8c7d272910eea320f6f8e6d3084eecf5',
+ 'md5': 'ebe28a7ad822b9ee172387d860487868',
'info_dict': {
'id': '5e779ebdd157bc0001d1c75a',
'ext': 'mp4',
@@ -210,9 +217,15 @@ class NebulaIE(NebulaBaseIE):
'channel_id': 'tom-scott-presents-money',
'uploader': 'Tom Scott Presents: Money',
'uploader_id': 'tom-scott-presents-money',
- },
- 'params': {
- 'usenetrc': True,
+ 'uploader_url': 'https://nebula.app/tom-scott-presents-money',
+ 'duration': 825,
+ 'channel_url': 'https://nebula.app/tom-scott-presents-money',
+ 'view_count': int,
+ 'series': 'Tom Scott Presents: Money',
+ 'display_id': 'money-episode-1-the-draw',
+ 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
+ 'average_rating': int,
+ 'creator': 'Tom Scott Presents: Money',
},
},
{
@@ -233,9 +246,37 @@ class NebulaIE(NebulaBaseIE):
return self._build_video_info(video)
-class NebulaCollectionIE(NebulaBaseIE):
- IE_NAME = 'nebula:collection'
- _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
+class NebulaSubscriptionsIE(NebulaBaseIE):
+ IE_NAME = 'nebula:subscriptions'
+ _VALID_URL = rf'{_BASE_URL_RE}/myshows'
+ _TESTS = [
+ {
+ 'url': 'https://nebula.app/myshows',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': 'myshows',
+ },
+ },
+ ]
+
+ def _generate_playlist_entries(self):
+ next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
+ page_num = 1
+ while next_url:
+ channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
+ note=f'Retrieving subscriptions page {page_num}')
+ for episode in channel['results']:
+ yield self._build_video_info(episode)
+ next_url = channel['next']
+ page_num += 1
+
+ def _real_extract(self, url):
+ return self.playlist_result(self._generate_playlist_entries(), 'myshows')
+
+
+class NebulaChannelIE(NebulaBaseIE):
+ IE_NAME = 'nebula:channel'
+ _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
_TESTS = [
{
'url': 'https://nebula.app/tom-scott-presents-money',
@@ -245,9 +286,6 @@ class NebulaCollectionIE(NebulaBaseIE):
'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
},
'playlist_count': 5,
- 'params': {
- 'usenetrc': True,
- },
}, {
'url': 'https://nebula.app/lindsayellis',
'info_dict': {
@@ -256,9 +294,6 @@ class NebulaCollectionIE(NebulaBaseIE):
'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
},
'playlist_mincount': 100,
- 'params': {
- 'usenetrc': True,
- },
},
]
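
Editor's note: the refactor above threads the username/password from _perform_login down into _perform_nebula_auth instead of re-reading them inside the helper (removing the old FIXME), and still prefers a token already present in the cookie jar. A minimal sketch of that control flow, with hypothetical stand-ins (NebulaAuth, perform_auth) for the real HTTP calls:

class NebulaAuth:
    def __init__(self, cookie_token=None):
        self.cookie_token = cookie_token  # e.g. a token found in the cookie jar

    def retrieve_api_token(self, username=None, password=None):
        if self.cookie_token:  # reuse a still-valid token when possible
            return self.cookie_token
        return self.perform_auth(username, password)

    def perform_auth(self, username, password):
        if not username or not password:  # mirrors raise_login_required()
            raise RuntimeError('Nebula credentials are required')
        return f'token-for-{username}'  # stand-in for the real HTTP login
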
diff --git a/hypervideo_dl/extractor/nerdcubed.py b/hypervideo_dl/extractor/nerdcubed.py
index 9feccc6..7c801b5 100644
--- a/hypervideo_dl/extractor/nerdcubed.py
+++ b/hypervideo_dl/extractor/nerdcubed.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import datetime
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/neteasemusic.py b/hypervideo_dl/extractor/neteasemusic.py
index 57b4774..5957098 100644
--- a/hypervideo_dl/extractor/neteasemusic.py
+++ b/hypervideo_dl/extractor/neteasemusic.py
@@ -1,20 +1,25 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from hashlib import md5
+import itertools
+import json
+import re
+import time
from base64 import b64encode
+from binascii import hexlify
from datetime import datetime
-import re
+from hashlib import md5
+from random import randint
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_str,
- compat_itertools_count,
-)
+from ..aes import aes_ecb_encrypt, pkcs7_padding
+from ..compat import compat_urllib_parse_urlencode
from ..utils import (
- sanitized_Request,
+ ExtractorError,
+ bytes_to_intlist,
+ error_to_compat_str,
float_or_none,
+ int_or_none,
+ intlist_to_bytes,
+ sanitized_Request,
+ try_get,
)
@@ -26,7 +31,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
@classmethod
def _encrypt(cls, dfsid):
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
- string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
+ string_bytes = bytearray(str(dfsid).encode('ascii'))
salt_len = len(salt_bytes)
for i in range(len(string_bytes)):
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
@@ -35,32 +40,105 @@ class NetEaseMusicBaseIE(InfoExtractor):
result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-')
+ def make_player_api_request_data_and_headers(self, song_id, bitrate):
+ KEY = b'e82ckenh8dichen8'
+ URL = '/api/song/enhance/player/url'
+ now = int(time.time() * 1000)
+ rand = randint(0, 1000)
+ cookie = {
+ 'osver': None,
+ 'deviceId': None,
+ 'appver': '8.0.0',
+ 'versioncode': '140',
+ 'mobilename': None,
+ 'buildver': '1623435496',
+ 'resolution': '1920x1080',
+ '__csrf': '',
+ 'os': 'pc',
+ 'channel': None,
+ 'requestId': '{0}_{1:04}'.format(now, rand),
+ }
+ request_text = json.dumps(
+ {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
+ separators=(',', ':'))
+ message = 'nobody{0}use{1}md5forencrypt'.format(
+ URL, request_text).encode('latin1')
+ msg_digest = md5(message).hexdigest()
+
+ data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
+ URL, request_text, msg_digest)
+ data = pkcs7_padding(bytes_to_intlist(data))
+ encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
+ encrypted_params = hexlify(encrypted).decode('ascii').upper()
+
+ cookie = '; '.join(
+ ['{0}={1}'.format(k, v if v is not None else 'undefined')
+ for [k, v] in cookie.items()])
+
+ headers = {
+            'User-Agent': self.get_param('http_headers')['User-Agent'],
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': 'https://music.163.com',
+ 'Cookie': cookie,
+ }
+ return ('params={0}'.format(encrypted_params), headers)
+
+ def _call_player_api(self, song_id, bitrate):
+ url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
+ data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
+ try:
+ msg = 'empty result'
+ result = self._download_json(
+ url, song_id, data=data.encode('ascii'), headers=headers)
+ if result:
+ return result
+ except ExtractorError as e:
+ if type(e.cause) in (ValueError, TypeError):
+ # JSON load failure
+ raise
+ except Exception as e:
+ msg = error_to_compat_str(e)
+ self.report_warning('%s API call (%s) failed: %s' % (
+ song_id, bitrate, msg))
+ return {}
+
def extract_formats(self, info):
+ err = 0
formats = []
+ song_id = info['id']
for song_format in self._FORMATS:
details = info.get(song_format)
if not details:
continue
- song_file_path = '/%s/%s.%s' % (
- self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
-
- # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
- # from NetEase's CDN provider that can be used if m5.music.126.net does not
- # work, especially for users outside of Mainland China
- # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
- for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
- 'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
- song_url = host + song_file_path
+
+ bitrate = int_or_none(details.get('bitrate')) or 999000
+ data = self._call_player_api(song_id, bitrate)
+ for song in try_get(data, lambda x: x['data'], list) or []:
+ song_url = try_get(song, lambda x: x['url'])
+ if not song_url:
+ continue
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
'ext': details.get('extension'),
- 'abr': float_or_none(details.get('bitrate'), scale=1000),
+ 'abr': float_or_none(song.get('br'), scale=1000),
'format_id': song_format,
- 'filesize': details.get('size'),
- 'asr': details.get('sr')
+ 'filesize': int_or_none(song.get('size')),
+ 'asr': int_or_none(details.get('sr')),
})
- break
+                elif err == 0:
+                    err = try_get(song, lambda x: x['code'], int) or 0
+
+ if not formats:
+ msg = 'No media links found'
+ if err != 0 and (err < 200 or err >= 400):
+ raise ExtractorError(
+                    '%s (site code %d)' % (msg, err), expected=True)
+ else:
+ self.raise_geo_restricted(
+                    msg + ': this video is probably not available in your region due to geo-restriction.',
+ countries=['CN'])
+
return formats
@classmethod
@@ -76,33 +154,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song'
IE_DESC = '网易云音乐'
- _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397',
- 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
+ 'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': {
'id': '32102397',
'ext': 'mp3',
- 'title': 'Bad Blood (feat. Kendrick Lamar)',
+ 'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
- 'upload_date': '20150517',
- 'timestamp': 1431878400,
- 'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
+ 'upload_date': '20150516',
+ 'timestamp': 1431792000,
+ 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
},
- 'skip': 'Blocked outside Mainland China',
- }, {
- 'note': 'No lyrics translation.',
- 'url': 'http://music.163.com/#/song?id=29822014',
- 'info_dict': {
- 'id': '29822014',
- 'ext': 'mp3',
- 'title': '听见下雨的声音',
- 'creator': '周杰伦',
- 'upload_date': '20141225',
- 'timestamp': 1419523200,
- 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
- },
- 'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424',
@@ -112,9 +176,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211',
+ 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600,
},
- 'skip': 'Blocked outside Mainland China',
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
@@ -128,7 +192,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
},
- 'skip': 'Blocked outside Mainland China',
+ }, {
+ 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
+ 'md5': '95826c73ea50b1c288b22180ec9e754d',
+ 'info_dict': {
+ 'id': '95670',
+ 'ext': 'mp3',
+ 'title': '国际歌',
+ 'creator': '马备',
+ 'upload_date': '19911130',
+ 'timestamp': 691516800,
+ 'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
+ },
}]
def _process_lyrics(self, lyrics_info):
@@ -161,7 +236,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info)
- self._sort_formats(formats)
lyrics_info = self.query_api(
'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
@@ -337,7 +411,6 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
{'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
for brs, mv_url in info['brs'].items()
]
- self._sort_formats(formats)
return {
'id': mv_id,
@@ -407,7 +480,6 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
formats = self.extract_formats(info['mainSong'])
- self._sort_formats(formats)
return {
'id': info['mainSong']['id'],
@@ -452,7 +524,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
name = None
desc = None
entries = []
- for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
+ for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api(
'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
% (self._PAGE_SIZE, dj_id, offset),
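
Editor's note: the new make_player_api_request_data_and_headers above implements NetEase's "eapi" request signing: the request JSON is digested with a fixed md5 sentence, joined with '-36cd479b6b5-' separators, AES-ECB-encrypted with a static key, and hex-encoded in upper case. A standalone sketch of the same transform, assuming pycryptodome in place of the bundled aes helpers:

import json
from hashlib import md5

from Crypto.Cipher import AES          # pycryptodome
from Crypto.Util.Padding import pad

EAPI_KEY = b'e82ckenh8dichen8'

def encrypt_eapi_params(api_path, payload):
    request_text = json.dumps(payload, separators=(',', ':'))
    digest = md5(f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')).hexdigest()
    data = f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{digest}'.encode()
    cipher = AES.new(EAPI_KEY, AES.MODE_ECB)
    return cipher.encrypt(pad(data, AES.block_size)).hex().upper()

# The POST body sent to the eapi endpoint would then look like:
body = 'params=' + encrypt_eapi_params(
    '/api/song/enhance/player/url', {'ids': '[32102397]', 'br': 999000, 'header': {}})
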
diff --git a/hypervideo_dl/extractor/netverse.py b/hypervideo_dl/extractor/netverse.py
new file mode 100644
index 0000000..719a9da
--- /dev/null
+++ b/hypervideo_dl/extractor/netverse.py
@@ -0,0 +1,176 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from ..utils import smuggle_url, traverse_obj
+
+
+class NetverseBaseIE(InfoExtractor):
+ _ENDPOINTS = {
+ 'watch': 'watchvideo',
+ 'video': 'watchvideo',
+ 'webseries': 'webseries',
+ 'season': 'webseason_videos',
+ }
+
+ def _call_api(self, slug, endpoint, query={}, season_id='', display_id=None):
+ return self._download_json(
+ f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}',
+ display_id or slug, query=query)
+
+
+class NetverseIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ # Watch video
+ 'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'info_dict': {
+ 'id': 'k4yhqUwINAGtmHx3NkL',
+ 'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
+ 'ext': 'mp4',
+ 'season': 'Season 2016',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T7aV31Y0eGRWBbwkK/x1080',
+ 'episode_number': 22,
+ 'episode': 'Episode 22',
+ 'uploader_id': 'x2ir3vq',
+ 'age_limit': 0,
+ 'tags': [],
+ 'view_count': int,
+ 'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'duration': 2990,
+ 'upload_date': '20210722',
+ 'timestamp': 1626919804,
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+ }
+ }, {
+ # series
+ 'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
+ 'info_dict': {
+ 'id': 'x88izwc',
+ 'title': 'Jadoo Seorang Model',
+ 'ext': 'mp4',
+ 'season': 'Season 2',
+ 'description': 'md5:8a74f70812cca267e19ee0635f0af835',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/Thwuy1YURicFmGu0v/x1080',
+ 'episode_number': 2,
+ 'episode': 'Episode 2',
+ 'view_count': int,
+ 'like_count': int,
+ 'display_id': 'jadoo-seorang-model',
+ 'uploader_id': 'x2ir3vq',
+ 'duration': 635,
+ 'timestamp': 1646372927,
+ 'tags': ['PG069497-hellojadooseason2eps2'],
+ 'upload_date': '20220304',
+ 'uploader': 'Net Prime',
+ 'age_limit': 0,
+ },
+        'skip': 'Video is geo-blocked in some countries'
+ }, {
+ # non www host
+ 'url': 'https://netverse.id/watch/tetangga-baru',
+ 'info_dict': {
+ 'id': 'k4CNGz7V0HJ7vfwZbXy',
+ 'ext': 'mp4',
+ 'title': 'Tetangga Baru',
+ 'season': 'Season 1',
+ 'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T3Ogm1YEnnyjVKAFF/x1080',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'timestamp': 1624538169,
+ 'view_count': int,
+ 'upload_date': '20210624',
+ 'age_limit': 0,
+ 'uploader_id': 'x2ir3vq',
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+ 'tags': ['PG008534', 'tetangga', 'Baru'],
+ 'display_id': 'tetangga-baru',
+ 'duration': 1406,
+ },
+ }, {
+ # /video url
+ 'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
+ 'title': 'Namaku Choi Jadoo',
+ 'info_dict': {
+ 'id': 'x887jzz',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TfuZ_1Y6PboJ5An_s/x1080',
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
+ 'title': 'Namaku Choi Jadoo',
+ 'episode': 'Episode 1',
+ 'age_limit': 0,
+ 'like_count': int,
+ 'view_count': int,
+ 'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
+ 'duration': 780,
+ 'display_id': 'pg067482-hellojadoo-season1',
+ 'uploader_id': 'x2ir3vq',
+ 'uploader': 'Net Prime',
+ 'timestamp': 1645764984,
+ 'upload_date': '20220225',
+ },
+        'skip': 'This video is geo-blocked in some countries'
+ }]
+
+ def _real_extract(self, url):
+ display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
+ program_json = self._call_api(display_id, sites_type)
+ videos = program_json['response']['videos']
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': DailymotionIE.ie_key(),
+ 'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
+ 'display_id': display_id,
+ 'title': videos.get('title'),
+ 'season': videos.get('season_name'),
+ 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
+ 'description': traverse_obj(videos, ('program_detail', 'description')),
+ 'episode_number': videos.get('episode_order'),
+ }
+
+
+class NetversePlaylistIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ # multiple season
+ 'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
+ 'info_dict': {
+ 'id': 'tetangga-masa-gitu',
+ 'title': 'Tetangga Masa Gitu',
+ },
+ 'playlist_count': 519,
+ }, {
+ # single season
+ 'url': 'https://netverse.id/webseries/kelas-internasional',
+ 'info_dict': {
+ 'id': 'kelas-internasional',
+ 'title': 'Kelas Internasional',
+ },
+ 'playlist_count': 203,
+ }]
+
+ def parse_playlist(self, json_data, playlist_id):
+ slug_sample = traverse_obj(json_data, ('related', 'data', ..., 'slug'))[0]
+ for season in traverse_obj(json_data, ('seasons', ..., 'id')):
+ playlist_json = self._call_api(
+ slug_sample, 'season', display_id=playlist_id, season_id=season)
+
+ for current_page in range(playlist_json['response']['season_list']['last_page']):
+ playlist_json = self._call_api(slug_sample, 'season', query={'page': current_page + 1},
+ season_id=season, display_id=playlist_id)
+ for slug in traverse_obj(playlist_json, ('response', ..., 'data', ..., 'slug')):
+ yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE)
+
+ def _real_extract(self, url):
+ playlist_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
+ playlist_data = self._call_api(playlist_id, sites_type)
+
+ return self.playlist_result(
+ self.parse_playlist(playlist_data['response'], playlist_id),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
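
Editor's note: NetverseIE above hands playback to DailymotionIE via a url_transparent result, smuggling the embedder origin along with the URL. A round-trip sketch of the smuggling helpers it relies on (presumably so the Dailymotion API sees the expected embedder):

from hypervideo_dl.utils import smuggle_url, unsmuggle_url

url = smuggle_url('https://www.dailymotion.com/video/x88izwc',
                  {'query': {'embedder': 'https://www.netverse.id'}})
plain_url, data = unsmuggle_url(url)
assert plain_url == 'https://www.dailymotion.com/video/x88izwc'
assert data == {'query': {'embedder': 'https://www.netverse.id'}}
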
diff --git a/hypervideo_dl/extractor/netzkino.py b/hypervideo_dl/extractor/netzkino.py
index 4ad0d8e..9c314e2 100644
--- a/hypervideo_dl/extractor/netzkino.py
+++ b/hypervideo_dl/extractor/netzkino.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -76,7 +72,6 @@ class NetzkinoIE(InfoExtractor):
'ext': 'mp4',
'url': tpl.replace('{}', film_fn) + suffix[key],
} for key, tpl in templates.items()]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/newgrounds.py b/hypervideo_dl/extractor/newgrounds.py
index 6525a6d..9e3286d 100644
--- a/hypervideo_dl/extractor/newgrounds.py
+++ b/hypervideo_dl/extractor/newgrounds.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import re
@@ -175,7 +172,6 @@ class NewgroundsIE(InfoExtractor):
if video_type_description == 'Audio File':
formats[0]['vcodec'] = 'none'
self._check_formats(formats, media_id)
- self._sort_formats(formats)
return {
'id': media_id,
diff --git a/hypervideo_dl/extractor/newspicks.py b/hypervideo_dl/extractor/newspicks.py
new file mode 100644
index 0000000..b6334dc
--- /dev/null
+++ b/hypervideo_dl/extractor/newspicks.py
@@ -0,0 +1,53 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class NewsPicksIE(InfoExtractor):
+ _VALID_URL = r'https://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://newspicks.com/movie-series/11?movieId=1813',
+ 'info_dict': {
+ 'id': '1813',
+ 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
+ 'description': 'md5:09397aad46d6ded6487ff13f138acadf',
+ 'channel': 'HORIE ONE',
+ 'channel_id': '11',
+ 'release_date': '20220117',
+ 'thumbnail': r're:https://.+jpg',
+ 'ext': 'mp4',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
+ webpage = self._download_webpage(url, video_id)
+ entries = self._parse_html5_media_entries(
+ url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
+ if not entries:
+ raise ExtractorError('No HTML5 media elements found')
+ info = entries[0]
+
+ title = self._html_search_meta('og:title', webpage, fatal=False)
+ description = self._html_search_meta(
+            ('og:description', 'twitter:description'), webpage, fatal=False)
+ channel = self._html_search_regex(
+ r'value="11".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
+ if not title or not channel:
+            title, channel = re.split(r'\s*\|\s*', self._html_extract_title(webpage))
+
+ release_date = self._search_regex(
+ r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
+ webpage, 'release date', fatal=False, group=(1, 2, 3))
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'channel': channel,
+ 'channel_id': channel_id,
+ 'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
+ })
+ return info
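
Editor's note: the release-date assembly at the end of _real_extract zero-pads the three captured regex groups into the YYYYMMDD form used by the test above. A worked example:

groups = ('2022', '1', '17')  # what group=(1, 2, 3) returns for "2022年1月17日"
release_date = '%04d%02d%02d' % tuple(map(int, groups))
assert release_date == '20220117'
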
diff --git a/hypervideo_dl/extractor/newstube.py b/hypervideo_dl/extractor/newstube.py
index 479141a..820eb4b 100644
--- a/hypervideo_dl/extractor/newstube.py
+++ b/hypervideo_dl/extractor/newstube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import hashlib
@@ -67,7 +64,6 @@ class NewstubeIE(InfoExtractor):
formats.append(f)
self._check_formats(formats, video_guid)
- self._sort_formats(formats)
return {
'id': video_guid,
diff --git a/hypervideo_dl/extractor/newsy.py b/hypervideo_dl/extractor/newsy.py
index cf31641..a5a7b16 100644
--- a/hypervideo_dl/extractor/newsy.py
+++ b/hypervideo_dl/extractor/newsy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
js_to_json,
@@ -39,7 +36,6 @@ class NewsyIE(InfoExtractor):
fmts, subs = self._extract_m3u8_formats_and_subtitles(data_json['stream'], display_id)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return merge_dicts(ld_json, {
'id': data_json['id'],
'display_id': display_id,
diff --git a/hypervideo_dl/extractor/nextmedia.py b/hypervideo_dl/extractor/nextmedia.py
index 7bd1290..0e47a4d 100644
--- a/hypervideo_dl/extractor/nextmedia.py
+++ b/hypervideo_dl/extractor/nextmedia.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
@@ -80,7 +77,7 @@ class NextMediaIE(InfoExtractor):
return self._og_search_property('description', page)
-class NextMediaActionNewsIE(NextMediaIE):
+class NextMediaActionNewsIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '蘋果日報 - 動新聞'
_VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
_TESTS = [{
@@ -105,7 +102,7 @@ class NextMediaActionNewsIE(NextMediaIE):
return self._extract_from_nextmedia_page(news_id, url, article_page)
-class AppleDailyIE(NextMediaIE):
+class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '臺灣蘋果日報'
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/nexx.py b/hypervideo_dl/extractor/nexx.py
index a521bb6..b4874c8 100644
--- a/hypervideo_dl/extractor/nexx.py
+++ b/hypervideo_dl/extractor/nexx.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import random
import re
@@ -117,8 +114,8 @@ class NexxIE(InfoExtractor):
webpage)
return mobj.group('id') if mobj else None
- @staticmethod
- def _extract_urls(webpage):
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
# Reference:
# 1. https://nx-s.akamaized.net/files/201510/44.pdf
@@ -138,10 +135,6 @@ class NexxIE(InfoExtractor):
return entries
- @staticmethod
- def _extract_url(webpage):
- return NexxIE._extract_urls(webpage)[0]
-
def _handle_error(self, response):
if traverse_obj(response, ('metadata', 'notice'), expected_type=str):
self.report_warning('%s said: %s' % (self.IE_NAME, response['metadata']['notice']))
@@ -459,8 +452,6 @@ class NexxIE(InfoExtractor):
else:
self.raise_no_formats(f'{cdn} formats are currently not supported', video_id)
- self._sort_formats(formats)
-
subtitles = {}
for sub in video.get('captiondata') or []:
if sub.get('data'):
@@ -501,6 +492,8 @@ class NexxIE(InfoExtractor):
class NexxEmbedIE(InfoExtractor):
_VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
+ # Reference. https://nx-s.akamaized.net/files/201510/44.pdf
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1']
_TESTS = [{
'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
'md5': '16746bfc28c42049492385c989b26c4a',
@@ -524,16 +517,6 @@ class NexxEmbedIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- # Reference:
- # 1. https://nx-s.akamaized.net/files/201510/44.pdf
-
- # iFrame Embed Integration
- return [mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
- webpage)]
-
def _real_extract(self, url):
embed_id = self._match_id(url)
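
Editor's note: this is part of the tree-wide move from per-extractor _extract_urls() staticmethods to a declarative _EMBED_REGEX list that the common base class scans. In miniature, with a hypothetical EmbedFinder class and example domain:

import re

class EmbedFinder:
    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>https?://embed\.example\.com/\d+/[^"\']+)["\']']

    @classmethod
    def extract_embed_urls(cls, webpage):
        for pattern in cls._EMBED_REGEX:
            for mobj in re.finditer(pattern, webpage):
                yield mobj.group('url')

page = '<iframe src="https://embed.example.com/748/KC1614647Z27Y7T?autoplay=1"></iframe>'
assert list(EmbedFinder.extract_embed_urls(page)) == [
    'https://embed.example.com/748/KC1614647Z27Y7T?autoplay=1']
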
diff --git a/hypervideo_dl/extractor/nfb.py b/hypervideo_dl/extractor/nfb.py
index a12e503..38e068a 100644
--- a/hypervideo_dl/extractor/nfb.py
+++ b/hypervideo_dl/extractor/nfb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -38,7 +35,6 @@ class NFBIE(InfoExtractor):
player, 'source', default=None, fatal=True)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/nfhsnetwork.py b/hypervideo_dl/extractor/nfhsnetwork.py
index 802f6ca..febad8f 100644
--- a/hypervideo_dl/extractor/nfhsnetwork.py
+++ b/hypervideo_dl/extractor/nfhsnetwork.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -127,7 +124,6 @@ class NFHSNetworkIE(InfoExtractor):
video_id).get('video_url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=isLive)
- self._sort_formats(formats, ['res', 'tbr'])
return {
'id': video_id,
@@ -140,5 +136,6 @@ class NFHSNetworkIE(InfoExtractor):
'uploader_url': uploaderPage,
'location': location,
'upload_date': upload_date,
- 'is_live': isLive
+ 'is_live': isLive,
+ '_format_sort_fields': ('res', 'tbr'),
}
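
Editor's note: note the pattern here; instead of calling self._sort_formats(formats, ['res', 'tbr']), the extractor now returns a '_format_sort_fields' hint and the core does the ordering. A toy stand-in for what that ordering amounts to (the real sorter also honours user preferences and missing fields):

formats = [
    {'format_id': 'low', 'res': 360, 'tbr': 800},
    {'format_id': 'high', 'res': 1080, 'tbr': 4000},
    {'format_id': 'mid', 'res': 720, 'tbr': 2500},
]
sort_fields = ('res', 'tbr')
formats.sort(key=lambda f: tuple(f.get(k) or 0 for k in sort_fields))
assert [f['format_id'] for f in formats] == ['low', 'mid', 'high']  # best last
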
diff --git a/hypervideo_dl/extractor/nfl.py b/hypervideo_dl/extractor/nfl.py
index 821276a..29c53d5 100644
--- a/hypervideo_dl/extractor/nfl.py
+++ b/hypervideo_dl/extractor/nfl.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -56,8 +53,7 @@ class NFLBaseIE(InfoExtractor):
)
)/
'''
- _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})'
- _WORKING = False
+ _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+});?\s*</script>'
def _parse_video_config(self, video_config, display_id):
video_config = self._parse_json(video_config, display_id)
@@ -69,13 +65,12 @@ class NFLBaseIE(InfoExtractor):
'Anvato', mcp_id)
else:
media_id = item.get('id') or item['entityId']
- title = item['title']
+ title = item.get('title')
item_url = item['url']
info = {'id': media_id}
ext = determine_ext(item_url)
if ext == 'm3u8':
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
- self._sort_formats(info['formats'])
else:
info['url'] = item_url
if item.get('audio') is True:
@@ -111,6 +106,9 @@ class NFLIE(NFLBaseIE):
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'NFL',
+ 'tags': 'count:6',
+ 'duration': 157,
+ 'categories': 'count:3',
}
}, {
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
@@ -120,7 +118,8 @@ class NFLIE(NFLBaseIE):
'ext': 'mp3',
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
'description': 'md5:12ada8ee70e6762658c30e223e095075',
- }
+ },
+ 'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
'only_matching': True,
diff --git a/hypervideo_dl/extractor/nhk.py b/hypervideo_dl/extractor/nhk.py
index 3b8efc3..59702b2 100644
--- a/hypervideo_dl/extractor/nhk.py
+++ b/hypervideo_dl/extractor/nhk.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -13,7 +11,7 @@ from ..utils import (
class NhkBaseIE(InfoExtractor):
- _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
+ _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
_TYPE_REGEX = r'/(?P<type>video|audio)/'
@@ -29,7 +27,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
- if episode_id.isdigit():
+ if len(episode_id) == 7:
episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video'
@@ -80,7 +78,6 @@ class NhkBaseIE(InfoExtractor):
m3u8_id='hls', fatal=False)
for f in info['formats']:
f['language'] = lang
- self._sort_formats(info['formats'])
else:
info.update({
'_type': 'url_transparent',
@@ -91,7 +88,8 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE):
- _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], e.g. the 9999a34 test below
+ _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
@@ -131,6 +129,19 @@ class NhkVodIE(NhkBaseIE):
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True,
+ }, {
+ # video, alphabetic character in ID #29670
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
+ 'only_matching': True,
+ 'info_dict': {
+ 'id': 'qfjay6cg',
+ 'ext': 'mp4',
+ 'title': 'DESIGN TALKS plus - Fishermen’s Finery',
+ 'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
+ 'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
+ 'upload_date': '20210615',
+ 'timestamp': 1623722008,
+ }
}]
def _real_extract(self, url):
@@ -228,7 +239,6 @@ class NhkForSchoolBangumiIE(InfoExtractor):
formats = self._extract_m3u8_formats(
f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8',
video_id, ext='mp4', m3u8_id='hls')
- self._sort_formats(formats)
duration = parse_duration(base_values.get('r_duration'))
@@ -309,8 +319,7 @@ class NhkForSchoolProgramListIE(InfoExtractor):
webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)
- title = (self._og_search_title(webpage)
- or self._html_extract_title(webpage)
+ title = (self._generic_title('', webpage)
or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
description = self._html_search_regex(
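
Editor's note: the len(episode_id) == 7 gate above replaces the old isdigit() check so that alphanumeric IDs such as 9999a34 are still split into the 4-3 form the v7b API expects. A worked example:

def normalize_episode_id(episode_id):
    if len(episode_id) == 7:
        return episode_id[:4] + '-' + episode_id[4:]
    return episode_id

assert normalize_episode_id('9999a34') == '9999-a34'
assert normalize_episode_id('j_art-20150903-1') == 'j_art-20150903-1'  # left alone
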
diff --git a/hypervideo_dl/extractor/nhl.py b/hypervideo_dl/extractor/nhl.py
index d3a5e17..2521c40 100644
--- a/hypervideo_dl/extractor/nhl.py
+++ b/hypervideo_dl/extractor/nhl.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -51,7 +48,6 @@ class NHLBaseIE(InfoExtractor):
'height': height,
'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
})
- self._sort_formats(formats)
thumbnails = []
cuts = video_data.get('image', {}).get('cuts') or []
diff --git a/hypervideo_dl/extractor/nick.py b/hypervideo_dl/extractor/nick.py
index ba7da76..de22cb8 100644
--- a/hypervideo_dl/extractor/nick.py
+++ b/hypervideo_dl/extractor/nick.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .mtv import MTVServicesInfoExtractor
from ..utils import update_url_query
@@ -192,7 +188,7 @@ class NickDeIE(MTVServicesInfoExtractor):
return self._remove_template_parameter(config['feedWithQueryParams'])
-class NickNightIE(NickDeIE):
+class NickNightIE(NickDeIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'nicknight'
_VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/niconico.py b/hypervideo_dl/extractor/niconico.py
index 4eb6ed0..2103037 100644
--- a/hypervideo_dl/extractor/niconico.py
+++ b/hypervideo_dl/extractor/niconico.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import datetime
import functools
import itertools
@@ -10,8 +7,6 @@ import time
from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
compat_HTTPError,
)
from ..utils import (
@@ -35,6 +30,7 @@ from ..utils import (
update_url_query,
url_or_none,
urlencode_postdata,
+ urljoin,
)
@@ -195,7 +191,7 @@ class NiconicoIE(InfoExtractor):
self._request_webpage(
'https://account.nicovideo.jp/login', None,
note='Acquiring Login session')
- urlh = self._request_webpage(
+ page = self._download_webpage(
'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(login_form_strs),
@@ -203,26 +199,39 @@ class NiconicoIE(InfoExtractor):
'Referer': 'https://account.nicovideo.jp/login',
'Content-Type': 'application/x-www-form-urlencoded',
})
- if urlh is False:
- login_ok = False
- else:
- parts = compat_urllib_parse_urlparse(urlh.geturl())
- if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
- login_ok = False
+ if 'oneTimePw' in page:
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
+ page = self._download_webpage(
+ urljoin('https://account.nicovideo.jp', post_url), None,
+ note='Performing MFA', errnote='Unable to complete MFA',
+ data=urlencode_postdata({
+                    'otp': self._get_tfa_info('6-digit code')
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ if 'oneTimePw' in page or 'formError' in page:
+ err_msg = self._html_search_regex(
+ r'formError["\']+>(.*?)</div>', page, 'form_error',
+ default='There\'s an error but the message can\'t be parsed.',
+ flags=re.DOTALL)
+ self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
+ return False
+ login_ok = 'class="notice error"' not in page
if not login_ok:
- self.report_warning('unable to log in: bad username or password')
+ self.report_warning('Unable to log in: bad username or password')
return login_ok
def _get_heartbeat_info(self, info_dict):
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
- dmc_protocol = info_dict['_expected_protocol']
+ dmc_protocol = info_dict['expected_protocol']
api_data = (
info_dict.get('_api_data')
or self._parse_json(
self._html_search_regex(
'data-api-data="([^"]+)"',
- self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
+ self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
'API data', default='{}'),
video_id))
@@ -369,7 +378,7 @@ class NiconicoIE(InfoExtractor):
'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
'quality': -2 if 'low' in video_quality['id'] else None,
'protocol': 'niconico_dmc',
- '_expected_protocol': dmc_protocol,
+ 'expected_protocol': dmc_protocol, # XXX: This is not a documented field
'http_headers': {
'Origin': 'https://www.nicovideo.jp',
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
@@ -381,7 +390,7 @@ class NiconicoIE(InfoExtractor):
try:
webpage, handle = self._download_webpage_handle(
- 'http://www.nicovideo.jp/watch/' + video_id, video_id)
+ 'https://www.nicovideo.jp/watch/' + video_id, video_id)
if video_id.startswith('so'):
video_id = self._match_id(handle.geturl())
@@ -416,8 +425,6 @@ class NiconicoIE(InfoExtractor):
if fmt:
formats.append(fmt)
- self._sort_formats(formats)
-
# Start extracting information
tags = None
if webpage:
@@ -548,8 +555,7 @@ class NiconicoPlaylistBaseIE(InfoExtractor):
}
def _call_api(self, list_id, resource, query):
- "Implement this in child class"
- pass
+ raise NotImplementedError('Must be implemented in subclasses')
@staticmethod
def _parse_owner(item):
@@ -638,14 +644,14 @@ class NiconicoSeriesIE(InfoExtractor):
'id': '110226',
'title': 'ご立派ァ!のシリーズ',
},
- 'playlist_mincount': 10, # as of 2021/03/17
+ 'playlist_mincount': 10,
}, {
'url': 'https://www.nicovideo.jp/series/12312/',
'info_dict': {
'id': '12312',
'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
},
- 'playlist_mincount': 97, # as of 2021/03/17
+ 'playlist_mincount': 103,
}, {
'url': 'https://nico.ms/series/203559',
'only_matching': True,
@@ -663,7 +669,7 @@ class NiconicoSeriesIE(InfoExtractor):
title = unescapeHTML(title)
playlist = [
self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
- for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+ for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
return self.playlist_result(playlist, list_id, title)
@@ -720,7 +726,7 @@ class NicovideoSearchBaseIE(InfoExtractor):
webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
for item in results:
- yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
+ yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item)
if not results:
break
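
Editor's note: the new login path above handles Niconico's MFA; if the redirector response still contains the oneTimePw form, its action URL is scraped and the one-time password is posted back. The control flow, reduced to a sketch with stand-in post_form and get_otp callables for the HTTP layer and the interactive prompt:

import re
from urllib.parse import urljoin

def complete_mfa(page_html, post_form, get_otp):
    # page_html: HTML returned by the login redirector
    if 'oneTimePw' not in page_html:
        return page_html  # no MFA challenge was issued
    post_url = re.search(r'<form[^>]+action=(["\'])(.+?)\1', page_html).group(2)
    return post_form(urljoin('https://account.nicovideo.jp', post_url),
                     {'otp': get_otp()})
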
diff --git a/hypervideo_dl/extractor/ninecninemedia.py b/hypervideo_dl/extractor/ninecninemedia.py
index 7818427..31df42f 100644
--- a/hypervideo_dl/extractor/ninecninemedia.py
+++ b/hypervideo_dl/extractor/ninecninemedia.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
@@ -46,7 +43,6 @@ class NineCNineMediaIE(InfoExtractor):
formats.extend(self._extract_mpd_formats(
manifest_base_url + 'mpd', content_id,
mpd_id='dash', fatal=False))
- self._sort_formats(formats)
thumbnails = []
for image in (content.get('Images') or []):
diff --git a/hypervideo_dl/extractor/ninegag.py b/hypervideo_dl/extractor/ninegag.py
index 1439082..865ad99 100644
--- a/hypervideo_dl/extractor/ninegag.py
+++ b/hypervideo_dl/extractor/ninegag.py
@@ -1,11 +1,9 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
- try_get,
+ traverse_obj,
unescapeHTML,
url_or_none,
)
@@ -13,18 +11,20 @@ from ..utils import (
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
+ IE_DESC = '9GAG'
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://9gag.com/gag/ae5Ag7B',
'info_dict': {
'id': 'ae5Ag7B',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Capybara Agility Training',
'upload_date': '20191108',
'timestamp': 1573237208,
+ 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
'categories': ['Awesome'],
- 'tags': ['Weimaraner', 'American Pit Bull Terrier'],
+ 'tags': ['Awesome'],
'duration': 44,
'like_count': int,
'dislike_count': int,
@@ -34,6 +34,26 @@ class NineGagIE(InfoExtractor):
# HTML escaped title
'url': 'https://9gag.com/gag/av5nvyb',
'only_matching': True,
+ }, {
+ # Non Anonymous Uploader
+ 'url': 'https://9gag.com/gag/ajgp66G',
+ 'info_dict': {
+ 'id': 'ajgp66G',
+ 'ext': 'webm',
+ 'title': 'Master Shifu! Or Splinter! You decide:',
+ 'upload_date': '20220806',
+ 'timestamp': 1659803411,
+ 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
+ 'categories': ['Funny'],
+ 'tags': ['Funny'],
+ 'duration': 26,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'uploader': 'Peter Klaus',
+ 'uploader_id': 'peterklaus12',
+ 'uploader_url': 'https://9gag.com/u/peterklaus12',
+ }
}]
def _real_extract(self, url):
@@ -48,8 +68,6 @@ class NineGagIE(InfoExtractor):
'The given url does not contain a video',
expected=True)
- title = unescapeHTML(post['title'])
-
duration = None
formats = []
thumbnails = []
@@ -98,9 +116,8 @@ class NineGagIE(InfoExtractor):
'format_id': image_id,
})
formats.append(common)
- self._sort_formats(formats)
- section = try_get(post, lambda x: x['postSection']['name'])
+ section = traverse_obj(post, ('postSection', 'name'))
tags = None
post_tags = post.get('tags')
@@ -112,18 +129,19 @@ class NineGagIE(InfoExtractor):
continue
tags.append(tag_key)
- get_count = lambda x: int_or_none(post.get(x + 'Count'))
-
return {
'id': post_id,
- 'title': title,
+ 'title': unescapeHTML(post.get('title')),
'timestamp': int_or_none(post.get('creationTs')),
'duration': duration,
+ 'uploader': traverse_obj(post, ('creator', 'fullName')),
+ 'uploader_id': traverse_obj(post, ('creator', 'username')),
+ 'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
'formats': formats,
'thumbnails': thumbnails,
- 'like_count': get_count('upVote'),
- 'dislike_count': get_count('downVote'),
- 'comment_count': get_count('comments'),
+ 'like_count': int_or_none(post.get('upVoteCount')),
+ 'dislike_count': int_or_none(post.get('downVoteCount')),
+ 'comment_count': int_or_none(post.get('commentsCount')),
'age_limit': 18 if post.get('nsfw') == 1 else None,
'categories': [section] if section else None,
'tags': tags,
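
Editor's note: the try_get -> traverse_obj swap above is behaviour-preserving; both walk nested structures defensively, but traverse_obj takes a key path instead of a lambda. A quick comparison:

from hypervideo_dl.utils import traverse_obj, try_get

post = {'postSection': {'name': 'Awesome'}, 'creator': None}
assert try_get(post, lambda x: x['postSection']['name']) == 'Awesome'
assert traverse_obj(post, ('postSection', 'name')) == 'Awesome'
assert traverse_obj(post, ('creator', 'fullName')) is None  # no TypeError raised
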
diff --git a/hypervideo_dl/extractor/ninenow.py b/hypervideo_dl/extractor/ninenow.py
index 6043674..b970f8c 100644
--- a/hypervideo_dl/extractor/ninenow.py
+++ b/hypervideo_dl/extractor/ninenow.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/nintendo.py b/hypervideo_dl/extractor/nintendo.py
index ff8f70b..ed839af 100644
--- a/hypervideo_dl/extractor/nintendo.py
+++ b/hypervideo_dl/extractor/nintendo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/nitter.py b/hypervideo_dl/extractor/nitter.py
index 8bb709c..251bf44 100644
--- a/hypervideo_dl/extractor/nitter.py
+++ b/hypervideo_dl/extractor/nitter.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
diff --git a/hypervideo_dl/extractor/njpwworld.py b/hypervideo_dl/extractor/njpwworld.py
index 68c8c8e..7b8a526 100644
--- a/hypervideo_dl/extractor/njpwworld.py
+++ b/hypervideo_dl/extractor/njpwworld.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -72,8 +69,6 @@ class NJPWWorldIE(InfoExtractor):
formats += self._extract_m3u8_formats(
player_url, video_id, 'mp4', 'm3u8_native', m3u8_id=kind, fatal=False, quality=int(kind == 'high'))
- self._sort_formats(formats)
-
tag_block = get_element_by_class('tag-block', webpage)
tags = re.findall(
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
diff --git a/hypervideo_dl/extractor/nobelprize.py b/hypervideo_dl/extractor/nobelprize.py
index 4dfdb09..1aa9705 100644
--- a/hypervideo_dl/extractor/nobelprize.py
+++ b/hypervideo_dl/extractor/nobelprize.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
js_to_json,
@@ -51,7 +48,6 @@ class NobelPrizeIE(InfoExtractor):
formats.append({
'url': source_src,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/noco.py b/hypervideo_dl/extractor/noco.py
deleted file mode 100644
index 28af909..0000000
--- a/hypervideo_dl/extractor/noco.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import time
-import hashlib
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
- int_or_none,
- float_or_none,
- parse_iso8601,
- parse_qs,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class NocoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
- _LOGIN_URL = 'https://noco.tv/do.php'
- _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
- _SUB_LANG_TEMPLATE = '&sub_lang=%s'
- _NETRC_MACHINE = 'noco'
-
- _TESTS = [
- {
- 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
- 'md5': '0a993f0058ddbcd902630b2047ef710e',
- 'info_dict': {
- 'id': '11538',
- 'ext': 'mp4',
- 'title': 'Ami Ami Idol - Hello! France',
- 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
- 'upload_date': '20140412',
- 'uploader': 'Nolife',
- 'uploader_id': 'NOL',
- 'duration': 2851.2,
- },
- 'skip': 'Requires noco account',
- },
- {
- 'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
- 'md5': 'c190f1f48e313c55838f1f412225934d',
- 'info_dict': {
- 'id': '12610',
- 'ext': 'mp4',
- 'title': 'The Guild #1 - Wake-Up Call',
- 'timestamp': 1403863200,
- 'upload_date': '20140627',
- 'uploader': 'LBL42',
- 'uploader_id': 'LBL',
- 'duration': 233.023,
- },
- 'skip': 'Requires noco account',
- }
- ]
-
- def _perform_login(self, username, password):
- login = self._download_json(
- self._LOGIN_URL, None, 'Logging in',
- data=urlencode_postdata({
- 'a': 'login',
- 'cookie': '1',
- 'username': username,
- 'password': password,
- }),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
- })
-
- if 'erreur' in login:
- raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
-
- @staticmethod
- def _ts():
- return int(time.time() * 1000)
-
- def _call_api(self, path, video_id, note, sub_lang=None):
- ts = compat_str(self._ts() + self._ts_offset)
- tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
- url = self._API_URL_TEMPLATE % (path, ts, tk)
- if sub_lang:
- url += self._SUB_LANG_TEMPLATE % sub_lang
-
- request = sanitized_Request(url)
- request.add_header('Referer', self._referer)
-
- resp = self._download_json(request, video_id, note)
-
- if isinstance(resp, dict) and resp.get('error'):
- self._raise_error(resp['error'], resp['description'])
-
- return resp
-
- def _raise_error(self, error, description):
- raise ExtractorError(
- '%s returned error: %s - %s' % (self.IE_NAME, error, description),
- expected=True)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- # Timestamp adjustment offset between server time and local time
- # must be calculated in order to use timestamps closest to server's
- # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864)
- webpage = self._download_webpage(url, video_id)
-
- player_url = self._search_regex(
- r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
- webpage, 'noco player', group='player',
- default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
-
- qs = parse_qs(player_url)
- ts = int_or_none(qs.get('ts', [None])[0])
- self._ts_offset = ts - self._ts() if ts else 0
- self._referer = player_url
-
- medias = self._call_api(
- 'shows/%s/medias' % video_id,
- video_id, 'Downloading video JSON')
-
- show = self._call_api(
- 'shows/by_id/%s' % video_id,
- video_id, 'Downloading show JSON')[0]
-
- options = self._call_api(
- 'users/init', video_id,
- 'Downloading user options JSON')['options']
- audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
-
- if audio_lang_pref == 'original':
- audio_lang_pref = show['original_lang']
- if len(medias) == 1:
- audio_lang_pref = list(medias.keys())[0]
- elif audio_lang_pref not in medias:
- audio_lang_pref = 'fr'
-
- qualities = self._call_api(
- 'qualities',
- video_id, 'Downloading qualities JSON')
-
- formats = []
-
- for audio_lang, audio_lang_dict in medias.items():
- preference = 1 if audio_lang == audio_lang_pref else 0
- for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
- for format_id, fmt in lang_dict['quality_list'].items():
- format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
-
- video = self._call_api(
- 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
- video_id, 'Downloading %s video JSON' % format_id_extended,
- sub_lang if sub_lang != 'none' else None)
-
- file_url = video['file']
- if not file_url:
- continue
-
- if file_url in ['forbidden', 'not found']:
- popmessage = video['popmessage']
- self._raise_error(popmessage['title'], popmessage['message'])
-
- formats.append({
- 'url': file_url,
- 'format_id': format_id_extended,
- 'width': int_or_none(fmt.get('res_width')),
- 'height': int_or_none(fmt.get('res_lines')),
- 'abr': int_or_none(fmt.get('audiobitrate'), 1000),
- 'vbr': int_or_none(fmt.get('videobitrate'), 1000),
- 'filesize': int_or_none(fmt.get('filesize')),
- 'format_note': qualities[format_id].get('quality_name'),
- 'quality': qualities[format_id].get('priority'),
- 'language_preference': preference,
- })
-
- self._sort_formats(formats)
-
- timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
-
- if timestamp is not None and timestamp < 0:
- timestamp = None
-
- uploader = show.get('partner_name')
- uploader_id = show.get('partner_key')
- duration = float_or_none(show.get('duration_ms'), 1000)
-
- thumbnails = []
- for thumbnail_key, thumbnail_url in show.items():
- m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
- if not m:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
-
- episode = show.get('show_TT') or show.get('show_OT')
- family = show.get('family_TT') or show.get('family_OT')
- episode_number = show.get('episode_number')
-
- title = ''
- if family:
- title += family
- if episode_number:
- title += ' #' + compat_str(episode_number)
- if episode:
- title += ' - ' + compat_str(episode)
-
- description = show.get('show_resume') or show.get('family_resume')
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/hypervideo_dl/extractor/nonktube.py b/hypervideo_dl/extractor/nonktube.py
index ca1424e..f191be3 100644
--- a/hypervideo_dl/extractor/nonktube.py
+++ b/hypervideo_dl/extractor/nonktube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .nuevo import NuevoBaseIE
diff --git a/hypervideo_dl/extractor/noodlemagazine.py b/hypervideo_dl/extractor/noodlemagazine.py
index 2f170bb..e620895 100644
--- a/hypervideo_dl/extractor/noodlemagazine.py
+++ b/hypervideo_dl/extractor/noodlemagazine.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
@@ -50,8 +47,6 @@ class NoodleMagazineIE(InfoExtractor):
'ext': source.get('type'),
} for source in playlist_info.get('sources')]
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/noovo.py b/hypervideo_dl/extractor/noovo.py
index b40770d..acbb74c 100644
--- a/hypervideo_dl/extractor/noovo.py
+++ b/hypervideo_dl/extractor/noovo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..compat import compat_str
diff --git a/hypervideo_dl/extractor/normalboots.py b/hypervideo_dl/extractor/normalboots.py
index 61fe571..07babcd 100644
--- a/hypervideo_dl/extractor/normalboots.py
+++ b/hypervideo_dl/extractor/normalboots.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
diff --git a/hypervideo_dl/extractor/nosnl.py b/hypervideo_dl/extractor/nosnl.py
new file mode 100644
index 0000000..eba94c4
--- /dev/null
+++ b/hypervideo_dl/extractor/nosnl.py
@@ -0,0 +1,95 @@
+from .common import InfoExtractor
+from ..utils import parse_duration, parse_iso8601, traverse_obj
+
+
+class NOSNLArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://nos\.nl/((?!video)(\w+/)?\w+/)\d+-(?P<display_id>[\w-]+)'
+ _TESTS = [
+ {
+ # only 1 video
+ 'url': 'https://nos.nl/nieuwsuur/artikel/2440353-verzakking-door-droogte-dreigt-tot-een-miljoen-kwetsbare-huizen',
+ 'info_dict': {
+ 'id': '2440340',
+ 'ext': 'mp4',
+ 'description': 'md5:5f83185d902ac97af3af4bed7ece3db5',
+ 'title': '\'We hebben een huis vol met scheuren\'',
+ 'duration': 95.0,
+ 'thumbnail': 'https://cdn.nos.nl/image/2022/08/12/887149/3840x2160a.jpg',
+ }
+ }, {
+ # more than 1 video
+ 'url': 'https://nos.nl/artikel/2440409-vannacht-sliepen-weer-enkele-honderden-asielzoekers-in-ter-apel-buiten',
+ 'info_dict': {
+ 'id': '2440409',
+ 'title': 'Vannacht sliepen weer enkele honderden asielzoekers in Ter Apel buiten',
+ 'description': 'Er werd wel geprobeerd om kwetsbare migranten onderdak te bieden, zegt het COA.',
+ 'tags': ['aanmeldcentrum', 'Centraal Orgaan opvang asielzoekers', 'COA', 'asielzoekers', 'Ter Apel'],
+ 'modified_timestamp': 1660452773,
+ 'modified_date': '20220814',
+ 'upload_date': '20220813',
+ 'thumbnail': 'https://cdn.nos.nl/image/2022/07/18/880346/1024x576a.jpg',
+ 'timestamp': 1660401384,
+ },
+ 'playlist_count': 2,
+ }, {
+ # audio + video
+ 'url': 'https://nos.nl/artikel/2440789-wekdienst-16-8-groningse-acties-tien-jaar-na-zware-aardbeving-femke-bol-in-actie-op-ek-atletiek',
+ 'info_dict': {
+ 'id': '2440789',
+ 'title': 'Wekdienst 16/8: Groningse acties tien jaar na zware aardbeving • Femke Bol in actie op EK atletiek ',
+ 'description': 'Nieuws, weer, verkeer: met dit overzicht begin je geïnformeerd aan de dag.',
+ 'tags': ['wekdienst'],
+ 'modified_date': '20220816',
+ 'modified_timestamp': 1660625449,
+ 'timestamp': 1660625449,
+ 'upload_date': '20220816',
+ 'thumbnail': 'https://cdn.nos.nl/image/2022/08/16/888178/1024x576a.jpg',
+ },
+ 'playlist_count': 2,
+ }
+ ]
+
+ def _entries(self, nextjs_json, display_id):
+ for item in nextjs_json['items']:
+ if item.get('type') == 'video':
+ formats, subtitle = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(item, ('source', 'url')), display_id, ext='mp4')
+ yield {
+ 'id': str(item['id']),
+ 'title': item.get('title'),
+ 'description': item.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitle,
+ 'duration': parse_duration(item.get('duration')),
+ 'thumbnails': [{
+ 'url': traverse_obj(image, ('url', ...), get_all=False),
+ 'width': image.get('width'),
+ 'height': image.get('height')
+ } for image in traverse_obj(item, ('imagesByRatio', ...))[0]],
+ }
+
+ elif item.get('type') == 'audio':
+ yield {
+ 'id': str(item['id']),
+ 'title': item.get('title'),
+ 'url': traverse_obj(item, ('media', 'src')),
+ 'ext': 'mp3',
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_valid_url(url).group('display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data']
+ return {
+ '_type': 'playlist',
+ 'entries': self._entries(nextjs_json, display_id),
+ 'id': str(nextjs_json['id']),
+ 'title': nextjs_json.get('title') or self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
+ 'description': (nextjs_json.get('description')
+ or self._html_search_meta(['description', 'twitter:description', 'og:description'], webpage)),
+ 'tags': nextjs_json.get('keywords'),
+ 'modified_timestamp': parse_iso8601(nextjs_json.get('modifiedAt')),
+ 'thumbnail': nextjs_json.get('shareImageSrc') or self._html_search_meta(['og:image', 'twitter:image'], webpage),
+ 'timestamp': parse_iso8601(nextjs_json.get('publishedAt'))
+ }
diff --git a/hypervideo_dl/extractor/nosvideo.py b/hypervideo_dl/extractor/nosvideo.py
index 53c500c..b6d3ea4 100644
--- a/hypervideo_dl/extractor/nosvideo.py
+++ b/hypervideo_dl/extractor/nosvideo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/nova.py b/hypervideo_dl/extractor/nova.py
index 00a64f8..8bd3fd4 100644
--- a/hypervideo_dl/extractor/nova.py
+++ b/hypervideo_dl/extractor/nova.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -126,7 +123,6 @@ class NovaEmbedIE(InfoExtractor):
if not formats and has_drm:
self.report_drm(video_id)
- self._sort_formats(formats)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
@@ -311,7 +307,6 @@ class NovaIE(InfoExtractor):
formats = [{
'url': video_url,
}]
- self._sort_formats(formats)
title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
thumbnail = config.get('poster')
diff --git a/hypervideo_dl/extractor/novaplay.py b/hypervideo_dl/extractor/novaplay.py
index bfb2c87..92d1d13 100644
--- a/hypervideo_dl/extractor/novaplay.py
+++ b/hypervideo_dl/extractor/novaplay.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
from ..utils import int_or_none, parse_duration, parse_iso8601
@@ -7,46 +6,55 @@ class NovaPlayIE(InfoExtractor):
_VALID_URL = r'https://play.nova\.bg/video/.*/(?P<id>\d+)'
_TESTS = [
{
- 'url': 'https://play.nova.bg/video/bratya/season-3/bratq-2021-10-08/548677',
- 'md5': 'b1127a84e61bed1632b7c2ca9cbb4153',
+ 'url': 'https://play.nova.bg/video/ochakvaite/season-0/ochakvaite-2022-07-22-sybudi-se-sat/606627',
+ 'md5': 'd79dff2d09d196c595a7290f48e33399',
'info_dict': {
- 'id': '548677',
+ 'id': '606627',
'ext': 'mp4',
- 'title': 'Братя',
- 'alt_title': 'bratya/season-3/bratq-2021-10-08',
- 'duration': 1603.0,
- 'timestamp': 1633724150,
- 'upload_date': '20211008',
- 'thumbnail': 'https://nbg-img.fite.tv/img/548677_460x260.jpg',
- 'description': 'Сезон 3 Епизод 25'
+ 'title': 'Събуди се - събота по NOVA (23.07.2022)',
+ 'alt_title': 'ochakvaite/season-0/ochakvaite-2022-07-22-sybudi-se-sat',
+ 'duration': 29.0,
+ 'timestamp': 1658491547,
+ 'upload_date': '20220722',
+ 'thumbnail': 'https://nbg-img.fite.tv/img/606627_460x260.jpg',
+ 'description': '29 сек',
+ 'view_count': False
},
},
{
- 'url': 'https://play.nova.bg/video/igri-na-volqta/season-3/igri-na-volqta-2021-09-20-1/548227',
- 'md5': '5fd61b8ecbe582fc021019d570965d58',
+ 'url': 'https://play.nova.bg/video/ochakvaite/season-0/ochakvaite-2022-07-22-cherry-tazi/606609',
+ 'md5': 'f3e973e2ed1a5b9b3f498b1ab82d01b3',
'info_dict': {
- 'id': '548227',
+ 'id': '606609',
'ext': 'mp4',
- 'title': 'Игри на волята: България (20.09.2021) - част 1',
- 'alt_title': 'gri-na-volqta/season-3/igri-na-volqta-2021-09-20-1',
- 'duration': 4060.0,
- 'timestamp': 1632167564,
- 'upload_date': '20210920',
- 'thumbnail': 'https://nbg-img.fite.tv/img/548227_460x260.jpg',
- 'description': 'Сезон 3 Епизод 13'
+ 'title': 'Черешката на тортата - тази вечер по NOVA (22.07.2022)',
+ 'alt_title': 'ochakvaite/season-0/ochakvaite-2022-07-22-cherry-tazi',
+ 'duration': 29.0,
+ 'timestamp': 1658476303,
+ 'upload_date': '20220722',
+ 'thumbnail': 'https://nbg-img.fite.tv/img/606609_460x260.jpg',
+ 'description': '29 сек',
+ 'view_count': False
},
}
]
+ _access_token = None
+
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
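+ # Fetch the anonymous client token once and cache it on the class for later extractions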
+ self._access_token = self._access_token or self._download_json(
+ 'https://play.nova.bg/api/client', None, note='Fetching access token')['accessToken']
video_props = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
m3u8_url = self._download_json(
f'https://nbg-api.fite.tv/api/v2/videos/{video_id}/streams',
- video_id, headers={'x-flipps-user-agent': 'Flipps/75/9.7'})[0]['url']
+ video_id, headers={
+ 'x-flipps-user-agent': 'Flipps/75/9.7',
+ 'x-flipps-version': '2022-05-17',
+ 'Authorization': f'Bearer {self._access_token}'
+ })[0]['links']['play']['href']
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/nowness.py b/hypervideo_dl/extractor/nowness.py
index 20ef4cd..18bb880 100644
--- a/hypervideo_dl/extractor/nowness.py
+++ b/hypervideo_dl/extractor/nowness.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
diff --git a/hypervideo_dl/extractor/noz.py b/hypervideo_dl/extractor/noz.py
index ccafd77..59d259f 100644
--- a/hypervideo_dl/extractor/noz.py
+++ b/hypervideo_dl/extractor/noz.py
@@ -1,17 +1,11 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_xpath,
-)
from ..utils import (
int_or_none,
find_xpath_attr,
xpath_text,
update_url_query,
)
+from ..compat import compat_urllib_parse_unquote
class NozIE(InfoExtractor):
@@ -50,7 +44,7 @@ class NozIE(InfoExtractor):
duration = int_or_none(xpath_text(
doc, './/article/movie/file/duration'))
formats = []
- for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
+ for qnode in doc.findall('.//article/movie/file/qualities/qual'):
http_url_ele = find_xpath_attr(
qnode, './html_urls/video_url', 'format', 'video/mp4')
http_url = http_url_ele.text if http_url_ele is not None else None
@@ -77,7 +71,6 @@ class NozIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/npo.py b/hypervideo_dl/extractor/npo.py
index a8aaef6..f18cb9e 100644
--- a/hypervideo_dl/extractor/npo.py
+++ b/hypervideo_dl/extractor/npo.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -249,8 +247,6 @@ class NPOIE(NPOBaseIE):
if not self.get_param('allow_unplayable_formats') and drm:
self.report_drm(video_id)
- self._sort_formats(formats)
-
info = {
'id': video_id,
'title': video_id,
@@ -456,8 +452,6 @@ class NPOIE(NPOBaseIE):
'quality': stream.get('kwaliteit'),
})
- self._sort_formats(formats)
-
subtitles = {}
if metadata.get('tt888') == 'ja':
subtitles['nl'] = [{
@@ -601,7 +595,7 @@ class NPORadioFragmentIE(InfoExtractor):
}
-class NPODataMidEmbedIE(InfoExtractor):
+class NPODataMidEmbedIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
@@ -655,7 +649,7 @@ class HetKlokhuisIE(NPODataMidEmbedIE):
}
-class NPOPlaylistBaseIE(NPOIE):
+class NPOPlaylistBaseIE(NPOIE): # XXX: Do not subclass from concrete IE
def _real_extract(self, url):
playlist_id = self._match_id(url)
diff --git a/hypervideo_dl/extractor/npr.py b/hypervideo_dl/extractor/npr.py
index 49f062d..4b6855c 100644
--- a/hypervideo_dl/extractor/npr.py
+++ b/hypervideo_dl/extractor/npr.py
@@ -1,11 +1,5 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- qualities,
- url_or_none,
-)
+from ..utils import int_or_none, qualities, traverse_obj, url_or_none
class NprIE(InfoExtractor):
@@ -53,6 +47,15 @@ class NprIE(InfoExtractor):
# multimedia, no formats, stream
'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert',
+ 'info_dict': {
+ 'id': '1086468851',
+ 'ext': 'mp4',
+ 'title': 'Bonobo: Tiny Desk (Home) Concert',
+ 'duration': 1061,
+ 'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$',
+ },
}]
def _real_extract(self, url):
@@ -112,7 +115,11 @@ class NprIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
stream_url, stream_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
+
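+ # Some pages expose the stream only via JSON-LD; fall back to its embedUrl when the API yields no formats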
+ if not formats:
+ raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False)
+ m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
+ formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
entries.append({
'id': media_id,
diff --git a/hypervideo_dl/extractor/nrk.py b/hypervideo_dl/extractor/nrk.py
index 4d723e8..88d08e5 100644
--- a/hypervideo_dl/extractor/nrk.py
+++ b/hypervideo_dl/extractor/nrk.py
@@ -1,22 +1,19 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import random
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
- compat_HTTPError,
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_duration,
+ parse_iso8601,
str_or_none,
try_get,
- urljoin,
url_or_none,
+ urljoin,
)
@@ -61,8 +58,7 @@ class NRKBaseIE(InfoExtractor):
return self._download_json(
urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
- fatal=fatal, query=query,
- headers={'Accept-Encoding': 'gzip, deflate, br'})
+ fatal=fatal, query=query)
class NRKIE(NRKBaseIE):
@@ -184,7 +180,6 @@ class NRKIE(NRKBaseIE):
'format_id': asset_format,
'vcodec': 'none',
})
- self._sort_formats(formats)
data = call_playback_api('metadata')
@@ -247,6 +242,7 @@ class NRKIE(NRKBaseIE):
'age_limit': age_limit,
'formats': formats,
'subtitles': subtitles,
+ 'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str))
}
if is_series:
@@ -738,7 +734,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
entries, series_id, titles.get('title'), titles.get('subtitle'))
-class NRKTVDirekteIE(NRKTVIE):
+class NRKTVDirekteIE(NRKTVIE): # XXX: Do not subclass from concrete IE
IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
_VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
@@ -797,7 +793,7 @@ class NRKPlaylistBaseIE(InfoExtractor):
for video_id in re.findall(self._ITEM_RE, webpage)
]
- playlist_title = self. _extract_title(webpage)
+ playlist_title = self._extract_title(webpage)
playlist_description = self._extract_description(webpage)
return self.playlist_result(
diff --git a/hypervideo_dl/extractor/nrl.py b/hypervideo_dl/extractor/nrl.py
index 0bd5086..798d034 100644
--- a/hypervideo_dl/extractor/nrl.py
+++ b/hypervideo_dl/extractor/nrl.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/ntvcojp.py b/hypervideo_dl/extractor/ntvcojp.py
index c9af911..422ec6e 100644
--- a/hypervideo_dl/extractor/ntvcojp.py
+++ b/hypervideo_dl/extractor/ntvcojp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/ntvde.py b/hypervideo_dl/extractor/ntvde.py
index 035582e..6d7ea3d 100644
--- a/hypervideo_dl/extractor/ntvde.py
+++ b/hypervideo_dl/extractor/ntvde.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -63,7 +60,6 @@ class NTVDeIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
quality=1, m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/ntvru.py b/hypervideo_dl/extractor/ntvru.py
index c47d1df..8d5877d 100644
--- a/hypervideo_dl/extractor/ntvru.py
+++ b/hypervideo_dl/extractor/ntvru.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -118,7 +115,6 @@ class NTVRuIE(InfoExtractor):
'url': file_,
'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
})
- self._sort_formats(formats)
return {
'id': xpath_text(video, './id'),
diff --git a/hypervideo_dl/extractor/nuevo.py b/hypervideo_dl/extractor/nuevo.py
index be1e09d..ec54041 100644
--- a/hypervideo_dl/extractor/nuevo.py
+++ b/hypervideo_dl/extractor/nuevo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
diff --git a/hypervideo_dl/extractor/nuvid.py b/hypervideo_dl/extractor/nuvid.py
index 84fb97d..6ac351c 100644
--- a/hypervideo_dl/extractor/nuvid.py
+++ b/hypervideo_dl/extractor/nuvid.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -82,7 +80,6 @@ class NuvidIE(InfoExtractor):
} for quality, source in video_data.get('files').items() if source]
self._check_formats(formats, video_id)
- self._sort_formats(formats)
duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format'))
thumbnails = [
diff --git a/hypervideo_dl/extractor/nytimes.py b/hypervideo_dl/extractor/nytimes.py
index 9996473..2e21edb 100644
--- a/hypervideo_dl/extractor/nytimes.py
+++ b/hypervideo_dl/extractor/nytimes.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hmac
import hashlib
import base64
@@ -75,7 +72,6 @@ class NYTimesBaseIE(InfoExtractor):
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
'ext': ext,
})
- self._sort_formats(formats)
thumbnails = []
for image in video_data.get('images', []):
@@ -106,6 +102,7 @@ class NYTimesBaseIE(InfoExtractor):
class NYTimesIE(NYTimesBaseIE):
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
_TESTS = [{
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
diff --git a/hypervideo_dl/extractor/nzherald.py b/hypervideo_dl/extractor/nzherald.py
index e5601b4..062f9a8 100644
--- a/hypervideo_dl/extractor/nzherald.py
+++ b/hypervideo_dl/extractor/nzherald.py
@@ -1,9 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import json
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
-
from ..compat import compat_str
from ..utils import (
ExtractorError,
@@ -16,17 +14,20 @@ class NZHeraldIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nzherald\.co\.nz/[\w\/-]+\/(?P<id>[A-Z0-9]+)'
_TESTS = [
{
- 'url': 'https://www.nzherald.co.nz/nz/weather-heavy-rain-gales-across-nz-most-days-this-week/PTG7QWY4E2225YHZ5NAIRBTYTQ/',
+ # Video accessible under 'video' key
+ 'url': 'https://www.nzherald.co.nz/nz/queen-elizabeth-death-nz-public-holiday-announced-for-september-26/CEOPBSXO2JDCLNK3H7E3BIE2FA/',
'info_dict': {
- 'id': '6271084466001',
+ 'id': '6312191736112',
'ext': 'mp4',
- 'title': 'MetService severe weather warning: September 6th - 7th',
- 'timestamp': 1630891576,
- 'upload_date': '20210906',
+ 'title': 'Focus: PM holds post-Cabinet press conference',
+ 'duration': 238.08,
+ 'upload_date': '20220912',
'uploader_id': '1308227299001',
- 'description': 'md5:db6ca335a22e2cdf37ab9d2bcda52902'
+ 'timestamp': 1662957159,
+ 'tags': [],
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'description': 'md5:2f17713fcbfcfbe38bb9e7dfccbb0f2e',
}
-
}, {
# Webpage has brightcove embed player url
'url': 'https://www.nzherald.co.nz/travel/pencarrow-coastal-trail/HDVTPJEPP46HJ2UEMK4EGD2DFI/',
@@ -37,9 +38,11 @@ class NZHeraldIE(InfoExtractor):
'timestamp': 1625102897,
'upload_date': '20210701',
'uploader_id': '1308227299001',
- 'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4'
+ 'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'tags': ['travel', 'video'],
+ 'duration': 43.627,
}
-
}, {
# two video embeds of the same video
'url': 'https://www.nzherald.co.nz/nz/truck-driver-captured-cutting-off-motorist-on-state-highway-1-in-canterbury/FIHNJB7PLLPHWQPK4S7ZBDUC4I/',
@@ -51,6 +54,22 @@ class NZHeraldIE(InfoExtractor):
'upload_date': '20210429',
'uploader_id': '1308227299001',
'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7'
+ },
+ 'skip': 'video removed',
+ }, {
+ # customVideo embed requiring additional API call
+ 'url': 'https://www.nzherald.co.nz/nz/politics/reserve-bank-rejects-political-criticisms-stands-by-review/2JO5Q4WLZRCBBNWTLACZMOP4RA/',
+ 'info_dict': {
+ 'id': '6315123873112',
+ 'ext': 'mp4',
+ 'timestamp': 1667862725,
+ 'title': 'Focus: Luxon on re-appointment of Reserve Bank governor Adrian Orr',
+ 'upload_date': '20221107',
+ 'description': 'md5:df2f1f7033a8160c66e28e4743f5d934',
+ 'uploader_id': '1308227299001',
+ 'tags': ['video', 'nz herald focus', 'politics', 'politics videos'],
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 99.584,
}
}, {
'url': 'https://www.nzherald.co.nz/kahu/kaupapa-companies-my-taiao-supporting-maori-in-study-and-business/PQBO2J25WCG77VGRX7W7BVYEAI/',
@@ -83,6 +102,12 @@ class NZHeraldIE(InfoExtractor):
self._search_regex(r'Fusion\.globalContent\s*=\s*({.+?})\s*;', webpage, 'fusion metadata'), article_id)
video_metadata = fusion_metadata.get('video')
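+ # Some articles reference their video via a customVideo element whose full metadata requires a separate Fusion API call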
+ if not video_metadata:
+ custom_video_id = traverse_obj(fusion_metadata, ('customVideo', 'embed', 'id'), expected_type=str)
+ if custom_video_id:
+ video_metadata = self._download_json(
+ 'https://www.nzherald.co.nz/pf/api/v3/content/fetch/full-content-by-id', article_id,
+ query={'query': json.dumps({'id': custom_video_id, 'site': 'nzh'}), '_website': 'nzh'})
bc_video_id = traverse_obj(
video_metadata or fusion_metadata, # fusion metadata is the video metadata for video-only pages
'brightcoveId', ('content_elements', ..., 'referent', 'id'),
diff --git a/hypervideo_dl/extractor/nzz.py b/hypervideo_dl/extractor/nzz.py
index 61ee77a..ac3b731 100644
--- a/hypervideo_dl/extractor/nzz.py
+++ b/hypervideo_dl/extractor/nzz.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/odatv.py b/hypervideo_dl/extractor/odatv.py
index 314527f..24ab939 100644
--- a/hypervideo_dl/extractor/odatv.py
+++ b/hypervideo_dl/extractor/odatv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/odnoklassniki.py b/hypervideo_dl/extractor/odnoklassniki.py
index 293f1aa..4f325f0 100644
--- a/hypervideo_dl/extractor/odnoklassniki.py
+++ b/hypervideo_dl/extractor/odnoklassniki.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
@@ -13,10 +8,12 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
- unified_strdate,
int_or_none,
qualities,
+ smuggle_url,
unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
urlencode_postdata,
)
@@ -27,13 +24,14 @@ class OdnoklassnikiIE(InfoExtractor):
(?:(?:www|m|mobile)\.)?
(?:odnoklassniki|ok)\.ru/
(?:
- video(?:embed)?/|
+ video(?P<embed>embed)?/|
web-api/video/moviePlayer/|
live/|
dk\?.*?st\.mvId=
)
(?P<id>[\d-]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
_TESTS = [{
'note': 'Coub embedded',
'url': 'http://ok.ru/video/1484130554189',
@@ -42,7 +40,7 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'timestamp': 1545580896,
'view_count': int,
- 'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
+ 'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
'title': 'Народная забава',
'uploader': 'Nevata',
'upload_date': '20181223',
@@ -69,11 +67,12 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
- 'md5': '0b62089b479e06681abaaca9d204f152',
+ 'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
+ 'thumbnail': str,
'duration': 100,
'upload_date': '20141207',
'uploader_id': '330537914540',
@@ -84,11 +83,12 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
- 'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
+ 'md5': 'f8c951122516af72e6e6ffdd3c41103b',
'info_dict': {
'id': '63567059965189-0',
'ext': 'mp4',
'title': 'Девушка без комплексов ...',
+ 'thumbnail': str,
'duration': 191,
'upload_date': '20150518',
'uploader_id': '534380003155',
@@ -99,18 +99,32 @@ class OdnoklassnikiIE(InfoExtractor):
},
}, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
- 'url': 'http://ok.ru/video/64211978996595-1',
- 'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
+ 'url': 'https://ok.ru/video/3952212382174',
+ 'md5': '91749d0bd20763a28d083fa335bbd37a',
'info_dict': {
- 'id': 'V_VztHT5BzY',
+ 'id': '5axVgHHDBvU',
'ext': 'mp4',
- 'title': 'Космическая среда от 26 августа 2015',
- 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
- 'duration': 440,
- 'upload_date': '20150826',
- 'uploader_id': 'tvroscosmos',
- 'uploader': 'Телестудия Роскосмоса',
+ 'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
+ 'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
+ 'uploader': 'Lod Mer',
+ 'uploader_id': '575186401502',
+ 'duration': 1529,
'age_limit': 0,
+ 'upload_date': '20210405',
+ 'comment_count': int,
+ 'live_status': 'not_live',
+ 'view_count': int,
+ 'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
+ 'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
+ 'channel_follower_count': int,
+ 'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
+ 'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
+ 'categories': ['Education'],
+ 'playable_in_embed': True,
+ 'channel': 'BornToReact',
},
}, {
# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
@@ -130,10 +144,12 @@ class OdnoklassnikiIE(InfoExtractor):
},
'skip': 'Video has not been found',
}, {
+ # TODO: HTTP Error 400: Bad Request; it only works if there are no cookies when downloading
'note': 'Only available in mobile webpage',
'url': 'https://m.ok.ru/video/2361249957145',
'info_dict': {
'id': '2361249957145',
+ 'ext': 'mp4',
'title': 'Быковское крещение',
'duration': 3038.181,
},
@@ -162,14 +178,36 @@ class OdnoklassnikiIE(InfoExtractor):
# Paid video
'url': 'https://ok.ru/video/954886983203',
'only_matching': True,
+ }, {
+ 'url': 'https://ok.ru/videoembed/2932705602075',
+ 'info_dict': {
+ 'id': '2932705602075',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
+ 'title': 'Boosty для тебя!',
+ 'uploader_id': '597811038747',
+ 'like_count': 0,
+ 'duration': 35,
+ },
+ }]
+
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
+ 'info_dict': {
+ 'id': '3950343629563',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
+ 'title': 'Заяц Бусти.mp4',
+ 'uploader_id': '571368965883',
+ 'like_count': 0,
+ 'duration': 10444,
+ },
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
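+ # Smuggle the embedding page's URL so _real_extract can send it back as the Referer header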
+ for x in super()._extract_embed_urls(url, webpage):
+ yield smuggle_url(x, {'referrer': url})
def _real_extract(self, url):
try:
@@ -185,16 +223,23 @@ class OdnoklassnikiIE(InfoExtractor):
start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
- video_id = self._match_id(url)
+ url, smuggled = unsmuggle_url(url, {})
+ video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
+ mode = 'videoembed' if is_embed else 'video'
webpage = self._download_webpage(
- 'http://ok.ru/video/%s' % video_id, video_id,
- note='Downloading desktop webpage')
+ f'https://ok.ru/{mode}/{video_id}', video_id,
+ note='Downloading desktop webpage',
+ headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
error = self._search_regex(
r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
webpage, 'error', default=None)
- if error:
+ # Direct link from boosty
+ if (error == 'The author of this video has not been found or is blocked'
+ and not smuggled.get('referrer') and mode == 'videoembed'):
+ return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
+ elif error:
raise ExtractorError(error, expected=True)
player = self._parse_json(
@@ -281,7 +326,7 @@ class OdnoklassnikiIE(InfoExtractor):
if provider == 'LIVE_TV_APP':
info['title'] = title
- quality = qualities(('4', '0', '1', '2', '3', '5'))
+ quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
formats = [{
'url': f['url'],
@@ -325,8 +370,6 @@ class OdnoklassnikiIE(InfoExtractor):
if payment_info:
self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
- self._sort_formats(formats)
-
info['formats'] = formats
return info
diff --git a/hypervideo_dl/extractor/oftv.py b/hypervideo_dl/extractor/oftv.py
new file mode 100644
index 0000000..3ae7278
--- /dev/null
+++ b/hypervideo_dl/extractor/oftv.py
@@ -0,0 +1,54 @@
+from .common import InfoExtractor
+from .zype import ZypeIE
+from ..utils import traverse_obj
+
+
+class OfTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?of\.tv/video/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://of.tv/video/627d7d95b353db0001dadd1a',
+ 'md5': 'cb9cd5db3bb9ee0d32bfd7e373d6ef0a',
+ 'info_dict': {
+ 'id': '627d7d95b353db0001dadd1a',
+ 'ext': 'mp4',
+ 'title': 'E1: Jacky vs Eric',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'average_rating': 0,
+ 'description': 'md5:dd16e3e2a8d27d922e7a989f85986853',
+ 'display_id': '',
+ 'duration': 1423,
+ 'timestamp': 1652391300,
+ 'upload_date': '20220512',
+ 'view_count': 0,
+ 'creator': 'This is Fire'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
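+ # The page embeds a Zype player; delegate to ZypeIE and overlay the creator name scraped from the page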
+ info = next(ZypeIE.extract_from_webpage(self._downloader, url, webpage))
+ info['_type'] = 'url_transparent'
+ info['creator'] = self._search_regex(r'<a[^>]+class="creator-name"[^>]+>([^<]+)', webpage, 'creator')
+ return info
+
+
+class OfTVPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?of\.tv/creators/(?P<id>[a-zA-Z0-9-]+)/.?'
+ _TESTS = [{
+ 'url': 'https://of.tv/creators/this-is-fire/',
+ 'playlist_count': 8,
+ 'info_dict': {
+ 'id': 'this-is-fire'
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
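+ # The creator page defines a remaining_videos JS array; each entry's discovery_url points at a single video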
+ json_match = self._search_json(
+ r'var\s*remaining_videos\s*=', webpage, 'oftv playlists', playlist_id, contains_pattern=r'\[.+\]')
+
+ return self.playlist_from_matches(
+ traverse_obj(json_match, (..., 'discovery_url')), playlist_id)
diff --git a/hypervideo_dl/extractor/oktoberfesttv.py b/hypervideo_dl/extractor/oktoberfesttv.py
index 2765674..e0ac856 100644
--- a/hypervideo_dl/extractor/oktoberfesttv.py
+++ b/hypervideo_dl/extractor/oktoberfesttv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/olympics.py b/hypervideo_dl/extractor/olympics.py
index 784f282..61d1f40 100644
--- a/hypervideo_dl/extractor/olympics.py
+++ b/hypervideo_dl/extractor/olympics.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -56,8 +53,7 @@ class OlympicsReplayIE(InfoExtractor):
})
m3u8_url = self._download_json(
f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, m3u8_id='hls')
- self._sort_formats(formats)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
return {
'id': uuid,
diff --git a/hypervideo_dl/extractor/on24.py b/hypervideo_dl/extractor/on24.py
index d4d8244..9a4abc9 100644
--- a/hypervideo_dl/extractor/on24.py
+++ b/hypervideo_dl/extractor/on24.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -79,7 +76,6 @@ class On24IE(InfoExtractor):
'vcodec': 'none',
'acodec': 'wav'
})
- self._sort_formats(formats)
return {
'id': event_id,
diff --git a/hypervideo_dl/extractor/once.py b/hypervideo_dl/extractor/once.py
index 3e44b78..989f10a 100644
--- a/hypervideo_dl/extractor/once.py
+++ b/hypervideo_dl/extractor/once.py
@@ -1,12 +1,9 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-class OnceIE(InfoExtractor):
+class OnceIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
diff --git a/hypervideo_dl/extractor/ondemandkorea.py b/hypervideo_dl/extractor/ondemandkorea.py
index e933ea2..dd7d1d7 100644
--- a/hypervideo_dl/extractor/ondemandkorea.py
+++ b/hypervideo_dl/extractor/ondemandkorea.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -14,11 +11,11 @@ class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
_GEO_COUNTRIES = ['US', 'CA']
_TESTS = [{
- 'url': 'https://www.ondemandkorea.com/ask-us-anything-e43.html',
+ 'url': 'https://www.ondemandkorea.com/ask-us-anything-e351.html',
'info_dict': {
- 'id': 'ask-us-anything-e43',
+ 'id': 'ask-us-anything-e351',
'ext': 'mp4',
- 'title': 'Ask Us Anything : Gain, Ji Soo - 09/24/2016',
+ 'title': 'Ask Us Anything : Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won - 09/24/2022',
'description': 'A talk show/game show with a school theme where celebrity guests appear as “transfer students.”',
'thumbnail': r're:^https?://.*\.jpg$',
},
@@ -26,13 +23,13 @@ class OnDemandKoreaIE(InfoExtractor):
'skip_download': 'm3u8 download'
}
}, {
- 'url': 'https://www.ondemandkorea.com/confession-e01-1.html',
+ 'url': 'https://www.ondemandkorea.com/work-later-drink-now-e1.html',
'info_dict': {
- 'id': 'confession-e01-1',
+ 'id': 'work-later-drink-now-e1',
'ext': 'mp4',
- 'title': 'Confession : E01',
- 'description': 'Choi Do-hyun, a criminal attorney, is the son of a death row convict. Ever since Choi Pil-su got arrested for murder, Do-hyun has wanted to solve his ',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Work Later, Drink Now : E01',
+ 'description': 'Work Later, Drink First follows three women who find solace in a glass of liquor at the end of the day. So-hee, who gets comfort from a cup of soju af',
+ 'thumbnail': r're:^https?://.*\.png$',
'subtitles': {
'English': 'mincount:1',
},
@@ -72,9 +69,11 @@ class OnDemandKoreaIE(InfoExtractor):
webpage, 'episode_title', fatal=False) or self._og_search_title(webpage)
jw_config = self._parse_json(
- self._search_regex(
+ self._search_regex((
+ r'(?P<options>{\s*[\'"]tracks[\'"].*?})[)\];]+$',
r'playlist\s*=\s*\[(?P<options>.+)];?$',
- webpage, 'jw config', flags=re.MULTILINE, group='options'),
+ r'odkPlayer\.init.*?(?P<options>{[^;]+}).*?;',
+ ), webpage, 'jw config', flags=re.MULTILINE | re.DOTALL, group='options'),
video_id, transform_source=js_to_json)
info = self._parse_jwplayer_data(
jw_config, video_id, require_title=False, m3u8_id='hls',
diff --git a/hypervideo_dl/extractor/onefootball.py b/hypervideo_dl/extractor/onefootball.py
index 826faad..591d157 100644
--- a/hypervideo_dl/extractor/onefootball.py
+++ b/hypervideo_dl/extractor/onefootball.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -39,7 +36,6 @@ class OneFootballIE(InfoExtractor):
data_json = self._search_json_ld(webpage, id)
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title'),
diff --git a/hypervideo_dl/extractor/onenewsnz.py b/hypervideo_dl/extractor/onenewsnz.py
new file mode 100644
index 0000000..a46211e
--- /dev/null
+++ b/hypervideo_dl/extractor/onenewsnz.py
@@ -0,0 +1,111 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+from ..utils import (
+ ExtractorError,
+ traverse_obj
+)
+
+
+class OneNewsNZIE(InfoExtractor):
+ IE_NAME = '1News'
+ IE_DESC = '1news.co.nz article videos'
+ _VALID_URL = r'https?://(?:www\.)?(?:1|one)news\.co\.nz/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+ _TESTS = [
+ { # Brightcove video
+ 'url': 'https://www.1news.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
+ 'info_dict': {
+ 'id': 'cows-painted-green-on-parliament-lawn-in-climate-protest',
+ 'title': '\'Cows\' painted green on Parliament lawn in climate protest',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '6312993358112',
+ 'title': 'Activists dressed as cows painted green outside Parliament in climate protest',
+ 'ext': 'mp4',
+ 'tags': 'count:6',
+ 'uploader_id': '963482464001',
+ 'timestamp': 1664416255,
+ 'upload_date': '20220929',
+ 'duration': 38.272,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Greenpeace accused the Government of "greenwashing" instead of taking climate action.',
+ }
+ }]
+ }, {
+ # YouTube video
+ 'url': 'https://www.1news.co.nz/2022/09/30/now-is-the-time-to-care-about-womens-rugby/',
+ 'info_dict': {
+ 'id': 'now-is-the-time-to-care-about-womens-rugby',
+ 'title': 'Now is the time to care about women\'s rugby',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 's4wEB9neTfU',
+ 'title': 'Why I love women’s rugby: Black Fern Ruahei Demant',
+ 'ext': 'mp4',
+ 'channel_follower_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'tags': 'count:12',
+ 'uploader': 'Re: News',
+ 'upload_date': '20211215',
+ 'uploader_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'uploader_url': 'http://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'channel_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
+ 'channel': 'Re: News',
+ 'like_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/s4wEB9neTfU/maxresdefault.jpg',
+ 'age_limit': 0,
+ 'view_count': int,
+ 'categories': ['Sports'],
+ 'duration': 222,
+ 'description': 'md5:8874410e5740ed1d8fd0df839f849813',
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ }
+ }]
+ }, {
+ # 2 Brightcove videos
+ 'url': 'https://www.1news.co.nz/2022/09/29/raw-videos-capture-hurricane-ians-fury-as-it-slams-florida/',
+ 'info_dict': {
+ 'id': 'raw-videos-capture-hurricane-ians-fury-as-it-slams-florida',
+ 'title': 'Raw videos capture Hurricane Ian\'s fury as it slams Florida',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://www.onenews.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
+ 'only_matching': True,
+ }]
+
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/0xpHIR6IB_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
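+ # Fusion.globalContent holds the article's content elements, including any embedded players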
+ fusion_metadata = self._search_json(r'Fusion\.globalContent\s*=', webpage, 'fusion metadata', display_id)
+
+ entries = []
+ for item in traverse_obj(fusion_metadata, 'content_elements') or []:
+ item_type = traverse_obj(item, 'subtype')
+ if item_type == 'video':
+ brightcove_config = traverse_obj(item, ('embed', 'config'))
+ brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
+ traverse_obj(brightcove_config, 'brightcoveAccount') or '963482464001',
+ traverse_obj(brightcove_config, 'brightcoveVideoId')
+ )
+ entries.append(self.url_result(brightcove_url, BrightcoveNewIE))
+ elif item_type == 'youtube':
+ video_id_or_url = traverse_obj(item, ('referent', 'id'), ('raw_oembed', '_id'))
+ if video_id_or_url:
+ entries.append(self.url_result(video_id_or_url, ie='Youtube'))
+
+ if not entries:
+ raise ExtractorError('This article does not have a video.', expected=True)
+
+ playlist_title = (
+ traverse_obj(fusion_metadata, ('headlines', 'basic'))
+ or self._generic_title('', webpage)
+ )
+ return self.playlist_result(entries, display_id, playlist_title)
diff --git a/hypervideo_dl/extractor/onet.py b/hypervideo_dl/extractor/onet.py
index 95177a2..0d59e8c 100644
--- a/hypervideo_dl/extractor/onet.py
+++ b/hypervideo_dl/extractor/onet.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -83,7 +80,6 @@ class OnetBaseIE(InfoExtractor):
'vbr': float_or_none(f.get('video_bitrate')),
})
formats.append(http_f)
- self._sort_formats(formats)
meta = video.get('meta', {})
diff --git a/hypervideo_dl/extractor/onionstudios.py b/hypervideo_dl/extractor/onionstudios.py
index cf5c39e..5fa49e1 100644
--- a/hypervideo_dl/extractor/onionstudios.py
+++ b/hypervideo_dl/extractor/onionstudios.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import js_to_json
@@ -10,6 +5,7 @@ from ..utils import js_to_json
class OnionStudiosIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+ _EMBED_REGEX = [r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1']
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
@@ -32,13 +28,6 @@ class OnionStudiosIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/hypervideo_dl/extractor/ooyala.py b/hypervideo_dl/extractor/ooyala.py
index 20cfa0a..65afccd 100644
--- a/hypervideo_dl/extractor/ooyala.py
+++ b/hypervideo_dl/extractor/ooyala.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import base64
import re
@@ -12,6 +10,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
+ smuggle_url,
try_get,
unsmuggle_url,
)
@@ -86,7 +85,6 @@ class OoyalaBaseIE(InfoExtractor):
if not formats and not auth_data.get('authorized'):
self.raise_no_formats('%s said: %s' % (
self.IE_NAME, auth_data['message']), expected=True)
- self._sort_formats(formats)
subtitles = {}
for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
@@ -153,6 +151,29 @@ class OoyalaIE(OoyalaBaseIE):
}
]
+ def _extract_from_webpage(self, url, webpage):
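+ # Try the known Ooyala embed markup variants, each capturing the embed code ("ec")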
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+ or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+ or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ if mobj is not None:
+ embed_token = self._search_regex(
+ r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+ webpage, 'ooyala embed token', default=None)
+ yield self._build_url_result(smuggle_url(
+ mobj.group('ec'), {
+ 'domain': url,
+ 'embed_token': embed_token,
+ }))
+ return
+
+ # Look for multiple Ooyala embeds on SBN network websites
+ mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+ if mobj is not None:
+ for v in self._parse_json(mobj.group(1), self._generic_id(url), fatal=False) or []:
+ yield self._build_url_result(smuggle_url(v['provider_video_id'], {'domain': url}))
+
@staticmethod
def _url_for_embed_code(embed_code):
return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
diff --git a/hypervideo_dl/extractor/opencast.py b/hypervideo_dl/extractor/opencast.py
index cf8d917..fa46757 100644
--- a/hypervideo_dl/extractor/opencast.py
+++ b/hypervideo_dl/extractor/opencast.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -95,8 +92,6 @@ class OpencastBaseIE(InfoExtractor):
})
formats.append(track_obj)
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/openload.py b/hypervideo_dl/extractor/openload.py
index fe4740a..56b8330 100644
--- a/hypervideo_dl/extractor/openload.py
+++ b/hypervideo_dl/extractor/openload.py
@@ -1,22 +1,19 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import collections
+import contextlib
import json
import os
import subprocess
import tempfile
-from ..compat import (
- compat_urlparse,
- compat_kwargs,
-)
+from ..compat import compat_urlparse
from ..utils import (
- check_executable,
- encodeArgument,
ExtractorError,
+ Popen,
+ check_executable,
+ format_field,
get_exe_version,
is_outdated_version,
- Popen,
+ shell_quote,
)
@@ -37,13 +34,11 @@ def cookie_to_dict(cookie):
cookie_dict['secure'] = cookie.secure
if cookie.discard is not None:
cookie_dict['discard'] = cookie.discard
- try:
+ with contextlib.suppress(TypeError):
if (cookie.has_nonstandard_attr('httpOnly')
or cookie.has_nonstandard_attr('httponly')
or cookie.has_nonstandard_attr('HttpOnly')):
cookie_dict['httponly'] = True
- except TypeError:
- pass
return cookie_dict
@@ -51,13 +46,15 @@ def cookie_jar_to_list(cookie_jar):
return [cookie_to_dict(cookie) for cookie in cookie_jar]
-class PhantomJSwrapper(object):
+class PhantomJSwrapper:
"""PhantomJS wrapper class
This class is experimental.
"""
- _TEMPLATE = r'''
+ INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html'
+
+ _BASE_JS = R'''
phantom.onError = function(msg, trace) {{
var msgStack = ['PHANTOM ERROR: ' + msg];
if(trace && trace.length) {{
@@ -70,6 +67,9 @@ class PhantomJSwrapper(object):
console.error(msgStack.join('\n'));
phantom.exit(1);
}};
+ '''
+
+ _TEMPLATE = R'''
var page = require('webpage').create();
var fs = require('fs');
var read = {{ mode: 'r', charset: 'utf-8' }};
@@ -112,9 +112,7 @@ class PhantomJSwrapper(object):
self.exe = check_executable('phantomjs', ['-v'])
if not self.exe:
- raise ExtractorError('PhantomJS executable not found in PATH, '
- 'download it from http://phantomjs.org',
- expected=True)
+ raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True)
self.extractor = extractor
@@ -125,23 +123,25 @@ class PhantomJSwrapper(object):
'Your copy of PhantomJS is outdated, update it to version '
'%s or newer if you encounter any errors.' % required_version)
- self.options = {
- 'timeout': timeout,
- }
for name in self._TMP_FILE_NAMES:
tmp = tempfile.NamedTemporaryFile(delete=False)
tmp.close()
self._TMP_FILES[name] = tmp
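+ # ChainMap lets get() overlay per-call values (url, ua, jscode) on top of the static temp-file paths when formatting the JS template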
+ self.options = collections.ChainMap({
+ 'timeout': timeout,
+ }, {
+ x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+ for x in self._TMP_FILE_NAMES
+ })
+
def __del__(self):
for name in self._TMP_FILE_NAMES:
- try:
+ with contextlib.suppress(OSError, KeyError):
os.remove(self._TMP_FILES[name].name)
- except (IOError, OSError, KeyError):
- pass
def _save_cookies(self, url):
- cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ cookies = cookie_jar_to_list(self.extractor.cookiejar)
for cookie in cookies:
if 'path' not in cookie:
cookie['path'] = '/'
@@ -158,7 +158,7 @@ class PhantomJSwrapper(object):
cookie['rest'] = {'httpOnly': None}
if 'expiry' in cookie:
cookie['expire_time'] = cookie['expiry']
- self.extractor._set_cookie(**compat_kwargs(cookie))
+ self.extractor._set_cookie(**cookie)
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
"""
@@ -180,7 +180,7 @@ class PhantomJSwrapper(object):
In most cases you don't need to add any `jscode`.
It is executed in `page.onLoadFinished`.
`saveAndExit();` is mandatory, use it instead of `phantom.exit()`
- It is possible to wait for some element on the webpage, for example:
+ It is possible to wait for some element on the webpage, e.g.
var check = function() {
var elementFound = page.evaluate(function() {
return document.querySelector('#b.done') !== null;
@@ -205,33 +205,39 @@ class PhantomJSwrapper(object):
self._save_cookies(url)
- replaces = self.options
- replaces['url'] = url
user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
- replaces['ua'] = user_agent.replace('"', '\\"')
- replaces['jscode'] = jscode
-
- for x in self._TMP_FILE_NAMES:
- replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
-
- with open(self._TMP_FILES['script'].name, 'wb') as f:
- f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
-
- if video_id is None:
- self.extractor.to_screen('%s' % (note2,))
- else:
- self.extractor.to_screen('%s: %s' % (video_id, note2))
-
- p = Popen(
- [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = p.communicate_or_kill()
- if p.returncode != 0:
- raise ExtractorError(
- 'Executing JS failed\n:' + encodeArgument(err))
+ jscode = self._TEMPLATE.format_map(self.options.new_child({
+ 'url': url,
+ 'ua': user_agent.replace('"', '\\"'),
+ 'jscode': jscode,
+ }))
+
+ stdout = self.execute(jscode, video_id, note=note2)
+
with open(self._TMP_FILES['html'].name, 'rb') as f:
html = f.read().decode('utf-8')
-
self._load_cookies()
- return (html, encodeArgument(out))
+ return html, stdout
+
+ def execute(self, jscode, video_id=None, *, note='Executing JS'):
+ """Execute JS and return stdout"""
+ if 'phantom.exit();' not in jscode:
+ jscode += ';\nphantom.exit();'
+ jscode = self._BASE_JS + jscode
+
+ with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f:
+ f.write(jscode)
+ self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
+
+ cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name]
+ self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}')
+ try:
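+ # self.options['timeout'] is in milliseconds; Popen.run expects seconds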
+ stdout, stderr, returncode = Popen.run(cmd, timeout=self.options['timeout'] / 1000,
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ except Exception as e:
+ raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e)
+ if returncode:
+ raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}')
+
+ return stdout
diff --git a/hypervideo_dl/extractor/openrec.py b/hypervideo_dl/extractor/openrec.py
index 5eb1cdb..86dc9bb 100644
--- a/hypervideo_dl/extractor/openrec.py
+++ b/hypervideo_dl/extractor/openrec.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -38,8 +35,8 @@ class OpenRecBaseIE(InfoExtractor):
raise ExtractorError(f'Failed to extract {name} info')
formats = list(self._expand_media(video_id, get_first(movie_stores, 'media')))
- if not formats and is_live:
- # archived livestreams
+ if not formats:
+ # archived livestreams or subscriber-only videos
cookies = self._get_cookies('https://www.openrec.tv/')
detail = self._download_json(
f'https://apiv5.openrec.tv/api/v5/movies/{video_id}/detail', video_id,
@@ -53,8 +50,6 @@ class OpenRecBaseIE(InfoExtractor):
formats = list(self._expand_media(video_id, new_media))
is_live = False
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': get_first(movie_stores, 'title'),
@@ -116,7 +111,6 @@ class OpenRecCaptureIE(OpenRecBaseIE):
formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/ora.py b/hypervideo_dl/extractor/ora.py
index 422d0b3..d49909d 100644
--- a/hypervideo_dl/extractor/ora.py
+++ b/hypervideo_dl/extractor/ora.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
@@ -57,7 +54,6 @@ class OraTVIE(InfoExtractor):
'format_id': q,
'quality': preference(q),
})
- self._sort_formats(formats)
else:
return self.url_result(self._search_regex(
r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube')
diff --git a/hypervideo_dl/extractor/orf.py b/hypervideo_dl/extractor/orf.py
index 0628977..e9d23a4 100644
--- a/hypervideo_dl/extractor/orf.py
+++ b/hypervideo_dl/extractor/orf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import re
@@ -15,9 +12,10 @@ from ..utils import (
join_nonempty,
orderedSet,
remove_end,
+ make_archive_id,
smuggle_url,
- str_or_none,
strip_jsonp,
+ try_call,
unescapeHTML,
unified_strdate,
unsmuggle_url,
@@ -136,8 +134,6 @@ class ORFTVthekIE(InfoExtractor):
HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')
- self._sort_formats(formats)
-
subtitles = {}
for sub in sd.get('subtitles', []):
sub_src = sub.get('src')
@@ -203,208 +199,99 @@ class ORFTVthekIE(InfoExtractor):
class ORFRadioIE(InfoExtractor):
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- show_date = mobj.group('date')
- show_id = mobj.group('show')
+ IE_NAME = 'orf:radio'
+
+ STATION_INFO = {
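+ # station slug -> (API station, loopstream channel, legacy extractor key used for archive IDs)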
+ 'fm4': ('fm4', 'fm4', 'orffm4'),
+ 'noe': ('noe', 'oe2n', 'orfnoe'),
+ 'wien': ('wie', 'oe2w', 'orfwie'),
+ 'burgenland': ('bgl', 'oe2b', 'orfbgl'),
+ 'ooe': ('ooe', 'oe2o', 'orfooe'),
+ 'steiermark': ('stm', 'oe2st', 'orfstm'),
+ 'kaernten': ('ktn', 'oe2k', 'orfktn'),
+ 'salzburg': ('sbg', 'oe2s', 'orfsbg'),
+ 'tirol': ('tir', 'oe2t', 'orftir'),
+ 'vorarlberg': ('vbg', 'oe2v', 'orfvbg'),
+ 'oe3': ('oe3', 'oe3', 'orfoe3'),
+ 'oe1': ('oe1', 'oe1', 'orfoe1'),
+ }
+ _STATION_RE = '|'.join(map(re.escape, STATION_INFO.keys()))
- data = self._download_json(
- 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
- % (self._API_STATION, show_id, show_date), show_id)
+ _VALID_URL = rf'''(?x)
+ https?://(?:
+ (?P<station>{_STATION_RE})\.orf\.at/player|
+ radiothek\.orf\.at/(?P<station2>{_STATION_RE})
+ )/(?P<date>[0-9]+)/(?P<show>\w+)'''
- entries = []
- for info in data['streams']:
- loop_stream_id = str_or_none(info.get('loopStreamId'))
- if not loop_stream_id:
- continue
- title = str_or_none(data.get('title'))
- if not title:
- continue
- start = int_or_none(info.get('start'), scale=1000)
- end = int_or_none(info.get('end'), scale=1000)
- duration = end - start if end and start else None
- entries.append({
- 'id': loop_stream_id.replace('.mp3', ''),
- 'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
- 'title': title,
- 'description': clean_html(data.get('subtitle')),
- 'duration': duration,
- 'timestamp': start,
+ _TESTS = [{
+ 'url': 'https://radiothek.orf.at/ooe/20220801/OGMO',
+ 'info_dict': {
+ 'id': 'OGMO',
+ 'title': 'Guten Morgen OÖ',
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
+ },
+ 'playlist': [{
+ 'md5': 'f33147d954a326e338ea52572c2810e8',
+ 'info_dict': {
+ 'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
'ext': 'mp3',
- 'series': data.get('programTitle'),
- })
-
- return {
- '_type': 'playlist',
- 'id': show_id,
- 'title': data.get('title'),
- 'description': clean_html(data.get('subtitle')),
- 'entries': entries,
- }
-
-
-class ORFFM4IE(ORFRadioIE):
- IE_NAME = 'orf:fm4'
- IE_DESC = 'radio FM4'
- _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
- _API_STATION = 'fm4'
- _LOOP_STATION = 'fm4'
-
- _TEST = {
- 'url': 'http://fm4.orf.at/player/20170107/4CC',
- 'md5': '2b0be47375432a7ef104453432a19212',
+ 'title': 'Guten Morgen OÖ',
+ 'upload_date': '20220801',
+ 'duration': 18000,
+ 'timestamp': 1659322789,
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
+ }
+ }]
+ }, {
+ 'url': 'https://ooe.orf.at/player/20220801/OGMO',
'info_dict': {
- 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
- 'ext': 'mp3',
- 'title': 'Solid Steel Radioshow',
- 'description': 'Die Mixshow von Coldcut und Ninja Tune.',
- 'duration': 3599,
- 'timestamp': 1483819257,
- 'upload_date': '20170107',
+ 'id': 'OGMO',
+ 'title': 'Guten Morgen OÖ',
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
},
- 'skip': 'Shows from ORF radios are only available for 7 days.',
+ 'playlist': [{
+ 'md5': 'f33147d954a326e338ea52572c2810e8',
+ 'info_dict': {
+ 'id': '2022-08-01_0459_tl_66_7DaysMon1_319062',
+ 'ext': 'mp3',
+ 'title': 'Guten Morgen OÖ',
+ 'upload_date': '20220801',
+ 'duration': 18000,
+ 'timestamp': 1659322789,
+ 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a',
+ }
+ }]
+ }, {
+ 'url': 'http://fm4.orf.at/player/20170107/4CC',
'only_matching': True,
- }
-
-
-class ORFNOEIE(ORFRadioIE):
- IE_NAME = 'orf:noe'
- IE_DESC = 'Radio Niederösterreich'
- _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'noe'
- _LOOP_STATION = 'oe2n'
-
- _TEST = {
+ }, {
'url': 'https://noe.orf.at/player/20200423/NGM',
'only_matching': True,
- }
-
-
-class ORFWIEIE(ORFRadioIE):
- IE_NAME = 'orf:wien'
- IE_DESC = 'Radio Wien'
- _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'wie'
- _LOOP_STATION = 'oe2w'
-
- _TEST = {
+ }, {
'url': 'https://wien.orf.at/player/20200423/WGUM',
'only_matching': True,
- }
-
-
-class ORFBGLIE(ORFRadioIE):
- IE_NAME = 'orf:burgenland'
- IE_DESC = 'Radio Burgenland'
- _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'bgl'
- _LOOP_STATION = 'oe2b'
-
- _TEST = {
+ }, {
'url': 'https://burgenland.orf.at/player/20200423/BGM',
'only_matching': True,
- }
-
-
-class ORFOOEIE(ORFRadioIE):
- IE_NAME = 'orf:oberoesterreich'
- IE_DESC = 'Radio Oberösterreich'
- _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'ooe'
- _LOOP_STATION = 'oe2o'
-
- _TEST = {
- 'url': 'https://ooe.orf.at/player/20200423/OGMO',
- 'only_matching': True,
- }
-
-
-class ORFSTMIE(ORFRadioIE):
- IE_NAME = 'orf:steiermark'
- IE_DESC = 'Radio Steiermark'
- _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'stm'
- _LOOP_STATION = 'oe2st'
-
- _TEST = {
+ }, {
'url': 'https://steiermark.orf.at/player/20200423/STGMS',
'only_matching': True,
- }
-
-
-class ORFKTNIE(ORFRadioIE):
- IE_NAME = 'orf:kaernten'
- IE_DESC = 'Radio Kärnten'
- _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'ktn'
- _LOOP_STATION = 'oe2k'
-
- _TEST = {
+ }, {
'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
'only_matching': True,
- }
-
-
-class ORFSBGIE(ORFRadioIE):
- IE_NAME = 'orf:salzburg'
- IE_DESC = 'Radio Salzburg'
- _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'sbg'
- _LOOP_STATION = 'oe2s'
-
- _TEST = {
+ }, {
'url': 'https://salzburg.orf.at/player/20200423/SGUM',
'only_matching': True,
- }
-
-
-class ORFTIRIE(ORFRadioIE):
- IE_NAME = 'orf:tirol'
- IE_DESC = 'Radio Tirol'
- _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'tir'
- _LOOP_STATION = 'oe2t'
-
- _TEST = {
+ }, {
'url': 'https://tirol.orf.at/player/20200423/TGUMO',
'only_matching': True,
- }
-
-
-class ORFVBGIE(ORFRadioIE):
- IE_NAME = 'orf:vorarlberg'
- IE_DESC = 'Radio Vorarlberg'
- _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'vbg'
- _LOOP_STATION = 'oe2v'
-
- _TEST = {
+ }, {
'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
'only_matching': True,
- }
-
-
-class ORFOE3IE(ORFRadioIE):
- IE_NAME = 'orf:oe3'
- IE_DESC = 'Radio Österreich 3'
- _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'oe3'
- _LOOP_STATION = 'oe3'
-
- _TEST = {
+ }, {
'url': 'https://oe3.orf.at/player/20200424/3WEK',
'only_matching': True,
- }
-
-
-class ORFOE1IE(ORFRadioIE):
- IE_NAME = 'orf:oe1'
- IE_DESC = 'Radio Österreich 1'
- _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
- _API_STATION = 'oe1'
- _LOOP_STATION = 'oe1'
-
- _TEST = {
+ }, {
'url': 'http://oe1.orf.at/player/20170108/456544',
'md5': '34d8a6e67ea888293741c86a099b745b',
'info_dict': {
@@ -416,7 +303,35 @@ class ORFOE1IE(ORFRadioIE):
'upload_date': '20170108',
},
'skip': 'Shows from ORF radios are only available for 7 days.'
- }
+ }]
+
+ def _entries(self, data, station):
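+ # A broadcast may be split into several "loop stream" segments; yield one entry per segment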
+ _, loop_station, old_ie = self.STATION_INFO[station]
+ for info in data['streams']:
+ item_id = info.get('loopStreamId')
+ if not item_id:
+ continue
+ video_id = item_id.replace('.mp3', '')
+ yield {
+ 'id': video_id,
+ 'ext': 'mp3',
+ 'url': f'https://loopstream01.apa.at/?channel={loop_station}&id={item_id}',
+ '_old_archive_ids': [make_archive_id(old_ie, video_id)],
+ 'title': data.get('title'),
+ 'description': clean_html(data.get('subtitle')),
+ 'duration': try_call(lambda: (info['end'] - info['start']) / 1000),
+ 'timestamp': int_or_none(info.get('start'), scale=1000),
+ 'series': data.get('programTitle'),
+ }
+
+ def _real_extract(self, url):
+ station, station2, show_date, show_id = self._match_valid_url(url).group('station', 'station2', 'date', 'show')
+ api_station, _, _ = self.STATION_INFO[station or station2]
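+ # The broadcast API returns the show metadata together with its list of stream segments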
+ data = self._download_json(
+ f'http://audioapi.orf.at/{api_station}/api/json/current/broadcast/{show_id}/{show_date}', show_id)
+
+ return self.playlist_result(
+ self._entries(data, station or station2), show_id, data.get('title'), clean_html(data.get('subtitle')))
class ORFIPTVIE(InfoExtractor):
@@ -490,7 +405,6 @@ class ORFIPTVIE(InfoExtractor):
format_url, video_id, 'mp4', m3u8_id=format_id))
else:
continue
- self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
description = self._og_search_description(webpage)
@@ -590,7 +504,6 @@ class ORFFM4StoryIE(InfoExtractor):
format_url, video_id, 'mp4', m3u8_id=format_id))
else:
continue
- self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
if idx >= 1:
diff --git a/hypervideo_dl/extractor/outsidetv.py b/hypervideo_dl/extractor/outsidetv.py
index c5333b0..b1fcbd6 100644
--- a/hypervideo_dl/extractor/outsidetv.py
+++ b/hypervideo_dl/extractor/outsidetv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/packtpub.py b/hypervideo_dl/extractor/packtpub.py
index 62c52cd..51778d8 100644
--- a/hypervideo_dl/extractor/packtpub.py
+++ b/hypervideo_dl/extractor/packtpub.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/palcomp3.py b/hypervideo_dl/extractor/palcomp3.py
index d0a62fb..4b0801c 100644
--- a/hypervideo_dl/extractor/palcomp3.py
+++ b/hypervideo_dl/extractor/palcomp3.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/pandoratv.py b/hypervideo_dl/extractor/pandoratv.py
index 6230053..ccc78da 100644
--- a/hypervideo_dl/extractor/pandoratv.py
+++ b/hypervideo_dl/extractor/pandoratv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -116,7 +112,6 @@ class PandoraTVIE(InfoExtractor):
'url': format_url,
'height': int(height),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/panopto.py b/hypervideo_dl/extractor/panopto.py
index 3388f7f..32c103b 100644
--- a/hypervideo_dl/extractor/panopto.py
+++ b/hypervideo_dl/extractor/panopto.py
@@ -1,4 +1,3 @@
-import re
import calendar
import json
import functools
@@ -73,15 +72,10 @@ class PanoptoBaseIE(InfoExtractor):
def _parse_fragment(url):
return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()}
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=["\'](?P<url>%s/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)' % PanoptoIE.BASE_URL_RE,
- webpage)]
-
class PanoptoIE(PanoptoBaseIE):
_VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{PanoptoBaseIE.BASE_URL_RE}/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)']
_TESTS = [
{
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
@@ -413,7 +407,6 @@ class PanoptoIE(PanoptoBaseIE):
subtitles = self._merge_subtitles(
podcast_subtitles, streams_subtitles, self.extract_subtitles(base_url, video_id, delivery))
- self._sort_formats(formats)
self.mark_watched(base_url, video_id, delivery_info)
return {
diff --git a/hypervideo_dl/extractor/paramountplus.py b/hypervideo_dl/extractor/paramountplus.py
index 94a9319..7e472a6 100644
--- a/hypervideo_dl/extractor/paramountplus.py
+++ b/hypervideo_dl/extractor/paramountplus.py
@@ -1,9 +1,9 @@
-from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from .cbs import CBSBaseIE
from ..utils import (
+ ExtractorError,
int_or_none,
url_or_none,
)
@@ -25,10 +25,17 @@ class ParamountPlusIE(CBSBaseIE):
'ext': 'mp4',
'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
'description': 'md5:7ac835000645a69933df226940e3c859',
- 'duration': 1418,
+ 'duration': 1426,
'timestamp': 920264400,
'upload_date': '19990301',
'uploader': 'CBSI-NEW',
+ 'episode_number': 5,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'season': 'Season 2',
+ 'chapters': 'count:3',
+ 'episode': 'Episode 5',
+ 'season_number': 2,
+ 'series': 'CatDog',
},
'params': {
'skip_download': 'm3u8',
@@ -44,6 +51,13 @@ class ParamountPlusIE(CBSBaseIE):
'timestamp': 1627063200,
'upload_date': '20210723',
'uploader': 'CBSI-NEW',
+ 'episode_number': 81,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'season': 'Season 2',
+ 'chapters': 'count:4',
+ 'episode': 'Episode 81',
+ 'season_number': 2,
+ 'series': 'Tooning Out The News',
},
'params': {
'skip_download': 'm3u8',
@@ -55,14 +69,18 @@ class ParamountPlusIE(CBSBaseIE):
'ext': 'mp4',
'title': 'Daddy\'s Home',
'upload_date': '20151225',
- 'description': 'md5:a0beaf24e8d3b0e81b2ee41d47c06f33',
+ 'description': 'md5:9a6300c504d5e12000e8707f20c54745',
'uploader': 'CBSI-NEW',
'timestamp': 1451030400,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'chapters': 'count:0',
+ 'duration': 5761,
+ 'series': 'Paramount+ Movies',
},
'params': {
'skip_download': 'm3u8',
},
- 'expected_warnings': ['Ignoring subtitle tracks'], # TODO: Investigate this
+ 'skip': 'DRM',
}, {
'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/',
'info_dict': {
@@ -73,11 +91,15 @@ class ParamountPlusIE(CBSBaseIE):
'timestamp': 1577865600,
'title': 'Sonic the Hedgehog',
'upload_date': '20200101',
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'chapters': 'count:0',
+ 'duration': 5932,
+ 'series': 'Paramount+ Movies',
},
'params': {
'skip_download': 'm3u8',
},
- 'expected_warnings': ['Ignoring subtitle tracks'],
+ 'skip': 'DRM',
}, {
'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/',
'only_matching': True,
@@ -94,24 +116,51 @@ class ParamountPlusIE(CBSBaseIE):
def _extract_video_info(self, content_id, mpx_acc=2198311517):
items_data = self._download_json(
- 'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/%s.json' % content_id,
- content_id, query={'locale': 'en-us', 'at': 'ABCqWNNSwhIqINWIIAG+DFzcFUvF8/vcN6cNyXFFfNzWAIvXuoVgX+fK4naOC7V8MLI='}, headers=self.geo_verification_headers())
+ f'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/{content_id}.json',
+ content_id, query={
+ 'locale': 'en-us',
+ 'at': 'ABCXgPuoStiPipsK0OHVXIVh68zNys+G4f7nW9R6qH68GDOcneW6Kg89cJXGfiQCsj0=',
+ }, headers=self.geo_verification_headers())
asset_types = {
item.get('assetType'): {
'format': 'SMIL',
- 'formats': 'MPEG4,M3U',
+ 'formats': 'M3U+none,MPEG4', # '+none' specifies ProtectionScheme (no DRM)
} for item in items_data['itemList']
}
item = items_data['itemList'][-1]
- return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={
+
+ info, error = {}, None
+ metadata = {
'title': item.get('title'),
'series': item.get('seriesTitle'),
'season_number': int_or_none(item.get('seasonNum')),
'episode_number': int_or_none(item.get('episodeNum')),
'duration': int_or_none(item.get('duration')),
'thumbnail': url_or_none(item.get('thumbnail')),
- })
+ }
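+ # Try DRM-free formats first; if none are found, re-query below with DRM formats included so DRM can be reported instead of a generic error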
+ try:
+ info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
+ except ExtractorError as e:
+ error = e
+
+ # Check for DRM formats to give an appropriate error
+ if not info.get('formats'):
+ for query in asset_types.values():
+ query['formats'] = 'MPEG-DASH,M3U,MPEG4' # allows DRM formats
+
+ try:
+ drm_info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
+ except ExtractorError:
+ if error:
+ raise error from None
+ raise
+ if drm_info['formats']:
+ self.report_drm(content_id)
+ elif error:
+ raise error
+
+ return info
class ParamountPlusSeriesIE(InfoExtractor):
diff --git a/hypervideo_dl/extractor/parler.py b/hypervideo_dl/extractor/parler.py
new file mode 100644
index 0000000..68a60bc
--- /dev/null
+++ b/hypervideo_dl/extractor/parler.py
@@ -0,0 +1,111 @@
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ clean_html,
+ format_field,
+ int_or_none,
+ strip_or_none,
+ traverse_obj,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
+
+class ParlerIE(InfoExtractor):
+ IE_DESC = 'Posts on parler.com'
+ _VALID_URL = r'https://parler\.com/feed/(?P<id>[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
+ _TESTS = [
+ {
+ 'url': 'https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7',
+ 'md5': '16e0f447bf186bb3cf64de5bbbf4d22d',
+ 'info_dict': {
+ 'id': 'df79fdba-07cc-48fe-b085-3293897520d7',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
+ 'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
+ 'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
+ 'timestamp': 1659744000,
+ 'upload_date': '20220806',
+ 'uploader': 'Tulsi Gabbard',
+ 'uploader_id': 'TulsiGabbard',
+ 'uploader_url': 'https://parler.com/TulsiGabbard',
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ {
+ 'url': 'https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287',
+ 'md5': '11687e2f5bb353682cee338d181422ed',
+ 'info_dict': {
+ 'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://bl-images.parler.com/videos/317827a8-1e48-4cbc-981f-7dd17d4c1183/thumbnail.jpeg',
+ 'title': 'Parler video #a7406eb4-91e5-4793-b5e3-ade57a24e287',
+ 'description': 'This man should run for office',
+ 'timestamp': 1659657600,
+ 'upload_date': '20220805',
+ 'uploader': 'Benny Johnson',
+ 'uploader_id': 'BennyJohnson',
+ 'uploader_url': 'https://parler.com/BennyJohnson',
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ {
+ 'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
+ 'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
+ 'info_dict': {
+ 'id': 'r5vkSaz8PxQ',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/r5vkSaz8PxQ/maxresdefault.webp',
+ 'title': 'Tom MacDonald Names Reaction',
+ 'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
+ 'upload_date': '20220716',
+ 'duration': 1267,
+ 'uploader': 'Mahesh Chookolingo',
+ 'uploader_id': 'maheshchookolingo',
+ 'uploader_url': 'http://www.youtube.com/user/maheshchookolingo',
+ 'channel': 'Mahesh Chookolingo',
+ 'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
+ 'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
+ 'categories': ['Entertainment'],
+ 'tags': list,
+ 'availability': 'public',
+ 'live_status': 'not_live',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ 'channel_follower_count': int,
+ 'age_limit': 0,
+ 'playable_in_embed': True,
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ data = self._download_json(
+ 'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id,
+ data=urlencode_postdata({'uuid': video_id}))['data'][0]
+ primary = data['primary']
+
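+ # Posts that merely share a YouTube link store it in V2LINKLONG; delegate those to the YouTube extractor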
+ embed = self._parse_json(primary.get('V2LINKLONG') or '', video_id, fatal=False)
+ if embed:
+ return self.url_result(embed[0], YoutubeIE)
+
+ return {
+ 'id': video_id,
+ 'url': traverse_obj(primary, ('video_data', 'videoSrc')),
+ 'thumbnail': traverse_obj(primary, ('video_data', 'thumbnailUrl')),
+ 'title': '',
+ 'description': strip_or_none(clean_html(primary.get('full_body'))) or None,
+ 'timestamp': unified_timestamp(primary.get('date_created')),
+ 'uploader': strip_or_none(primary.get('name')),
+ 'uploader_id': strip_or_none(primary.get('username')),
+ 'uploader_url': format_field(strip_or_none(primary.get('username')), None, 'https://parler.com/%s'),
+ 'view_count': int_or_none(primary.get('view_count')),
+ 'comment_count': int_or_none(traverse_obj(data, ('engagement', 'commentCount'))),
+ 'repost_count': int_or_none(traverse_obj(data, ('engagement', 'echoCount'))),
+ }
diff --git a/hypervideo_dl/extractor/parliamentliveuk.py b/hypervideo_dl/extractor/parliamentliveuk.py
deleted file mode 100644
index 974d654..0000000
--- a/hypervideo_dl/extractor/parliamentliveuk.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import uuid
-
-from .common import InfoExtractor
-from ..utils import (
- unified_timestamp,
- try_get,
-)
-
-
-class ParliamentLiveUKIE(InfoExtractor):
- IE_NAME = 'parliamentlive.tv'
- IE_DESC = 'UK parliament videos'
- _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
-
- _TESTS = [{
- 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
- 'info_dict': {
- 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
- 'ext': 'mp4',
- 'title': 'Home Affairs Committee',
- 'timestamp': 1395153872,
- 'upload_date': '20140318',
- },
- }, {
- 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- video_info = self._download_json(f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id)
- _DEVICE_ID = str(uuid.uuid4())
- auth = 'Bearer ' + self._download_json(
- 'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/auth/anonymous',
- video_id, headers={
- 'Origin': 'https://videoplayback.parliamentlive.tv',
- 'Accept': 'application/json, text/plain, */*',
- 'Content-Type': 'application/json;charset=utf-8'
- }, data=json.dumps({
- 'deviceId': _DEVICE_ID,
- 'device': {
- 'deviceId': _DEVICE_ID,
- 'width': 653,
- 'height': 368,
- 'type': 'WEB',
- 'name': ' Mozilla Firefox 91'
- }
- }).encode('utf-8'))['sessionToken']
-
- video_urls = self._download_json(
- f'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/entitlement/{video_id}/play',
- video_id, headers={'Authorization': auth, 'Accept': 'application/json, text/plain, */*'})['formats']
-
- formats = []
- for format in video_urls:
- if not format.get('mediaLocator'):
- continue
- if format.get('format') == 'DASH':
- formats.extend(self._extract_mpd_formats(
- format['mediaLocator'], video_id, mpd_id='dash', fatal=False))
- elif format.get('format') == 'SMOOTHSTREAMING':
- formats.extend(self._extract_ism_formats(
- format['mediaLocator'], video_id, ism_id='ism', fatal=False))
- elif format.get('format') == 'HLS':
- formats.extend(self._extract_m3u8_formats(
- format['mediaLocator'], video_id, m3u8_id='hls', fatal=False))
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': video_info['event']['title'],
- 'timestamp': unified_timestamp(try_get(video_info, lambda x: x['event']['publishedStartTime'])),
- 'thumbnail': video_info.get('thumbnailUrl'),
- }
diff --git a/hypervideo_dl/extractor/parlview.py b/hypervideo_dl/extractor/parlview.py
index c85eaa7..0b54791 100644
--- a/hypervideo_dl/extractor/parlview.py
+++ b/hypervideo_dl/extractor/parlview.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -47,7 +44,6 @@ class ParlviewIE(InfoExtractor):
elif stream.get('streamType') != 'VOD':
self.raise_no_formats('Unknown type of stream was detected: "%s"' % str(stream.get('streamType')))
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
media_info = self._download_webpage(
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
diff --git a/hypervideo_dl/extractor/patreon.py b/hypervideo_dl/extractor/patreon.py
index 963a0d6..4dc0298 100644
--- a/hypervideo_dl/extractor/patreon.py
+++ b/hypervideo_dl/extractor/patreon.py
@@ -1,7 +1,5 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
+from urllib.error import HTTPError
from .common import InfoExtractor
from .vimeo import VimeoIE
@@ -10,17 +8,45 @@ from ..compat import compat_urllib_parse_unquote
from ..utils import (
clean_html,
determine_ext,
+ ExtractorError,
int_or_none,
KNOWN_EXTENSIONS,
mimetype2ext,
parse_iso8601,
str_or_none,
+ traverse_obj,
try_get,
url_or_none,
)
-class PatreonIE(InfoExtractor):
+class PatreonBaseIE(InfoExtractor):
+ USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
+
+ def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
+ if headers is None:
+ headers = {}
+ if 'User-Agent' not in headers:
+ headers['User-Agent'] = self.USER_AGENT
+ if query:
+ query.update({'json-api-version': 1.0})
+
+ try:
+ return self._download_json(
+ f'https://www.patreon.com/api/{ep}',
+ item_id, note=note or 'Downloading API JSON',
+ query=query, fatal=fatal, headers=headers)
+ except ExtractorError as e:
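+ # Re-raise with Patreon's own error detail when the response body is a JSON API error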
+ if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.headers.get('Content-Type')) != 'json':
+ raise
+ err_json = self._parse_json(self._webpage_read_content(e.cause, None, item_id), item_id, fatal=False)
+ err_message = traverse_obj(err_json, ('errors', ..., 'detail'), get_all=False)
+ if err_message:
+ raise ExtractorError(f'Patreon said: {err_message}', expected=True)
+ raise
+
+
+class PatreonIE(PatreonBaseIE):
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.patreon.com/creation?hid=743933',
@@ -29,12 +55,18 @@ class PatreonIE(InfoExtractor):
'id': '743933',
'ext': 'mp3',
'title': 'Episode 166: David Smalley of Dogma Debate',
- 'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
+ 'description': 'md5:34d207dd29aa90e24f1b3f58841b81c7',
'uploader': 'Cognitive Dissonance Podcast',
'thumbnail': 're:^https?://.*$',
'timestamp': 1406473987,
'upload_date': '20140727',
'uploader_id': '87145',
+ 'like_count': int,
+ 'comment_count': int,
+ 'uploader_url': 'https://www.patreon.com/dissonancepod',
+ 'channel_id': '80642',
+ 'channel_url': 'https://www.patreon.com/dissonancepod',
+ 'channel_follower_count': int,
},
}, {
'url': 'http://www.patreon.com/creation?hid=754133',
@@ -45,6 +77,9 @@ class PatreonIE(InfoExtractor):
'title': 'CD 167 Extra',
'uploader': 'Cognitive Dissonance Podcast',
'thumbnail': 're:^https?://.*$',
+ 'like_count': int,
+ 'comment_count': int,
+ 'uploader_url': 'https://www.patreon.com/dissonancepod',
},
'skip': 'Patron-only content',
}, {
@@ -56,8 +91,23 @@ class PatreonIE(InfoExtractor):
'uploader': 'TraciJHines',
'thumbnail': 're:^https?://.*$',
'upload_date': '20150211',
- 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+ 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
'uploader_id': 'TraciJHines',
+ 'categories': ['Entertainment'],
+ 'duration': 282,
+ 'view_count': int,
+ 'tags': 'count:39',
+ 'age_limit': 0,
+ 'channel': 'TraciJHines',
+ 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
+ 'live_status': 'not_live',
+ 'like_count': int,
+ 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
+ 'availability': 'public',
+ 'channel_follower_count': int,
+ 'playable_in_embed': True,
+ 'uploader_url': 'http://www.youtube.com/user/TraciJHines',
+ 'comment_count': int,
},
'params': {
'noplaylist': True,
@@ -83,38 +133,62 @@ class PatreonIE(InfoExtractor):
'uploader_id': '14936315',
},
'skip': 'Patron-only content'
- }]
-
- # Currently Patreon exposes download URL via hidden CSS, so login is not
- # needed. Keeping this commented for when this inevitably changes.
- '''
- def _perform_login(self, username, password):
- login_form = {
- 'redirectUrl': 'http://www.patreon.com/',
- 'email': username,
- 'password': password,
+ }, {
+ # m3u8 video (https://github.com/hypervideo/hypervideo/issues/2277)
+ 'url': 'https://www.patreon.com/posts/video-sketchbook-32452882',
+ 'info_dict': {
+ 'id': '32452882',
+ 'ext': 'mp4',
+ 'comment_count': int,
+ 'uploader_id': '4301314',
+ 'like_count': int,
+ 'timestamp': 1576696962,
+ 'upload_date': '20191218',
+ 'thumbnail': r're:^https?://.*$',
+ 'uploader_url': 'https://www.patreon.com/loish',
+ 'description': 'md5:e2693e97ee299c8ece47ffdb67e7d9d2',
+ 'title': 'VIDEO // sketchbook flipthrough',
+ 'uploader': 'Loish ',
+ 'tags': ['sketchbook', 'video'],
+ 'channel_id': '1641751',
+ 'channel_url': 'https://www.patreon.com/loish',
+ 'channel_follower_count': int,
}
-
- request = sanitized_Request(
- 'https://www.patreon.com/processLogin',
- compat_urllib_parse_urlencode(login_form).encode('utf-8')
- )
- login_page = self._download_webpage(request, None, note='Logging in')
-
- if re.search(r'onLoginFailed', login_page):
- raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
-
- '''
+ }, {
+ # bogus video entries appear under 'media' (when it is included); the real one is under 'post_file'
+ 'url': 'https://www.patreon.com/posts/premium-access-70282931',
+ 'info_dict': {
+ 'id': '70282931',
+ 'ext': 'mp4',
+ 'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
+ 'channel_url': 'https://www.patreon.com/thenormies',
+ 'channel_id': '573397',
+ 'uploader_id': '2929435',
+ 'uploader': 'The Normies',
+ 'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
+ 'comment_count': int,
+ 'upload_date': '20220809',
+ 'thumbnail': r're:^https?://.*$',
+ 'channel_follower_count': int,
+ 'like_count': int,
+ 'timestamp': 1660052820,
+ 'tags': ['The Office', 'early access', 'uncut'],
+ 'uploader_url': 'https://www.patreon.com/thenormies',
+ },
+ 'skip': 'Patron-only content',
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- post = self._download_json(
- 'https://www.patreon.com/api/posts/' + video_id, video_id, query={
+ post = self._call_api(
+ f'posts/{video_id}', video_id, query={
'fields[media]': 'download_url,mimetype,size_bytes',
- 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title',
+ 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title,current_user_can_view',
'fields[user]': 'full_name,url',
+ 'fields[post_tag]': 'value',
+ 'fields[campaign]': 'url,name,patron_count',
'json-api-use-default-includes': 'false',
- 'include': 'media,user',
+ 'include': 'audio,user,user_defined_tags,campaign,attachments_media',
})
attributes = post['data']['attributes']
title = attributes['title'].strip()
@@ -128,6 +202,9 @@ class PatreonIE(InfoExtractor):
'like_count': int_or_none(attributes.get('like_count')),
'comment_count': int_or_none(attributes.get('comment_count')),
}
+ can_view_post = traverse_obj(attributes, 'current_user_can_view')
+ if can_view_post and info['comment_count']:
+ info['__post_extractor'] = self.extract_comments(video_id)
for i in post.get('included', []):
i_type = i.get('type')
@@ -135,12 +212,18 @@ class PatreonIE(InfoExtractor):
media_attributes = i.get('attributes') or {}
download_url = media_attributes.get('download_url')
ext = mimetype2ext(media_attributes.get('mimetype'))
- if download_url and ext in KNOWN_EXTENSIONS:
- info.update({
+
+ # if size_bytes is None, this media file is likely unavailable
+ # See: https://github.com/hypervideo/hypervideo/issues/4608
+ size_bytes = int_or_none(media_attributes.get('size_bytes'))
+ if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
+ # XXX: what happens if there are multiple attachments?
+ return {
+ **info,
'ext': ext,
- 'filesize': int_or_none(media_attributes.get('size_bytes')),
+ 'filesize': size_bytes,
'url': download_url,
- })
+ }
elif i_type == 'user':
user_attributes = i.get('attributes')
if user_attributes:
@@ -150,87 +233,222 @@ class PatreonIE(InfoExtractor):
'uploader_url': user_attributes.get('url'),
})
- if not info.get('url'):
- # handle Vimeo embeds
- if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
- embed_html = try_get(attributes, lambda x: x['embed']['html'])
- v_url = url_or_none(compat_urllib_parse_unquote(
- self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
- if v_url:
- info.update({
- '_type': 'url_transparent',
- 'url': VimeoIE._smuggle_referrer(v_url, 'https://patreon.com'),
- 'ie_key': 'Vimeo',
- })
+ elif i_type == 'post_tag':
+ info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
- if not info.get('url'):
- embed_url = try_get(attributes, lambda x: x['embed']['url'])
- if embed_url:
+ elif i_type == 'campaign':
info.update({
- '_type': 'url',
- 'url': embed_url,
+ 'channel': traverse_obj(i, ('attributes', 'title')),
+ 'channel_id': str_or_none(i.get('id')),
+ 'channel_url': traverse_obj(i, ('attributes', 'url')),
+ 'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
})
- if not info.get('url'):
- post_file = attributes['post_file']
- ext = determine_ext(post_file.get('name'))
+ # handle Vimeo embeds
+ if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
+ embed_html = try_get(attributes, lambda x: x['embed']['html'])
+ v_url = url_or_none(compat_urllib_parse_unquote(
+ self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
+ if v_url:
+ return {
+ **info,
+ '_type': 'url_transparent',
+ 'url': VimeoIE._smuggle_referrer(v_url, 'https://patreon.com'),
+ 'ie_key': 'Vimeo',
+ }
+
+ embed_url = try_get(attributes, lambda x: x['embed']['url'])
+ if embed_url:
+ return {
+ **info,
+ '_type': 'url',
+ 'url': embed_url,
+ }
+
+ post_file = traverse_obj(attributes, 'post_file')
+ if post_file:
+ name = post_file.get('name')
+ ext = determine_ext(name)
if ext in KNOWN_EXTENSIONS:
- info.update({
+ return {
+ **info,
'ext': ext,
'url': post_file['url'],
- })
+ }
+ elif name == 'video':
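+ # Post files named just 'video' have no extension; their URL points at an HLS manifest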
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
+ return {
+ **info,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ if can_view_post is False:
+ self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
+ else:
+ self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
return info
+ def _get_comments(self, post_id):
+ cursor = None
+ count = 0
+ params = {
+ 'page[count]': 50,
+ 'include': 'parent.commenter.campaign,parent.post.user,parent.post.campaign.creator,parent.replies.parent,parent.replies.commenter.campaign,parent.replies.post.user,parent.replies.post.campaign.creator,commenter.campaign,post.user,post.campaign.creator,replies.parent,replies.commenter.campaign,replies.post.user,replies.post.campaign.creator,on_behalf_of_campaign',
+ 'fields[comment]': 'body,created,is_by_creator',
+ 'fields[user]': 'image_url,full_name,url',
+ 'filter[flair]': 'image_tiny_url,name',
+ 'sort': '-created',
+ 'json-api-version': 1.0,
+ 'json-api-use-default-includes': 'false',
+ }
+
+ for page in itertools.count(1):
-class PatreonUserIE(InfoExtractor):
+ params.update({'page[cursor]': cursor} if cursor else {})
+ response = self._call_api(
+ f'posts/{post_id}/comments', post_id, query=params, note='Downloading comments page %d' % page)
+
+ cursor = None
+ for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...), default=[]):
+ count += 1
+ comment_id = comment.get('id')
+ attributes = comment.get('attributes') or {}
+ if comment_id is None:
+ continue
+ author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
+ author_info = traverse_obj(
+ response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
+ get_all=False, expected_type=dict, default={})
- _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?P<id>[-\w]+)'
+ yield {
+ 'id': comment_id,
+ 'text': attributes.get('body'),
+ 'timestamp': parse_iso8601(attributes.get('created')),
+ 'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'),
+ 'author_is_uploader': attributes.get('is_by_creator'),
+ 'author_id': author_id,
+ 'author': author_info.get('full_name'),
+ 'author_thumbnail': author_info.get('image_url'),
+ }
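+ # Paginate while fewer comments than the reported total have been seen; the next cursor is the id of the last comment on this page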
+ if count < traverse_obj(response, ('meta', 'count')):
+ cursor = traverse_obj(response, ('data', -1, 'id'))
+
+ if cursor is None:
+ break
+
+
+class PatreonCampaignIE(PatreonBaseIE):
+
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))'
_TESTS = [{
'url': 'https://www.patreon.com/dissonancepod/',
'info_dict': {
- 'title': 'dissonancepod',
+ 'title': 'Cognitive Dissonance Podcast',
+ 'channel_url': 'https://www.patreon.com/dissonancepod',
+ 'id': '80642',
+ 'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
+ 'channel_id': '80642',
+ 'channel': 'Cognitive Dissonance Podcast',
+ 'age_limit': 0,
+ 'channel_follower_count': int,
+ 'uploader_id': '87145',
+ 'uploader_url': 'https://www.patreon.com/dissonancepod',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': r're:^https?://.*$',
},
'playlist_mincount': 68,
- 'expected_warnings': 'Post not viewable by current user! Skipping!',
+ }, {
+ 'url': 'https://www.patreon.com/m/4767637/posts',
+ 'info_dict': {
+ 'title': 'Not Just Bikes',
+ 'channel_follower_count': int,
+ 'id': '4767637',
+ 'channel_id': '4767637',
+ 'channel_url': 'https://www.patreon.com/notjustbikes',
+ 'description': 'md5:595c6e7dca76ae615b1d38c298a287a1',
+ 'age_limit': 0,
+ 'channel': 'Not Just Bikes',
+ 'uploader_url': 'https://www.patreon.com/notjustbikes',
+ 'uploader': 'Not Just Bikes',
+ 'uploader_id': '37306634',
+ 'thumbnail': r're:^https?://.*$',
+ },
+ 'playlist_mincount': 71
}, {
'url': 'https://www.patreon.com/dissonancepod/posts',
'only_matching': True
- }, ]
+ }, {
+ 'url': 'https://www.patreon.com/m/5932659',
+ 'only_matching': True
+ }]
@classmethod
def suitable(cls, url):
- return False if PatreonIE.suitable(url) else super(PatreonUserIE, cls).suitable(url)
+ return False if PatreonIE.suitable(url) else super(PatreonCampaignIE, cls).suitable(url)
- def _entries(self, campaign_id, user_id):
+ def _entries(self, campaign_id):
cursor = None
params = {
- 'fields[campaign]': 'show_audio_post_download_links,name,url',
- 'fields[post]': 'current_user_can_view,embed,image,is_paid,post_file,published_at,patreon_url,url,post_type,thumbnail_url,title',
+ 'fields[post]': 'patreon_url,url',
'filter[campaign_id]': campaign_id,
'filter[is_draft]': 'false',
'sort': '-published_at',
- 'json-api-version': 1.0,
'json-api-use-default-includes': 'false',
}
for page in itertools.count(1):
params.update({'page[cursor]': cursor} if cursor else {})
- posts_json = self._download_json('https://www.patreon.com/api/posts', user_id, note='Downloading posts page %d' % page, query=params, headers={'Cookie': '.'})
-
- cursor = try_get(posts_json, lambda x: x['meta']['pagination']['cursors']['next'])
+ posts_json = self._call_api('posts', campaign_id, query=params, note='Downloading posts page %d' % page)
+ cursor = traverse_obj(posts_json, ('meta', 'pagination', 'cursors', 'next'))
for post in posts_json.get('data') or []:
- yield self.url_result(url_or_none(try_get(post, lambda x: x['attributes']['patreon_url'])), 'Patreon')
+ yield self.url_result(url_or_none(traverse_obj(post, ('attributes', 'patreon_url'))), 'Patreon')
if cursor is None:
break
def _real_extract(self, url):
- user_id = self._match_id(url)
- webpage = self._download_webpage(url, user_id, headers={'Cookie': '.'})
- campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')
- return self.playlist_result(self._entries(campaign_id, user_id), playlist_title=user_id)
+ campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
+ if campaign_id is None:
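+ # Vanity URLs do not carry the numeric campaign id, so resolve it from the campaign webpage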
+ webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
+ campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')
+
+ params = {
+ 'json-api-use-default-includes': 'false',
+ 'fields[user]': 'full_name,url',
+ 'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url',
+ 'include': 'creator'
+ }
+
+ campaign_response = self._call_api(
+ f'campaigns/{campaign_id}', campaign_id,
+ note='Downloading campaign info', fatal=False,
+ query=params) or {}
+
+ campaign_info = campaign_response.get('data') or {}
+ channel_name = traverse_obj(campaign_info, ('attributes', 'name'))
+ user_info = traverse_obj(
+ campaign_response, ('included', lambda _, v: v['type'] == 'user'),
+ default={}, expected_type=dict, get_all=False)
+
+ return {
+ '_type': 'playlist',
+ 'id': campaign_id,
+ 'title': channel_name,
+ 'entries': self._entries(campaign_id),
+ 'description': clean_html(traverse_obj(campaign_info, ('attributes', 'summary'))),
+ 'channel_url': traverse_obj(campaign_info, ('attributes', 'url')),
+ 'channel_follower_count': int_or_none(traverse_obj(campaign_info, ('attributes', 'patron_count'))),
+ 'channel_id': campaign_id,
+ 'channel': channel_name,
+ 'uploader_url': traverse_obj(user_info, ('attributes', 'url')),
+ 'uploader_id': str_or_none(user_info.get('id')),
+ 'uploader': traverse_obj(user_info, ('attributes', 'full_name')),
+ 'playlist_count': traverse_obj(campaign_info, ('attributes', 'creation_count')),
+ 'age_limit': 18 if traverse_obj(campaign_info, ('attributes', 'is_nsfw')) else 0,
+ 'thumbnail': url_or_none(traverse_obj(campaign_info, ('attributes', 'avatar_photo_url'))),
+ }
diff --git a/hypervideo_dl/extractor/pbs.py b/hypervideo_dl/extractor/pbs.py
index e48a2b8..5bdf561 100644
--- a/hypervideo_dl/extractor/pbs.py
+++ b/hypervideo_dl/extractor/pbs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -663,7 +660,6 @@ class PBSIE(InfoExtractor):
for f in formats:
if (f.get('format_note') or '').endswith(' AD'): # Audio description
f['language_preference'] = -10
- self._sort_formats(formats)
rating_str = info.get('rating')
if rating_str is not None:
diff --git a/hypervideo_dl/extractor/pearvideo.py b/hypervideo_dl/extractor/pearvideo.py
index 1d77722..e27e5a7 100644
--- a/hypervideo_dl/extractor/pearvideo.py
+++ b/hypervideo_dl/extractor/pearvideo.py
@@ -1,12 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
from ..utils import (
qualities,
unified_timestamp,
+ traverse_obj,
)
@@ -39,7 +37,14 @@ class PearVideoIE(InfoExtractor):
} for mobj in re.finditer(
r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
webpage)]
- self._sort_formats(formats)
+ if not formats:
+ info = self._download_json(
+ 'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id,
+ query={'contId': video_id}, headers={'Referer': url})
+ formats = [{
+ 'format_id': k,
+ 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v
+ } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v]
title = self._search_regex(
(r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
diff --git a/hypervideo_dl/extractor/peekvids.py b/hypervideo_dl/extractor/peekvids.py
index 4bf6855..2d9b9a7 100644
--- a/hypervideo_dl/extractor/peekvids.py
+++ b/hypervideo_dl/extractor/peekvids.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -43,7 +40,6 @@ class PeekVidsIE(InfoExtractor):
} for name, url in srcs.items() if len(name) > 8 and name.startswith('data-src')]
if not formats:
formats = [{'url': url} for url in srcs.values()]
- self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
info.update({
@@ -54,7 +50,7 @@ class PeekVidsIE(InfoExtractor):
return info
-class PlayVidsIE(PeekVidsIE):
+class PlayVidsIE(PeekVidsIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|[^/]{2}/)?(?P<id>[^/?#]*)'
_TESTS = [{
'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
diff --git a/hypervideo_dl/extractor/peertube.py b/hypervideo_dl/extractor/peertube.py
index 9d6b821..68e1573 100644
--- a/hypervideo_dl/extractor/peertube.py
+++ b/hypervideo_dl/extractor/peertube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import re
@@ -1060,6 +1057,7 @@ class PeerTubeIE(InfoExtractor):
)
(?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE)
+ _EMBED_REGEX = [r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)''' % (_INSTANCES_RE, _UUID_RE)]
_TESTS = [{
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
'md5': '8563064d245a4be5705bddb22bb00a28',
@@ -1161,16 +1159,15 @@ class PeerTubeIE(InfoExtractor):
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
return 'peertube:%s:%s' % mobj.group('host', 'id')
- @staticmethod
- def _extract_urls(webpage, source_url):
- entries = re.findall(
- r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
- % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
- if not entries:
- peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
- if peertube_url:
- entries = [peertube_url]
- return entries
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
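+ # Prefer explicit <iframe> embeds; otherwise fall back to detecting a bare PeerTube page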
+ embeds = tuple(super()._extract_embed_urls(url, webpage))
+ if embeds:
+ return embeds
+
+ peertube_url = cls._extract_peertube_url(webpage, url)
+ if peertube_url:
+ return [peertube_url]
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
return self._download_json(
@@ -1236,7 +1233,6 @@ class PeerTubeIE(InfoExtractor):
else:
f['fps'] = int_or_none(file_.get('fps'))
formats.append(f)
- self._sort_formats(formats)
description = video.get('description')
if description and len(description) >= 250:
diff --git a/hypervideo_dl/extractor/peertv.py b/hypervideo_dl/extractor/peertv.py
index 002d33a..a709e21 100644
--- a/hypervideo_dl/extractor/peertv.py
+++ b/hypervideo_dl/extractor/peertv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import js_to_json
@@ -46,8 +43,6 @@ class PeerTVIE(InfoExtractor):
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '),
diff --git a/hypervideo_dl/extractor/peloton.py b/hypervideo_dl/extractor/peloton.py
index 7d83225..4835822 100644
--- a/hypervideo_dl/extractor/peloton.py
+++ b/hypervideo_dl/extractor/peloton.py
@@ -1,14 +1,9 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse,
-)
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
float_or_none,
@@ -128,7 +123,7 @@ class PelotonIE(InfoExtractor):
is_live = False
if ride_data.get('content_format') == 'audio':
- url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), compat_urllib_parse.quote(token))
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), urllib.parse.quote(token))
formats = [{
'url': url,
'ext': 'm4a',
@@ -141,9 +136,9 @@ class PelotonIE(InfoExtractor):
url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % (
','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]),
ride_data['vod_stream_url'],
- compat_urllib_parse.quote(compat_urllib_parse.quote(token)))
+ urllib.parse.quote(urllib.parse.quote(token)))
elif ride_data.get('live_stream_url'):
- url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token))
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token))
is_live = True
else:
raise ExtractorError('Missing video URL')
@@ -162,7 +157,6 @@ class PelotonIE(InfoExtractor):
'title': segment.get('name')
} for segment in traverse_obj(metadata, ('segments', 'segment_list'))]
- self._sort_formats(formats)
return {
'id': video_id,
'title': ride_data.get('title'),
diff --git a/hypervideo_dl/extractor/people.py b/hypervideo_dl/extractor/people.py
index 6ca9571..c5143c3 100644
--- a/hypervideo_dl/extractor/people.py
+++ b/hypervideo_dl/extractor/people.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/performgroup.py b/hypervideo_dl/extractor/performgroup.py
index c00d393..f4d7f22 100644
--- a/hypervideo_dl/extractor/performgroup.py
+++ b/hypervideo_dl/extractor/performgroup.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -69,7 +65,6 @@ class PerformGroupIE(InfoExtractor):
'vbr': int_or_none(c.get('videoRate'), 1000),
'abr': int_or_none(c.get('audioRate'), 1000),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/periscope.py b/hypervideo_dl/extractor/periscope.py
index 1a292b8..84bcf15 100644
--- a/hypervideo_dl/extractor/periscope.py
+++ b/hypervideo_dl/extractor/periscope.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -70,6 +65,7 @@ class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1']
# Alive example URLs can be found here https://www.periscope.tv/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -95,13 +91,6 @@ class PeriscopeIE(PeriscopeBaseIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
token = self._match_id(url)
@@ -138,7 +127,6 @@ class PeriscopeIE(PeriscopeBaseIE):
}
self._add_width_and_height(rtmp_format)
formats.append(rtmp_format)
- self._sort_formats(formats)
info['formats'] = formats
return info
diff --git a/hypervideo_dl/extractor/philharmoniedeparis.py b/hypervideo_dl/extractor/philharmoniedeparis.py
index 9f4899c..e8494a0 100644
--- a/hypervideo_dl/extractor/philharmoniedeparis.py
+++ b/hypervideo_dl/extractor/philharmoniedeparis.py
@@ -1,12 +1,6 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
-from ..utils import (
- try_get,
- urljoin,
-)
+from ..utils import try_get
class PhilharmonieDeParisIE(InfoExtractor):
@@ -15,27 +9,29 @@ class PhilharmonieDeParisIE(InfoExtractor):
https?://
(?:
live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)|
- pad\.philharmoniedeparis\.fr/doc/CIMU/
+ pad\.philharmoniedeparis\.fr/(?:doc/CIMU/|player\.aspx\?id=)|
+ philharmoniedeparis\.fr/fr/live/concert/|
+ otoplayer\.philharmoniedeparis\.fr/fr/embed/
)
(?P<id>\d+)
'''
_TESTS = [{
- 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
- 'md5': 'a0a4b195f544645073631cbec166a2c2',
+ 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1129666-danses-symphoniques',
+ 'md5': '24bdb7e86c200c107680e1f7770330ae',
'info_dict': {
- 'id': '1086697',
+ 'id': '1129666',
'ext': 'mp4',
- 'title': 'Jazz à la Villette : Knower',
+ 'title': 'Danses symphoniques. Orchestre symphonique Divertimento - Zahia Ziouani. Bizet, de Falla, Stravinski, Moussorgski, Saint-Saëns',
},
}, {
- 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
+ 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1032066-akademie-fur-alte-musik-berlin-rias-kammerchor-rene-jacobs-passion-selon-saint-jean-de-johann',
'info_dict': {
'id': '1032066',
- 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
+ 'title': 'Akademie für alte Musik Berlin, Rias Kammerchor, René Jacobs : Passion selon saint Jean de Johann Sebastian Bach',
},
'playlist_mincount': 2,
}, {
- 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
+ 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1030324-orchestre-philharmonique-de-radio-france-myung-whun-chung-renaud-capucon-pascal-dusapin-johannes',
'only_matching': True,
}, {
'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
@@ -44,16 +40,15 @@ class PhilharmonieDeParisIE(InfoExtractor):
'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
'only_matching': True,
}, {
- 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
+ 'url': 'https://otoplayer.philharmoniedeparis.fr/fr/embed/1098406?lang=fr-FR',
'only_matching': True,
}]
- _LIVE_URL = 'https://live.philharmoniedeparis.fr'
def _real_extract(self, url):
video_id = self._match_id(url)
config = self._download_json(
- '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
+ 'https://otoplayer.philharmoniedeparis.fr/fr/config/%s.json' % video_id, video_id, query={
'id': video_id,
'lang': 'fr-FR',
})
@@ -75,31 +70,27 @@ class PhilharmonieDeParisIE(InfoExtractor):
if not format_url or format_url in format_urls:
continue
format_urls.add(format_url)
- m3u8_url = urljoin(self._LIVE_URL, format_url)
formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
if not formats and not self.get_param('ignore_no_formats'):
return
- self._sort_formats(formats)
return {
'title': title,
'formats': formats,
+ 'thumbnail': files.get('thumbnail'),
}
-
- thumbnail = urljoin(self._LIVE_URL, config.get('image'))
-
info = extract_entry(config)
if info:
info.update({
'id': video_id,
- 'thumbnail': thumbnail,
})
return info
-
entries = []
for num, chapter in enumerate(config['chapters'], start=1):
entry = extract_entry(chapter)
+ if entry is None:
+ continue
entry['id'] = '%s-%d' % (video_id, num)
entries.append(entry)
diff --git a/hypervideo_dl/extractor/phoenix.py b/hypervideo_dl/extractor/phoenix.py
index e3ea014..5fa133a 100644
--- a/hypervideo_dl/extractor/phoenix.py
+++ b/hypervideo_dl/extractor/phoenix.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .youtube import YoutubeIE
diff --git a/hypervideo_dl/extractor/photobucket.py b/hypervideo_dl/extractor/photobucket.py
index 53aebe2..71e9a48 100644
--- a/hypervideo_dl/extractor/photobucket.py
+++ b/hypervideo_dl/extractor/photobucket.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/piapro.py b/hypervideo_dl/extractor/piapro.py
index c4eb491..d8d9c78 100644
--- a/hypervideo_dl/extractor/piapro.py
+++ b/hypervideo_dl/extractor/piapro.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
@@ -27,6 +24,18 @@ class PiaproIE(InfoExtractor):
'title': '裏表ラバーズ',
'thumbnail': r're:^https?://.*\.jpg$',
}
+ }, {
+ 'note': 'There are line breaks in the description, mandating the (?s) flag',
+ 'url': 'https://piapro.jp/t/9cSd',
+ 'md5': '952bb6d1e8de95050206408a87790676',
+ 'info_dict': {
+ 'id': '9cSd',
+ 'ext': 'mp3',
+ 'title': '青に溶けた風船 / 初音ミク',
+ 'description': 'md5:d395a9bd151447631a5a1460bc7f9132',
+ 'uploader': 'シアン・キノ',
+ 'uploader_id': 'cyankino',
+ }
}]
_login_status = False
@@ -81,7 +90,7 @@ class PiaproIE(InfoExtractor):
return {
'id': video_id,
'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False),
- 'description': self._html_search_regex(r'<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
+ 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
'uploader': uploader,
'uploader_id': uploader_id,
'timestamp': unified_timestamp(create_date, False),
diff --git a/hypervideo_dl/extractor/picarto.py b/hypervideo_dl/extractor/picarto.py
index adf21fd..36a062d 100644
--- a/hypervideo_dl/extractor/picarto.py
+++ b/hypervideo_dl/extractor/picarto.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -67,7 +64,6 @@ class PicartoIE(InfoExtractor):
formats.append({
'url': source_url,
})
- self._sort_formats(formats)
mature = metadata.get('adult')
if mature is None:
@@ -117,7 +113,6 @@ class PicartoVodIE(InfoExtractor):
formats = self._extract_m3u8_formats(
vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/piksel.py b/hypervideo_dl/extractor/piksel.py
index 84c3de2..cc60b30 100644
--- a/hypervideo_dl/extractor/piksel.py
+++ b/hypervideo_dl/extractor/piksel.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -33,6 +30,7 @@ class PikselIE(InfoExtractor):
)\.jp|
vidego\.baltimorecity\.gov
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)']
_TESTS = [
{
'url': 'http://player.piksel.com/v/ums2867l',
@@ -65,14 +63,6 @@ class PikselIE(InfoExtractor):
}
]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _call_api(self, app_token, resource, display_id, query, fatal=True):
response = (self._download_json(
'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
@@ -163,8 +153,6 @@ class PikselIE(InfoExtractor):
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
transform_source=transform_source, fatal=False))
- self._sort_formats(formats, ('tbr', )) # Incomplete resolution information
-
subtitles = {}
for caption in video_data.get('captions', []):
caption_url = caption.get('url')
@@ -180,4 +168,5 @@ class PikselIE(InfoExtractor):
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
'subtitles': subtitles,
+ '_format_sort_fields': ('tbr', ), # Incomplete resolution information
}
diff --git a/hypervideo_dl/extractor/pinkbike.py b/hypervideo_dl/extractor/pinkbike.py
index 9f3501f..e4e1caa 100644
--- a/hypervideo_dl/extractor/pinkbike.py
+++ b/hypervideo_dl/extractor/pinkbike.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -52,7 +49,6 @@ class PinkbikeIE(InfoExtractor):
'format_id': format_id,
'height': height,
})
- self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
description = self._html_search_regex(
diff --git a/hypervideo_dl/extractor/pinterest.py b/hypervideo_dl/extractor/pinterest.py
index 80e9cd0..2c6cd6d 100644
--- a/hypervideo_dl/extractor/pinterest.py
+++ b/hypervideo_dl/extractor/pinterest.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -55,7 +52,6 @@ class PinterestBaseIE(InfoExtractor):
'height': int_or_none(format_dict.get('height')),
'duration': duration,
})
- self._sort_formats(formats)
description = data.get('description') or data.get('description_html') or data.get('seo_description')
timestamp = unified_timestamp(data.get('created_at'))
diff --git a/hypervideo_dl/extractor/pixivsketch.py b/hypervideo_dl/extractor/pixivsketch.py
index f0ad0b2..850c6f2 100644
--- a/hypervideo_dl/extractor/pixivsketch.py
+++ b/hypervideo_dl/extractor/pixivsketch.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -74,7 +71,6 @@ class PixivSketchIE(PixivSketchBaseIE):
formats = self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/pladform.py b/hypervideo_dl/extractor/pladform.py
index 99ade85..dcf18e1 100644
--- a/hypervideo_dl/extractor/pladform.py
+++ b/hypervideo_dl/extractor/pladform.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -27,6 +22,7 @@ class PladformIE(InfoExtractor):
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1']
_TESTS = [{
'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282',
'info_dict': {
@@ -64,13 +60,6 @@ class PladformIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -122,8 +111,6 @@ class PladformIE(InfoExtractor):
if error:
fail(error)
- self._sort_formats(formats)
-
webpage = self._download_webpage(
'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
video_id)
diff --git a/hypervideo_dl/extractor/planetmarathi.py b/hypervideo_dl/extractor/planetmarathi.py
index 07ac15b..25753fe 100644
--- a/hypervideo_dl/extractor/planetmarathi.py
+++ b/hypervideo_dl/extractor/planetmarathi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
try_get,
@@ -60,7 +57,6 @@ class PlanetMarathiIE(InfoExtractor):
asset_title = id.replace('-', ' ')
asset_id = f'{asset["sk"]}_{id}'.replace('#', '-')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id)
- self._sort_formats(formats)
entries.append({
'id': asset_id,
'title': asset_title,
diff --git a/hypervideo_dl/extractor/platzi.py b/hypervideo_dl/extractor/platzi.py
index 17f52e7..b8a4414 100644
--- a/hypervideo_dl/extractor/platzi.py
+++ b/hypervideo_dl/extractor/platzi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
@@ -130,7 +127,6 @@ class PlatziIE(PlatziBaseIE):
format_url, lecture_id, mpd_id=format_id,
note='Downloading %s MPD manifest' % server_id,
fatal=False))
- self._sort_formats(formats)
content = str_or_none(desc.get('content'))
description = (clean_html(compat_b64decode(content).decode('utf-8'))
diff --git a/hypervideo_dl/extractor/playfm.py b/hypervideo_dl/extractor/playfm.py
index 4298cbe..e895ba4 100644
--- a/hypervideo_dl/extractor/playfm.py
+++ b/hypervideo_dl/extractor/playfm.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/playplustv.py b/hypervideo_dl/extractor/playplustv.py
index cad2c3a..316f220 100644
--- a/hypervideo_dl/extractor/playplustv.py
+++ b/hypervideo_dl/extractor/playplustv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -82,7 +79,6 @@ class PlayPlusTVIE(InfoExtractor):
'width': int_or_none(file_info.get('width')),
'height': int_or_none(file_info.get('height')),
})
- self._sort_formats(formats)
thumbnails = []
for thumb in media.get('thumbs', []):
diff --git a/hypervideo_dl/extractor/plays.py b/hypervideo_dl/extractor/plays.py
index ddfc6f1..9371f7b 100644
--- a/hypervideo_dl/extractor/plays.py
+++ b/hypervideo_dl/extractor/plays.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -41,7 +38,6 @@ class PlaysTVIE(InfoExtractor):
'format_id': 'http-' + format_id,
'height': int_or_none(height),
})
- self._sort_formats(formats)
info.update({
'id': video_id,
diff --git a/hypervideo_dl/extractor/playstuff.py b/hypervideo_dl/extractor/playstuff.py
index 5a32995..b424ba1 100644
--- a/hypervideo_dl/extractor/playstuff.py
+++ b/hypervideo_dl/extractor/playstuff.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/playsuisse.py b/hypervideo_dl/extractor/playsuisse.py
new file mode 100644
index 0000000..a635ac9
--- /dev/null
+++ b/hypervideo_dl/extractor/playsuisse.py
@@ -0,0 +1,147 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PlaySuisseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/watch/(?P<id>[0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.playsuisse.ch/watch/763211/0',
+ 'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
+ 'info_dict': {
+ 'id': '763211',
+ 'ext': 'mp4',
+ 'title': 'Knochen',
+ 'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
+ 'duration': 3344,
+ 'series': 'Wilder',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Knochen',
+ 'episode_number': 1,
+ 'thumbnail': 'md5:9260abe0c0ec9b69914d0a10d54c5878'
+ }
+ },
+ {
+ 'url': 'https://www.playsuisse.ch/watch/808675/0',
+ 'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
+ 'info_dict': {
+ 'id': '808675',
+ 'ext': 'mp4',
+ 'title': 'Der Läufer',
+ 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
+ 'duration': 5280,
+ 'episode': 'Der Läufer',
+ 'thumbnail': 'md5:44af7d65ee02bbba4576b131868bb783'
+ }
+ },
+ {
+ 'url': 'https://www.playsuisse.ch/watch/817193/0',
+ 'md5': '1d6c066f92cd7fffd8b28a53526d6b59',
+ 'info_dict': {
+ 'id': '817193',
+ 'ext': 'mp4',
+ 'title': 'Die Einweihungsparty',
+ 'description': 'md5:91ebf04d3a42cb3ab70666acf750a930',
+ 'duration': 1380,
+ 'series': 'Nr. 47',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Die Einweihungsparty',
+ 'episode_number': 1,
+ 'thumbnail': 'md5:637585fb106e3a4bcd991958924c7e44'
+ }
+ }
+ ]
+
+ _GRAPHQL_QUERY = '''
+ query AssetWatch($assetId: ID!) {
+ assetV2(id: $assetId) {
+ ...Asset
+ episodes {
+ ...Asset
+ }
+ }
+ }
+ fragment Asset on AssetV2 {
+ id
+ name
+ description
+ duration
+ episodeNumber
+ seasonNumber
+ seriesName
+ medias {
+ type
+ url
+ }
+ thumbnail16x9 {
+ ...ImageDetails
+ }
+ thumbnail2x3 {
+ ...ImageDetails
+ }
+ thumbnail16x9WithTitle {
+ ...ImageDetails
+ }
+ thumbnail2x3WithTitle {
+ ...ImageDetails
+ }
+ }
+ fragment ImageDetails on AssetImage {
+ id
+ url
+ }'''
+
+ def _get_media_data(self, media_id):
+ # NOTE: In the web app, the "locale" header is used to switch between
+ # languages; passing it here, however, does not seem to take effect.
+ response = self._download_json(
+ 'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
+ media_id, data=json.dumps({
+ 'operationName': 'AssetWatch',
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'assetId': media_id}
+ }).encode('utf-8'),
+ headers={'Content-Type': 'application/json', 'locale': 'de'})
+
+ return response['data']['assetV2']
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ media_data = self._get_media_data(media_id)
+ info = self._extract_single(media_data)
+ if media_data.get('episodes'):
+ info.update({
+ '_type': 'playlist',
+ 'entries': map(self._extract_single, media_data['episodes']),
+ })
+ return info
+
+ def _extract_single(self, media_data):
+ thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
+
+ formats, subtitles = [], {}
+ for media in traverse_obj(media_data, 'medias', default=[]):
+ if not media.get('url') or media.get('type') != 'HLS':
+ continue
+ f, subs = self._extract_m3u8_formats_and_subtitles(
+ media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
+ formats.extend(f)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': media_data['id'],
+ 'title': media_data.get('name'),
+ 'description': media_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(media_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series': media_data.get('seriesName'),
+ 'season_number': int_or_none(media_data.get('seasonNumber')),
+ 'episode': media_data.get('name'),
+ 'episode_number': int_or_none(media_data.get('episodeNumber')),
+ }
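
The new PlaySuisse extractor pulls all metadata with a single GraphQL POST. A standalone stdlib sketch of that request — endpoint, operation name, headers and variables are copied from the code above, but a trimmed-down query is used and the server's behaviour (auth, throttling) is not verified here:

    import json
    import urllib.request

    # One GraphQL round-trip as performed by PlaySuisseIE._get_media_data.
    # The query is a trimmed version of _GRAPHQL_QUERY above.
    QUERY = 'query AssetWatch($assetId: ID!) { assetV2(id: $assetId) { id name duration } }'

    def fetch_asset(asset_id):
        req = urllib.request.Request(
            'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
            data=json.dumps({
                'operationName': 'AssetWatch',
                'query': QUERY,
                'variables': {'assetId': asset_id},
            }).encode(),
            headers={'Content-Type': 'application/json', 'locale': 'de'})
        with urllib.request.urlopen(req) as resp:  # performs a network call
            return json.load(resp)['data']['assetV2']
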
diff --git a/hypervideo_dl/extractor/playtvak.py b/hypervideo_dl/extractor/playtvak.py
index 30c8a59..c418f88 100644
--- a/hypervideo_dl/extractor/playtvak.py
+++ b/hypervideo_dl/extractor/playtvak.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
@@ -163,7 +160,6 @@ class PlaytvakIE(InfoExtractor):
'quality': quality(fmt.get('quality')),
'preference': preference,
})
- self._sort_formats(formats)
title = item['title']
is_live = item['type'] == 'stream'
diff --git a/hypervideo_dl/extractor/playvid.py b/hypervideo_dl/extractor/playvid.py
index e1c406b..1e0989d 100644
--- a/hypervideo_dl/extractor/playvid.py
+++ b/hypervideo_dl/extractor/playvid.py
@@ -1,16 +1,9 @@
-from __future__ import unicode_literals
-
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
-)
+from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError, clean_html
class PlayvidIE(InfoExtractor):
@@ -64,7 +57,7 @@ class PlayvidIE(InfoExtractor):
val = videovars_match.group(2)
if key == 'title':
- video_title = compat_urllib_parse_unquote_plus(val)
+ video_title = urllib.parse.unquote_plus(val)
if key == 'duration':
try:
duration = int(val)
@@ -81,7 +74,6 @@ class PlayvidIE(InfoExtractor):
'height': height,
'url': val,
})
- self._sort_formats(formats)
# Extract title - should be in the flashvars; if not, look elsewhere
if video_title is None:
diff --git a/hypervideo_dl/extractor/playwire.py b/hypervideo_dl/extractor/playwire.py
index 9c9e597..1057bff 100644
--- a/hypervideo_dl/extractor/playwire.py
+++ b/hypervideo_dl/extractor/playwire.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
dict_get,
@@ -10,6 +7,8 @@ from ..utils import (
class PlaywireIE(InfoExtractor):
_VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1']
+
_TESTS = [{
'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
'md5': 'e6398701e3595888125729eaa2329ed9',
@@ -63,7 +62,6 @@ class PlaywireIE(InfoExtractor):
for a_format in formats:
if not dict_get(a_format, ['tbr', 'width', 'height']):
a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/pluralsight.py b/hypervideo_dl/extractor/pluralsight.py
index 2a5e0e4..809b656 100644
--- a/hypervideo_dl/extractor/pluralsight.py
+++ b/hypervideo_dl/extractor/pluralsight.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import collections
import json
import os
@@ -412,8 +410,6 @@ query viewClip {
})
formats.append(clip_f)
- self._sort_formats(formats)
-
duration = int_or_none(
clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
diff --git a/hypervideo_dl/extractor/plutotv.py b/hypervideo_dl/extractor/plutotv.py
index 26aff1a..71a05cc 100644
--- a/hypervideo_dl/extractor/plutotv.py
+++ b/hypervideo_dl/extractor/plutotv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import uuid
@@ -138,7 +135,6 @@ class PlutoTVIE(InfoExtractor):
subtitles = self._merge_subtitles(subtitles, subs)
formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles)
- self._sort_formats(formats)
info = {
'id': video_id,
diff --git a/hypervideo_dl/extractor/podbayfm.py b/hypervideo_dl/extractor/podbayfm.py
new file mode 100644
index 0000000..2a26fd2
--- /dev/null
+++ b/hypervideo_dl/extractor/podbayfm.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
+
+
+def result_from_props(props, episode_id=None):
+ return {
+ 'id': props.get('podcast_id') or episode_id,
+ 'title': props.get('title'),
+ 'url': props['mediaURL'],
+ 'ext': 'mp3',
+ 'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
+ 'timestamp': props.get('timestamp'),
+ 'duration': int_or_none(props.get('duration')),
+ }
+
+
+class PodbayFMIE(InfoExtractor):
+ _VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
+ _TESTS = [{
+ 'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
+ 'md5': '98b41285dcf7989d105a4ed0404054cf',
+ 'info_dict': {
+ 'id': '1647338400',
+ 'title': 'Part One: Kissinger',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1647338400,
+ 'duration': 5001,
+ 'upload_date': '20220315',
+ },
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ webpage = self._download_webpage(url, episode_id)
+ data = self._search_nextjs_data(webpage, episode_id)
+ return result_from_props(data['props']['pageProps']['episode'], episode_id)
+
+
+class PodbayFMChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
+ _TESTS = [{
+ 'url': 'https://podbay.fm/p/behind-the-bastards',
+ 'info_dict': {
+ 'id': 'behind-the-bastards',
+ 'title': 'Behind the Bastards',
+ },
+ }]
+ _PAGE_SIZE = 10
+
+ def _fetch_page(self, channel_id, pagenum):
+ return self._download_json(
+ f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
+ channel_id)['podcast']
+
+ @staticmethod
+ def _results_from_page(channel_id, page):
+ return [{
+ **result_from_props(e),
+ 'extractor': PodbayFMIE.IE_NAME,
+ 'extractor_key': PodbayFMIE.ie_key(),
+ # the site uses an episode's timestamp as its identifier
+ 'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}',
+ } for e in page['episodes']]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ first_page = self._fetch_page(channel_id, 0)
+ entries = OnDemandPagedList(
+ lambda pagenum: self._results_from_page(
+ channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(entries, channel_id, first_page.get('title'))
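
Both new podcast extractors in this commit hand OnDemandPagedList a fetch callback so that API pages are requested only when iteration actually reaches them (and page 0, already fetched for the playlist title, is reused rather than downloaded twice). A minimal sketch of that lazy-pagination idea; the real utility in ..utils additionally supports slicing and caching:

    # Lazy pagination in miniature: fetch_page(n) is called only when the
    # consumer iterates far enough, and a short page ends the sequence.
    def lazy_pages(fetch_page, page_size):
        pagenum = 0
        while True:
            page = fetch_page(pagenum)  # e.g. one API request per page
            yield from page
            if len(page) < page_size:   # short page => no more results
                return
            pagenum += 1

    fake_api = {0: list(range(10)), 1: list(range(10, 13))}
    print(list(lazy_pages(lambda n: fake_api.get(n, []), 10)))  # 0..12
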
diff --git a/hypervideo_dl/extractor/podchaser.py b/hypervideo_dl/extractor/podchaser.py
new file mode 100644
index 0000000..290c488
--- /dev/null
+++ b/hypervideo_dl/extractor/podchaser.py
@@ -0,0 +1,97 @@
+import functools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ OnDemandPagedList,
+ float_or_none,
+ str_or_none,
+ str_to_int,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class PodchaserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?'
+ _PAGE_SIZE = 100
+ _TESTS = [{
+ 'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585',
+ 'info_dict': {
+ 'id': '104365585',
+ 'title': 'Ep. 285 – freeze me off',
+ 'description': 'cam ahn',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'ext': 'mp3',
+ 'categories': ['Comedy'],
+ 'tags': ['comedy', 'dark humor'],
+ 'series': 'Cum Town',
+ 'duration': 3708,
+ 'timestamp': 1636531259,
+ 'upload_date': '20211110',
+ 'rating': 4.0
+ }
+ }, {
+ 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
+ 'info_dict': {
+ 'id': '28853',
+ 'title': 'The Bone Zone',
+ 'description': 'Podcast by The Bone Zone',
+ },
+ 'playlist_count': 275
+ }, {
+ 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
+ 'info_dict': {
+ 'id': '699349',
+ 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas',
+ 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1'
+ },
+ 'playlist_mincount': 225
+ }]
+
+ @staticmethod
+ def _parse_episode(episode, podcast):
+ return {
+ 'id': str(episode.get('id')),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'url': episode.get('audio_url'),
+ 'thumbnail': episode.get('image_url'),
+ 'duration': str_to_int(episode.get('length')),
+ 'timestamp': unified_timestamp(episode.get('air_date')),
+ 'rating': float_or_none(episode.get('rating')),
+ 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
+ 'tags': traverse_obj(podcast, ('tags', ..., 'text')),
+ 'series': podcast.get('title'),
+ }
+
+ def _call_api(self, path, *args, **kwargs):
+ return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
+
+ def _fetch_page(self, podcast_id, podcast, page):
+ json_response = self._call_api(
+ 'list/episode', podcast_id,
+ headers={'Content-Type': 'application/json;charset=utf-8'},
+ data=json.dumps({
+ 'start': page * self._PAGE_SIZE,
+ 'count': self._PAGE_SIZE,
+ 'sort_order': 'SORT_ORDER_RECENT',
+ 'filters': {
+ 'podcast_id': podcast_id
+ },
+ 'options': {}
+ }).encode())
+
+ for episode in json_response['entities']:
+ yield self._parse_episode(episode, podcast)
+
+ def _real_extract(self, url):
+ podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id')
+ podcast = self._call_api(f'podcasts/{podcast_id}', episode_id or podcast_id)
+ if not episode_id:
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
+ str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
+
+ episode = self._call_api(f'episodes/{episode_id}', episode_id)
+ return self._parse_episode(episode, podcast)
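
Podchaser's episode listing is a POST API paged through a JSON body. A hedged stdlib sketch of one page request, with the body fields copied from _fetch_page above; nothing beyond the 'entities' key is assumed about the response:

    import json
    import urllib.request

    # One page of PodchaserIE's 'list/episode' call (network access needed).
    def fetch_episode_page(podcast_id, page, page_size=100):
        body = json.dumps({
            'start': page * page_size,
            'count': page_size,
            'sort_order': 'SORT_ORDER_RECENT',
            'filters': {'podcast_id': podcast_id},
            'options': {},
        }).encode()
        req = urllib.request.Request(
            'https://api.podchaser.com/list/episode', data=body,
            headers={'Content-Type': 'application/json;charset=utf-8'})
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)['entities']
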
diff --git a/hypervideo_dl/extractor/podomatic.py b/hypervideo_dl/extractor/podomatic.py
index 673a3ab..985bfae 100644
--- a/hypervideo_dl/extractor/podomatic.py
+++ b/hypervideo_dl/extractor/podomatic.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/pokemon.py b/hypervideo_dl/extractor/pokemon.py
index b411390..0911893 100644
--- a/hypervideo_dl/extractor/pokemon.py
+++ b/hypervideo_dl/extractor/pokemon.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -139,42 +134,3 @@ class PokemonWatchIE(InfoExtractor):
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episode')),
})
-
-
-class PokemonSoundLibraryIE(InfoExtractor):
- _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp'
-
- _TESTS = [{
- 'url': 'https://soundlibrary.pokemon.co.jp/',
- 'info_dict': {
- 'title': 'Pokémon Diamond and Pearl Sound Tracks',
- },
- 'playlist_mincount': 149,
- }]
-
- def _real_extract(self, url):
- musicbox_webpage = self._download_webpage(
- 'https://soundlibrary.pokemon.co.jp/musicbox', None,
- 'Downloading list of songs')
- song_titles = [x.group(1) for x in re.finditer(r'<span>([^>]+?)</span><br/>をてもち曲に加えます。', musicbox_webpage)]
- song_titles = song_titles[4::2]
-
- # each songs don't have permalink; instead we return all songs at once
- song_entries = [{
- 'id': f'pokemon-soundlibrary-{song_id}',
- 'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav',
- # note: the server always serves MP3 files, despite its extension of the URL above
- 'ext': 'mp3',
- 'acodec': 'mp3',
- 'vcodec': 'none',
- 'title': song_title,
- 'track': song_title,
- 'artist': 'Nintendo / Creatures Inc. / GAME FREAK inc.',
- 'uploader': 'Pokémon',
- 'release_year': 2006,
- 'release_date': '20060928',
- 'track_number': song_id,
- 'album': 'Pokémon Diamond and Pearl',
- } for song_id, song_title in enumerate(song_titles, 1)]
-
- return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks')
diff --git a/hypervideo_dl/extractor/pokergo.py b/hypervideo_dl/extractor/pokergo.py
index c9e2fed..5c7baad 100644
--- a/hypervideo_dl/extractor/pokergo.py
+++ b/hypervideo_dl/extractor/pokergo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/polsatgo.py b/hypervideo_dl/extractor/polsatgo.py
index 1e3f46c..1524a1f 100644
--- a/hypervideo_dl/extractor/polsatgo.py
+++ b/hypervideo_dl/extractor/polsatgo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from uuid import uuid4
import json
@@ -45,7 +42,6 @@ class PolsatGoIE(InfoExtractor):
formats = list(self._extract_formats(
try_get(media, lambda x: x['playback']['mediaSources']), video_id))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/polskieradio.py b/hypervideo_dl/extractor/polskieradio.py
index b2b3eb2..99244f6 100644
--- a/hypervideo_dl/extractor/polskieradio.py
+++ b/hypervideo_dl/extractor/polskieradio.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import json
import math
@@ -298,8 +295,6 @@ class PolskieRadioPlayerIE(InfoExtractor):
'url': stream_url,
})
- self._sort_formats(formats)
-
return {
'id': compat_str(channel['id']),
'formats': formats,
diff --git a/hypervideo_dl/extractor/popcorntimes.py b/hypervideo_dl/extractor/popcorntimes.py
index 5f9d0e7..ddc5ec8 100644
--- a/hypervideo_dl/extractor/popcorntimes.py
+++ b/hypervideo_dl/extractor/popcorntimes.py
@@ -1,12 +1,5 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_chr,
-)
+from ..compat import compat_b64decode
from ..utils import int_or_none
@@ -54,7 +47,7 @@ class PopcorntimesIE(InfoExtractor):
c_ord += 13
if upper < c_ord:
c_ord -= 26
- loc_b64 += compat_chr(c_ord)
+ loc_b64 += chr(c_ord)
video_url = compat_b64decode(loc_b64).decode('utf-8')
diff --git a/hypervideo_dl/extractor/popcorntv.py b/hypervideo_dl/extractor/popcorntv.py
index 66d2e50..7798462 100644
--- a/hypervideo_dl/extractor/popcorntv.py
+++ b/hypervideo_dl/extractor/popcorntv.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
extract_attributes,
diff --git a/hypervideo_dl/extractor/porn91.py b/hypervideo_dl/extractor/porn91.py
index 20eac64..af4a0dc 100644
--- a/hypervideo_dl/extractor/porn91.py
+++ b/hypervideo_dl/extractor/porn91.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
diff --git a/hypervideo_dl/extractor/porncom.py b/hypervideo_dl/extractor/porncom.py
index 83df221..c8ef240 100644
--- a/hypervideo_dl/extractor/porncom.py
+++ b/hypervideo_dl/extractor/porncom.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -75,8 +73,6 @@ class PornComIE(InfoExtractor):
thumbnail = None
duration = None
- self._sort_formats(formats)
-
view_count = str_to_int(self._search_regex(
(r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
diff --git a/hypervideo_dl/extractor/pornez.py b/hypervideo_dl/extractor/pornez.py
index 713dc00..df0e44a 100644
--- a/hypervideo_dl/extractor/pornez.py
+++ b/hypervideo_dl/extractor/pornez.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
diff --git a/hypervideo_dl/extractor/pornflip.py b/hypervideo_dl/extractor/pornflip.py
index accf452..51a9cf3 100644
--- a/hypervideo_dl/extractor/pornflip.py
+++ b/hypervideo_dl/extractor/pornflip.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -63,7 +60,6 @@ class PornFlipIE(InfoExtractor):
r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
- self._sort_formats(formats)
return {
'age_limit': 18,
diff --git a/hypervideo_dl/extractor/pornhd.py b/hypervideo_dl/extractor/pornhd.py
index 9dbd72f..c8a1ec8 100644
--- a/hypervideo_dl/extractor/pornhd.py
+++ b/hypervideo_dl/extractor/pornhd.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -87,7 +84,6 @@ class PornHdIE(InfoExtractor):
})
if formats:
info['formats'] = formats
- self._sort_formats(info['formats'])
description = self._html_search_regex(
(r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
diff --git a/hypervideo_dl/extractor/pornhub.py b/hypervideo_dl/extractor/pornhub.py
index 17c8c91..5d8d7c1 100644
--- a/hypervideo_dl/extractor/pornhub.py
+++ b/hypervideo_dl/extractor/pornhub.py
@@ -1,33 +1,28 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import itertools
import math
import operator
import re
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_request,
-)
from .openload import PhantomJSwrapper
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
format_field,
int_or_none,
merge_dicts,
- NO_DEFAULT,
orderedSet,
remove_quotes,
+ remove_start,
str_to_int,
update_url_query,
- urlencode_postdata,
url_or_none,
+ urlencode_postdata,
)
@@ -52,7 +47,7 @@ class PornHubBaseIE(InfoExtractor):
r'document\.location\.reload\(true\)')):
url_or_request = args[0]
url = (url_or_request.get_full_url()
- if isinstance(url_or_request, compat_urllib_request.Request)
+ if isinstance(url_or_request, urllib.request.Request)
else url_or_request)
phantom = PhantomJSwrapper(self, required_version='2.0')
phantom.get(url, html=webpage)
@@ -133,6 +128,7 @@ class PornHubIE(PornHubBaseIE):
)
(?P<id>[\da-z]+)
''' % PornHubBaseIE._PORNHUB_HOST_RE
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@@ -202,6 +198,16 @@ class PornHubIE(PornHubBaseIE):
},
'skip': 'This video has been disabled',
}, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
+ 'info_dict': {
+ 'id': 'ph601dc30bae19a',
+ 'uploader': 'Projekt Melody',
+ 'uploader_id': 'projekt-melody',
+ 'upload_date': '20210205',
+ 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
+ 'thumbnail': r're:https?://.+',
+ },
+ }, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
}, {
@@ -252,12 +258,6 @@ class PornHubIE(PornHubBaseIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
- webpage)
-
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None))
@@ -432,7 +432,7 @@ class PornHubIE(PornHubBaseIE):
default=None))
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%dp'),
+ 'format_id': format_field(height, None, '%dp'),
'height': height,
})
@@ -456,13 +456,11 @@ class PornHubIE(PornHubBaseIE):
continue
add_format(video_url)
- # field_preference is unnecessary here, but kept for code-similarity with youtube-dl
- self._sort_formats(
- formats, field_preference=('height', 'width', 'fps', 'format_id'))
-
+ model_profile = self._search_json(
+ r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
- webpage, 'uploader', default=None)
+ webpage, 'uploader', default=None) or model_profile.get('username')
def extract_vote_count(kind, name):
return self._extract_count(
@@ -491,6 +489,7 @@ class PornHubIE(PornHubBaseIE):
return merge_dicts({
'id': video_id,
'uploader': video_uploader,
+ 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
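
Besides dropping the explicit sort, the PornHub hunks add a fallback metadata source: a MODEL_PROFILE object embedded in the page is parsed with _search_json, the uploader falls back to its username, and uploader_id is derived by stripping the '/model/' prefix from the profile link. A standalone sketch of that step; the HTML snippet is invented, and a plain regex stands in for _search_json's balanced-brace scan:

    import json
    import re

    # Invented page fragment; the real page embeds a comparable assignment.
    webpage = 'var MODEL_PROFILE = {"username": "example", "modelProfileLink": "/model/example"};'

    mobj = re.search(r'var\s+MODEL_PROFILE\s*=\s*({.+?})\s*;', webpage)
    model_profile = json.loads(mobj.group(1)) if mobj else {}

    uploader = model_profile.get('username')
    # remove_start in ..utils tolerates None; removeprefix needs Python 3.9+
    uploader_id = (model_profile.get('modelProfileLink') or '').removeprefix('/model/') or None
    print(uploader, uploader_id)  # example example
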
diff --git a/hypervideo_dl/extractor/pornotube.py b/hypervideo_dl/extractor/pornotube.py
index 1b5b9a3..e0960f4 100644
--- a/hypervideo_dl/extractor/pornotube.py
+++ b/hypervideo_dl/extractor/pornotube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/pornovoisines.py b/hypervideo_dl/extractor/pornovoisines.py
index 18459fc..aa48da0 100644
--- a/hypervideo_dl/extractor/pornovoisines.py
+++ b/hypervideo_dl/extractor/pornovoisines.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -59,7 +55,6 @@ class PornoVoisinesIE(InfoExtractor):
'height': item.get('height'),
'bitrate': item.get('bitrate'),
})
- self._sort_formats(formats)
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/pornoxo.py b/hypervideo_dl/extractor/pornoxo.py
index 489dc2b..5104d8a 100644
--- a/hypervideo_dl/extractor/pornoxo.py
+++ b/hypervideo_dl/extractor/pornoxo.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
str_to_int,
diff --git a/hypervideo_dl/extractor/prankcast.py b/hypervideo_dl/extractor/prankcast.py
new file mode 100644
index 0000000..0eb5f98
--- /dev/null
+++ b/hypervideo_dl/extractor/prankcast.py
@@ -0,0 +1,66 @@
+from .common import InfoExtractor
+from ..utils import parse_iso8601, traverse_obj, try_call
+
+
+class PrankCastIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/showreel/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://prankcast.com/Devonanustart/showreel/1561-Beverly-is-back-like-a-heart-attack-',
+ 'info_dict': {
+ 'id': '1561',
+ 'ext': 'mp3',
+ 'title': 'Beverly is back like a heart attack!',
+ 'display_id': 'Beverly-is-back-like-a-heart-attack-',
+ 'timestamp': 1661391575,
+ 'uploader': 'Devonanustart',
+ 'channel_id': 4,
+ 'duration': 7918,
+ 'cast': ['Devonanustart', 'Phonelosers'],
+ 'description': '',
+ 'categories': ['prank'],
+ 'tags': ['prank call', 'prank'],
+ 'upload_date': '20220825'
+ }
+ }, {
+ 'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL',
+ 'info_dict': {
+ 'id': '2048',
+ 'ext': 'mp3',
+ 'title': 'NOT COOL',
+ 'display_id': 'NOT-COOL',
+ 'timestamp': 1665028364,
+ 'uploader': 'phonelosers',
+ 'channel_id': 6,
+ 'duration': 4044,
+ 'cast': ['phonelosers'],
+ 'description': '',
+ 'categories': ['prank'],
+ 'tags': ['prank call', 'prank'],
+ 'upload_date': '20221006'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+
+ webpage = self._download_webpage(url, video_id)
+ json_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_showreel']
+
+ uploader = json_info.get('user_name')
+ guests_json = self._parse_json(json_info.get('guests_json') or '{}', video_id)
+ start_date = parse_iso8601(json_info.get('start_date'))
+
+ return {
+ 'id': video_id,
+ 'title': json_info.get('broadcast_title') or self._og_search_title(webpage),
+ 'display_id': display_id,
+ 'url': f'{json_info["broadcast_url"]}{json_info["recording_hash"]}.mp3',
+ 'timestamp': start_date,
+ 'uploader': uploader,
+ 'channel_id': json_info.get('user_id'),
+ 'duration': try_call(lambda: parse_iso8601(json_info['end_date']) - start_date),
+ 'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
+ 'description': json_info.get('broadcast_description'),
+ 'categories': [json_info.get('broadcast_category')],
+ 'tags': self._parse_json(json_info.get('broadcast_tags') or '{}', video_id)
+ }
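
PrankCast serves no duration field, so the extractor computes one from the broadcast's ISO-8601 start and end dates. A stdlib sketch of the same arithmetic — parse_iso8601 from ..utils returns a Unix timestamp, and datetime.fromisoformat stands in for it here; the sample timestamps are reconstructed from the first test's metadata:

    from datetime import datetime

    def duration_from_range(start_date, end_date):
        delta = datetime.fromisoformat(end_date) - datetime.fromisoformat(start_date)
        return int(delta.total_seconds())

    # 01:39:35 -> 03:51:33 is 7918 seconds, matching the test above
    print(duration_from_range('2022-08-25T01:39:35', '2022-08-25T03:51:33'))
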
diff --git a/hypervideo_dl/extractor/premiershiprugby.py b/hypervideo_dl/extractor/premiershiprugby.py
new file mode 100644
index 0000000..67d41fd
--- /dev/null
+++ b/hypervideo_dl/extractor/premiershiprugby.py
@@ -0,0 +1,39 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PremiershipRugbyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)premiershiprugby\.(?:com)/watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons',
+ 'info_dict': {
+ 'id': '0_mbkb7ldt',
+ 'title': 'Full Match: Harlequins v Newcastle Falcons',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75',
+ 'duration': 6093.0,
+ 'tags': ['video'],
+ 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ json_data = self._download_json(
+ f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}',
+ display_id, query={'clientId': 'PRL'})['data']['article']
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ json_data['heroMedia']['content']['videoLink'], display_id)
+
+ return {
+ 'id': json_data['heroMedia']['content']['sourceSystemId'],
+ 'display_id': display_id,
+ 'title': traverse_obj(json_data, ('heroMedia', 'title')),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnail': traverse_obj(json_data, ('heroMedia', 'content', 'videoThumbnail')),
+ 'duration': int_or_none(traverse_obj(json_data, ('heroMedia', 'content', 'metadata', 'msDuration')), scale=1000),
+ 'tags': json_data.get('tags'),
+ 'categories': traverse_obj(json_data, ('categories', ..., 'text')),
+ }
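
The msDuration field read above is in milliseconds; int_or_none(..., scale=1000) turns it into seconds. A stdlib equivalent of that conversion, assuming the utility floor-divides as in upstream yt-dlp:

    def ms_to_seconds(value):
        try:
            return int(value) // 1000
        except (TypeError, ValueError):
            return None  # mirrors int_or_none's behaviour on bad input

    print(ms_to_seconds(6093000))  # 6093, the duration in the test above
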
diff --git a/hypervideo_dl/extractor/presstv.py b/hypervideo_dl/extractor/presstv.py
index bfb2eb7..26ce74a 100644
--- a/hypervideo_dl/extractor/presstv.py
+++ b/hypervideo_dl/extractor/presstv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import remove_start
diff --git a/hypervideo_dl/extractor/projectveritas.py b/hypervideo_dl/extractor/projectveritas.py
index 9e9867b..0e029ce 100644
--- a/hypervideo_dl/extractor/projectveritas.py
+++ b/hypervideo_dl/extractor/projectveritas.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -45,7 +42,6 @@ class ProjectVeritasIE(InfoExtractor):
raise ExtractorError('No video on the provided url.', expected=True)
playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
- self._sort_formats(formats)
return {
'id': video_id,
'title': main_data['title'],
diff --git a/hypervideo_dl/extractor/prosiebensat1.py b/hypervideo_dl/extractor/prosiebensat1.py
index e89bbfd..46e2e8a 100644
--- a/hypervideo_dl/extractor/prosiebensat1.py
+++ b/hypervideo_dl/extractor/prosiebensat1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from hashlib import sha1
@@ -159,7 +156,6 @@ class ProSiebenSat1BaseIE(InfoExtractor):
'tbr': tbr,
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
})
- self._sort_formats(formats)
return {
'duration': float_or_none(video.get('duration')),
diff --git a/hypervideo_dl/extractor/prx.py b/hypervideo_dl/extractor/prx.py
index 80561b8..5bb1832 100644
--- a/hypervideo_dl/extractor/prx.py
+++ b/hypervideo_dl/extractor/prx.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
diff --git a/hypervideo_dl/extractor/puhutv.py b/hypervideo_dl/extractor/puhutv.py
index ca71665..482e570 100644
--- a/hypervideo_dl/extractor/puhutv.py
+++ b/hypervideo_dl/extractor/puhutv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
@@ -114,7 +111,6 @@ class PuhuTVIE(InfoExtractor):
format_id += '-%sp' % quality
f['format_id'] = format_id
formats.append(f)
- self._sort_formats(formats)
creator = try_get(
show, lambda x: x['producer']['name'], compat_str)
diff --git a/hypervideo_dl/extractor/puls4.py b/hypervideo_dl/extractor/puls4.py
index 80091b8..38c5d11 100644
--- a/hypervideo_dl/extractor/puls4.py
+++ b/hypervideo_dl/extractor/puls4.py
@@ -1,12 +1,6 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .prosiebensat1 import ProSiebenSat1BaseIE
-from ..utils import (
- unified_strdate,
- parse_duration,
- compat_str,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, unified_strdate
class Puls4IE(ProSiebenSat1BaseIE):
diff --git a/hypervideo_dl/extractor/pyvideo.py b/hypervideo_dl/extractor/pyvideo.py
index 8696197..7b25166 100644
--- a/hypervideo_dl/extractor/pyvideo.py
+++ b/hypervideo_dl/extractor/pyvideo.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/qingting.py b/hypervideo_dl/extractor/qingting.py
new file mode 100644
index 0000000..aa690d4
--- /dev/null
+++ b/hypervideo_dl/extractor/qingting.py
@@ -0,0 +1,47 @@
+from .common import InfoExtractor
+
+from ..utils import traverse_obj
+
+
+class QingTingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|m\.)?(?:qingting\.fm|qtfm\.cn)/v?channels/(?P<channel>\d+)/programs/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
+ 'md5': '47e6a94f4e621ed832c316fd1888fb3c',
+ 'info_dict': {
+ 'id': '22257411',
+ 'title': '用了十年才修改,谁在乎教科书?',
+ 'channel_id': '378005',
+ 'channel': '睡前消息',
+ 'uploader': '马督工',
+ 'ext': 'm4a',
+ }
+ }, {
+ 'url': 'https://m.qtfm.cn/vchannels/378005/programs/23023573/',
+ 'md5': '2703120b6abe63b5fa90b975a58f4c0e',
+ 'info_dict': {
+ 'id': '23023573',
+ 'title': '【睡前消息488】重庆山火之后,有图≠真相',
+ 'channel_id': '378005',
+ 'channel': '睡前消息',
+ 'uploader': '马督工',
+ 'ext': 'm4a',
+ }
+ }]
+
+ def _real_extract(self, url):
+ channel_id, pid = self._match_valid_url(url).group('channel', 'id')
+ webpage = self._download_webpage(
+ f'https://m.qtfm.cn/vchannels/{channel_id}/programs/{pid}/', pid)
+ info = self._search_json(r'window\.__initStores\s*=', webpage, 'program info', pid)
+ return {
+ 'id': pid,
+ 'title': traverse_obj(info, ('ProgramStore', 'programInfo', 'title')),
+ 'channel_id': channel_id,
+ 'channel': traverse_obj(info, ('ProgramStore', 'channelInfo', 'title')),
+ 'uploader': traverse_obj(info, ('ProgramStore', 'podcasterInfo', 'podcaster', 'nickname')),
+ 'url': traverse_obj(info, ('ProgramStore', 'programInfo', 'audioUrl')),
+ 'vcodec': 'none',
+ 'acodec': 'm4a',
+ 'ext': 'm4a',
+ }
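
QingTing reads everything from a store object serialized into the page, which _search_json locates by scanning from the window.__initStores assignment. A standalone approximation with an invented fragment; a plain non-greedy regex stands in for the real balanced-brace parsing:

    import json
    import re

    webpage = 'window.__initStores = {"ProgramStore": {"programInfo": {"title": "demo", "audioUrl": "https://example.com/a.m4a"}}};'

    raw = re.search(r'window\.__initStores\s*=\s*({.+?})\s*;', webpage).group(1)
    info = json.loads(raw)
    print(info['ProgramStore']['programInfo']['title'])  # demo
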
diff --git a/hypervideo_dl/extractor/qqmusic.py b/hypervideo_dl/extractor/qqmusic.py
index 0106d16..9285825 100644
--- a/hypervideo_dl/extractor/qqmusic.py
+++ b/hypervideo_dl/extractor/qqmusic.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
import re
import time
@@ -125,7 +122,6 @@ class QQMusicIE(InfoExtractor):
'abr': details.get('abr'),
})
self._check_formats(formats, mid)
- self._sort_formats(formats)
actual_lrc_lyrics = ''.join(
line + '\n' for line in re.findall(
diff --git a/hypervideo_dl/extractor/r7.py b/hypervideo_dl/extractor/r7.py
index e2202d6..f067a05 100644
--- a/hypervideo_dl/extractor/r7.py
+++ b/hypervideo_dl/extractor/r7.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -69,7 +66,6 @@ class R7IE(InfoExtractor):
f_copy['protocol'] = 'http'
f = f_copy
formats.append(f)
- self._sort_formats(formats)
description = video.get('description')
thumbnail = video.get('thumb')
diff --git a/hypervideo_dl/extractor/radiko.py b/hypervideo_dl/extractor/radiko.py
index 1e60de1..f102922 100644
--- a/hypervideo_dl/extractor/radiko.py
+++ b/hypervideo_dl/extractor/radiko.py
@@ -1,29 +1,22 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
import base64
-import calendar
-import datetime
+import re
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- update_url_query,
clean_html,
+ time_seconds,
+ try_call,
unified_timestamp,
+ update_url_query,
)
-from ..compat import compat_urllib_parse
class RadikoBaseIE(InfoExtractor):
_FULL_KEY = None
def _auth_client(self):
- auth_cache = self._downloader.cache.load('radiko', 'auth_data')
- if auth_cache:
- return auth_cache
-
_, auth1_handle = self._download_webpage_handle(
'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
headers={
@@ -50,7 +43,7 @@ class RadikoBaseIE(InfoExtractor):
}).split(',')[0]
auth_data = (auth_token, area_id)
- self._downloader.cache.store('radiko', 'auth_data', auth_data)
+ self.cache.store('radiko', 'auth_data', auth_data)
return auth_data
def _extract_full_key(self):
@@ -92,8 +85,8 @@ class RadikoBaseIE(InfoExtractor):
def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
m3u8_playlist_data = self._download_xml(
- 'https://radiko.jp/v3/station/stream/pc_html5/%s.xml' % station, video_id,
- note='Downloading m3u8 information')
+ f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id,
+ note='Downloading stream information')
m3u8_urls = m3u8_playlist_data.findall('.//url')
formats = []
@@ -105,7 +98,7 @@ class RadikoBaseIE(InfoExtractor):
'station_id': station,
**query,
'l': '15',
- 'lsid': '77d0678df93a1034659c14d6fc89f018',
+ 'lsid': '88ecea37e968c1f17d5413312d9f8003',
'type': 'b',
})
if playlist_url in found:
@@ -115,23 +108,23 @@ class RadikoBaseIE(InfoExtractor):
time_to_skip = None if is_onair else cursor - ft
+ domain = urllib.parse.urlparse(playlist_url).netloc
subformats = self._extract_m3u8_formats(
playlist_url, video_id, ext='m4a',
- live=True, fatal=False, m3u8_id=None,
+ live=True, fatal=False, m3u8_id=domain,
+ note=f'Downloading m3u8 information from {domain}',
headers={
'X-Radiko-AreaId': area_id,
'X-Radiko-AuthToken': auth_token,
})
for sf in subformats:
- domain = sf['format_id'] = compat_urllib_parse.urlparse(sf['url']).netloc
- if re.match(r'^[cf]-radiko\.smartstream\.ne\.jp$', domain):
+ if re.fullmatch(r'[cf]-radiko\.smartstream\.ne\.jp', domain):
# Prioritize live radio vs playback based on extractor
sf['preference'] = 100 if is_onair else -100
if not is_onair and url_attrib['timefree'] == '1' and time_to_skip:
- sf['_ffmpeg_args'] = ['-ss', time_to_skip]
+ sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]}
formats.extend(subformats)
- self._sort_formats(formats)
return formats
@@ -154,31 +147,29 @@ class RadikoIE(RadikoBaseIE):
def _real_extract(self, url):
station, video_id = self._match_valid_url(url).groups()
vid_int = unified_timestamp(video_id, False)
-
- auth_token, area_id = self._auth_client()
-
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
- title = prog.find('title').text
- description = clean_html(prog.find('info').text)
- station_name = station_program.find('.//name').text
-
- formats = self._extract_formats(
- video_id=video_id, station=station, is_onair=False,
- ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
- query={
- 'start_at': radio_begin,
- 'ft': radio_begin,
- 'end_at': radio_end,
- 'to': radio_end,
- 'seek': video_id,
- })
+ auth_cache = self.cache.load('radiko', 'auth_data')
+ for attempt in range(2):
+ auth_token, area_id = (not attempt and auth_cache) or self._auth_client()
+ formats = self._extract_formats(
+ video_id=video_id, station=station, is_onair=False,
+ ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
+ query={
+ 'start_at': radio_begin,
+ 'ft': radio_begin,
+ 'end_at': radio_end,
+ 'to': radio_end,
+ 'seek': video_id,
+ })
+ if formats:
+ break
return {
'id': video_id,
- 'title': title,
- 'description': description,
- 'uploader': station_name,
+ 'title': try_call(lambda: prog.find('title').text),
+ 'description': clean_html(try_call(lambda: prog.find('info').text)),
+ 'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station,
'timestamp': vid_int,
'formats': formats,
@@ -208,8 +199,7 @@ class RadikoRadioIE(RadikoBaseIE):
auth_token, area_id = self._auth_client()
# get current time in JST (GMT+9:00 w/o DST)
- vid_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
- vid_now = calendar.timegm(vid_now.timetuple())
+ vid_now = time_seconds(hours=9)
prog, station_program, ft, _, _ = self._find_program(station, station, vid_now)
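
The Radiko rewrite stops trusting the cached token unconditionally: _real_extract now tries cached credentials first and re-authenticates only if they yield no formats. The shape of that retry, with stand-in callables for the real helpers:

    # attempt 0 uses cached credentials when available; attempt 1 forces
    # a fresh _auth_client()-style round-trip. All three callables are
    # stand-ins for the real extractor methods.
    def formats_with_auth_retry(load_cache, authenticate, get_formats):
        cached = load_cache()
        formats = []
        for attempt in range(2):
            auth = (not attempt and cached) or authenticate()
            formats = get_formats(auth)
            if formats:
                break
        return formats
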
diff --git a/hypervideo_dl/extractor/radiobremen.py b/hypervideo_dl/extractor/radiobremen.py
index 2c35f98..99ba050 100644
--- a/hypervideo_dl/extractor/radiobremen.py
+++ b/hypervideo_dl/extractor/radiobremen.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/radiocanada.py b/hypervideo_dl/extractor/radiocanada.py
index 4b4445c..72c21d5 100644
--- a/hypervideo_dl/extractor/radiocanada.py
+++ b/hypervideo_dl/extractor/radiocanada.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -117,7 +113,6 @@ class RadioCanadaIE(InfoExtractor):
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
- self._sort_formats(formats)
subtitles = {}
closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
diff --git a/hypervideo_dl/extractor/radiode.py b/hypervideo_dl/extractor/radiode.py
index 0382873..32c36d5 100644
--- a/hypervideo_dl/extractor/radiode.py
+++ b/hypervideo_dl/extractor/radiode.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -40,7 +38,6 @@ class RadioDeIE(InfoExtractor):
'abr': stream['bitRate'],
'asr': stream['sampleRate']
} for stream in broadcast['streamUrls']]
- self._sort_formats(formats)
return {
'id': radio_id,
diff --git a/hypervideo_dl/extractor/radiofrance.py b/hypervideo_dl/extractor/radiofrance.py
index 082238b..11765d0 100644
--- a/hypervideo_dl/extractor/radiofrance.py
+++ b/hypervideo_dl/extractor/radiofrance.py
@@ -1,9 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
+from ..utils import parse_duration, unified_strdate
class RadioFranceIE(InfoExtractor):
@@ -48,7 +46,6 @@ class RadioFranceIE(InfoExtractor):
for i, fm in
enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
]
- self._sort_formats(formats)
return {
'id': video_id,
@@ -57,3 +54,51 @@ class RadioFranceIE(InfoExtractor):
'description': description,
'uploader': uploader,
}
+
+
+class FranceCultureIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
+ _TESTS = [
+ {
+ 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
+ 'info_dict': {
+ 'id': '8440487',
+ 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau',
+ 'ext': 'mp3',
+ 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
+ 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
+ 'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
+ 'upload_date': '20220514',
+ 'duration': 2750,
+ },
+ },
+ {
+ 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ # _search_json_ld doesn't correctly handle this. See https://github.com/hypervideo/hypervideo/pull/3874#discussion_r891903846
+ video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_data['contentUrl'],
+ 'ext': video_data.get('encodingFormat'),
+ 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
+ 'duration': parse_duration(video_data.get('duration')),
+ 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=self._og_search_title(webpage)),
+ 'description': self._html_search_regex(
+ r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': self._html_search_regex(
+ r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
+ 'upload_date': unified_strdate(self._search_regex(
+ r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
+ }
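
As the comment in the new FranceCultureIE notes, _search_json_ld mishandles this page, so the raw AudioObject JSON is pulled out with _search_json and a contains_pattern. A standalone approximation against an invented fragment, with a plain regex in place of the balanced-brace scan:

    import json
    import re

    webpage = '<script>{"@type": "AudioObject", "contentUrl": "https://example.com/ep.mp3", "encodingFormat": "mp3"}</script>'

    raw = re.search(r'({\s*"@type"\s*:\s*"AudioObject".+?})</script>', webpage).group(1)
    audio = json.loads(raw)
    print(audio['contentUrl'], audio['encodingFormat'])
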
diff --git a/hypervideo_dl/extractor/radiojavan.py b/hypervideo_dl/extractor/radiojavan.py
index 3f74f0c..6a91394 100644
--- a/hypervideo_dl/extractor/radiojavan.py
+++ b/hypervideo_dl/extractor/radiojavan.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -52,7 +50,6 @@ class RadioJavanIE(InfoExtractor):
'format_id': format_id,
})
formats.append(f)
- self._sort_formats(formats)
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
diff --git a/hypervideo_dl/extractor/radiokapital.py b/hypervideo_dl/extractor/radiokapital.py
index 2e93e03..8f9737a 100644
--- a/hypervideo_dl/extractor/radiokapital.py
+++ b/hypervideo_dl/extractor/radiokapital.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/radiozet.py b/hypervideo_dl/extractor/radiozet.py
index 2e1ff36..6752017 100644
--- a/hypervideo_dl/extractor/radiozet.py
+++ b/hypervideo_dl/extractor/radiozet.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
from ..utils import (
traverse_obj,
diff --git a/hypervideo_dl/extractor/radlive.py b/hypervideo_dl/extractor/radlive.py
index dc98973..9bcbb11 100644
--- a/hypervideo_dl/extractor/radlive.py
+++ b/hypervideo_dl/extractor/radlive.py
@@ -62,7 +62,6 @@ class RadLiveIE(InfoExtractor):
raise ExtractorError('Unable to extract video info, make sure the URL is valid')
formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
- self._sort_formats(formats)
data = video_info.get('structured_data', {})
@@ -80,7 +79,7 @@ class RadLiveIE(InfoExtractor):
'release_timestamp': release_date,
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://rad.live/content/channel/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'),
}
if content_type == 'episode':
@@ -94,7 +93,7 @@ class RadLiveIE(InfoExtractor):
return result
-class RadLiveSeasonIE(RadLiveIE):
+class RadLiveSeasonIE(RadLiveIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'radlive:season'
_VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
_TESTS = [{
@@ -134,7 +133,7 @@ class RadLiveSeasonIE(RadLiveIE):
return self.playlist_result(entries, season_id, video_info.get('title'))
-class RadLiveChannelIE(RadLiveIE):
+class RadLiveChannelIE(RadLiveIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'radlive:channel'
_VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/rai.py b/hypervideo_dl/extractor/rai.py
index 6864129..cab12cc 100644
--- a/hypervideo_dl/extractor/rai.py
+++ b/hypervideo_dl/extractor/rai.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -9,6 +6,7 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
+ clean_html,
determine_ext,
ExtractorError,
filter_dict,
@@ -48,11 +46,14 @@ class RaiBaseIE(InfoExtractor):
for platform in ('mon', 'flash', 'native'):
relinker = self._download_xml(
relinker_url, video_id,
- note='Downloading XML metadata for platform %s' % platform,
+ note=f'Downloading XML metadata for platform {platform}',
transform_source=fix_xml_ampersands,
query={'output': 45, 'pl': platform},
headers=self.geo_verification_headers())
+ if xpath_text(relinker, './license_url', default='{}') != '{}':
+ self.report_drm(video_id)
+
if not geoprotection:
geoprotection = xpath_text(
relinker, './geoprotection', default=None) == 'Y'
@@ -102,7 +103,7 @@ class RaiBaseIE(InfoExtractor):
formats.append({
'url': media_url,
'tbr': bitrate if bitrate > 0 else None,
- 'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
+ 'format_id': f'http-{bitrate}' if bitrate > 0 else 'http',
})
if not formats and geoprotection is True:
@@ -155,7 +156,7 @@ class RaiBaseIE(InfoExtractor):
br = int_or_none(tbr)
if len(fmts) == 1 and not br:
br = fmts[0].get('tbr')
- if br > 300:
+ if br and br > 300:
tbr = compat_str(math.floor(br / 100) * 100)
else:
tbr = '250'
@@ -174,11 +175,11 @@ class RaiBaseIE(InfoExtractor):
'vcodec': format_copy.get('vcodec'),
'acodec': format_copy.get('acodec'),
'fps': format_copy.get('fps'),
- 'format_id': 'https-%s' % tbr,
+ 'format_id': f'https-{tbr}',
} if format_copy else {
'width': _QUALITY[tbr][0],
'height': _QUALITY[tbr][1],
- 'format_id': 'https-%s' % tbr,
+ 'format_id': f'https-{tbr}',
'tbr': int(tbr),
}
@@ -201,8 +202,8 @@ class RaiBaseIE(InfoExtractor):
'url': _MP4_TMPL % (relinker_url, q),
'protocol': 'https',
'ext': 'mp4',
+ **get_format_info(q)
}
- fmt.update(get_format_info(q))
formats.append(fmt)
return formats
@@ -233,7 +234,7 @@ class RaiBaseIE(InfoExtractor):
class RaiPlayIE(RaiBaseIE):
- _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
+ _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)'
_TESTS = [{
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
@@ -251,6 +252,10 @@ class RaiPlayIE(RaiBaseIE):
'subtitles': {
'it': 'count:4',
},
+ 'release_year': 2022,
+ 'episode': 'Espresso nel caffè - 07/04/2014',
+ 'timestamp': 1396919880,
+ 'upload_date': '20140408',
},
'params': {
'skip_download': True,
@@ -270,6 +275,12 @@ class RaiPlayIE(RaiBaseIE):
'duration': 6493,
'series': 'Blanca',
'season': 'Season 1',
+ 'episode_number': 1,
+ 'release_year': 2021,
+ 'season_number': 1,
+ 'episode': 'Senza occhi',
+ 'timestamp': 1637318940,
+ 'upload_date': '20211119',
},
}, {
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
@@ -280,7 +291,7 @@ class RaiPlayIE(RaiBaseIE):
'only_matching': True,
}, {
# DRM protected
- 'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
+ 'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html',
'only_matching': True,
}]
@@ -302,7 +313,6 @@ class RaiPlayIE(RaiBaseIE):
video = media['video']
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
- self._sort_formats(relinker_info['formats'])
thumbnails = []
for _, value in media.get('images', {}).items():
@@ -323,13 +333,13 @@ class RaiPlayIE(RaiBaseIE):
alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ')
- info = {
+ return {
'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
'display_id': video_id,
'title': title,
- 'alt_title': strip_or_none(alt_title),
+ 'alt_title': strip_or_none(alt_title or None),
'description': media.get('description'),
- 'uploader': strip_or_none(media.get('channel')),
+ 'uploader': strip_or_none(media.get('channel') or None),
'creator': strip_or_none(media.get('editor') or None),
'duration': parse_duration(video.get('duration')),
'timestamp': unified_timestamp(date_published),
@@ -340,13 +350,12 @@ class RaiPlayIE(RaiBaseIE):
'episode': media.get('episode_title'),
'episode_number': int_or_none(media.get('episode')),
'subtitles': subtitles,
+ 'release_year': int_or_none(traverse_obj(media, ('track_info', 'edit_year'))),
+ **relinker_info
}
- info.update(relinker_info)
- return info
-
-class RaiPlayLiveIE(RaiPlayIE):
+class RaiPlayLiveIE(RaiPlayIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'http://www.raiplay.it/dirette/rainews24',
@@ -359,6 +368,9 @@ class RaiPlayLiveIE(RaiPlayIE):
'uploader': 'Rai News 24',
'creator': 'Rai News 24',
'is_live': True,
+ 'live_status': 'is_live',
+ 'upload_date': '20090502',
+ 'timestamp': 1241276220,
},
'params': {
'skip_download': True,
@@ -409,7 +421,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
if not s_id:
continue
medias = self._download_json(
- '%s/%s.json' % (base, s_id), s_id,
+ f'{base}/{s_id}.json', s_id,
'Downloading content set JSON', fatal=False)
if not medias:
continue
@@ -428,7 +440,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
class RaiPlaySoundIE(RaiBaseIE):
- _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
+ _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)'
_TESTS = [{
'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
@@ -436,11 +448,16 @@ class RaiPlaySoundIE(RaiBaseIE):
'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707',
'ext': 'mp3',
'title': 'Il Ruggito del Coniglio del 10/12/2021',
+ 'alt_title': 'md5:0e6476cd57858bb0f3fcc835d305b455',
'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'rai radio 2',
'duration': 5685,
'series': 'Il Ruggito del Coniglio',
+ 'episode': 'Il Ruggito del Coniglio del 10/12/2021',
+ 'creator': 'rai radio 2',
+ 'timestamp': 1638346620,
+ 'upload_date': '20211201',
},
'params': {
'skip_download': True,
@@ -472,7 +489,7 @@ class RaiPlaySoundIE(RaiBaseIE):
'id': uid or audio_id,
'display_id': audio_id,
'title': traverse_obj(media, 'title', 'episode_title'),
- 'alt_title': traverse_obj(media, ('track_info', 'media_name')),
+ 'alt_title': traverse_obj(media, ('track_info', 'media_name'), expected_type=strip_or_none),
'description': media.get('description'),
'uploader': traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none),
'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none),
@@ -486,7 +503,7 @@ class RaiPlaySoundIE(RaiBaseIE):
}
-class RaiPlaySoundLiveIE(RaiPlaySoundIE):
+class RaiPlaySoundLiveIE(RaiPlaySoundIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)'
_TESTS = [{
'url': 'https://www.raiplaysound.it/radio2',
@@ -494,10 +511,13 @@ class RaiPlaySoundLiveIE(RaiPlaySoundIE):
'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44',
'display_id': 'radio2',
'ext': 'mp4',
- 'title': 'Rai Radio 2',
+ 'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+',
+ 'thumbnail': r're:https://www.raiplaysound.it/dl/img/.+?png',
'uploader': 'rai radio 2',
+ 'series': 'Rai Radio 2',
'creator': 'raiplaysound',
'is_live': True,
+ 'live_status': 'is_live',
},
'params': {
'skip_download': 'live',
@@ -546,11 +566,11 @@ class RaiPlaySoundPlaylistIE(InfoExtractor):
class RaiIE(RaiBaseIE):
- _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
+ _VALID_URL = rf'https?://[^/]+\.(?:rai\.(?:it|tv))/.+?-(?P<id>{RaiBaseIE._UUID_RE})(?:-.+?)?\.html'
_TESTS = [{
# var uniquename = "ContentItem-..."
# data-id="ContentItem-..."
- 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
+ 'url': 'https://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'mp4',
@@ -561,20 +581,8 @@ class RaiIE(RaiBaseIE):
},
'skip': 'This content is available only in Italy',
}, {
- # with ContentItem in many metas
- 'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
- 'info_dict': {
- 'id': '1632c009-c843-4836-bb65-80c33084a64b',
- 'ext': 'mp4',
- 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
- 'description': 'I film in uscita questa settimana.',
- 'thumbnail': r're:^https?://.*\.png$',
- 'duration': 833,
- 'upload_date': '20161103',
- }
- }, {
# with ContentItem in og:url
- 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
+ 'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
'md5': '06345bd97c932f19ffb129973d07a020',
'info_dict': {
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
@@ -583,42 +591,17 @@ class RaiIE(RaiBaseIE):
'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2214,
- 'upload_date': '20161103',
+ 'upload_date': '20161103'
}
}, {
- # initEdizione('ContentItem-...'
- 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
- 'info_dict': {
- 'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
- 'ext': 'mp4',
- 'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
- 'duration': 2274,
- 'upload_date': '20170401',
- },
- 'skip': 'Changes daily',
- }, {
- # HLS live stream with ContentItem in og:url
- 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
- 'info_dict': {
- 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
- 'ext': 'mp4',
- 'title': 'La diretta di Rainews24',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
# Direct MMS URL
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
'only_matching': True,
- }, {
- 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
- 'only_matching': True,
}]
def _extract_from_content_id(self, content_id, url):
media = self._download_json(
- 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
+ f'https://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-{content_id}.html?json',
content_id, 'Downloading video JSON')
title = media['name'].strip()
@@ -637,8 +620,6 @@ class RaiIE(RaiBaseIE):
else:
raise ExtractorError('not a media file')
- self._sort_formats(relinker_info['formats'])
-
thumbnails = []
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
@@ -649,21 +630,18 @@ class RaiIE(RaiBaseIE):
subtitles = self._extract_subtitles(url, media)
- info = {
+ return {
'id': content_id,
'title': title,
- 'description': strip_or_none(media.get('desc')),
+ 'description': strip_or_none(media.get('desc') or None),
'thumbnails': thumbnails,
- 'uploader': media.get('author'),
+ 'uploader': strip_or_none(media.get('author') or None),
'upload_date': unified_strdate(media.get('date')),
'duration': parse_duration(media.get('length')),
'subtitles': subtitles,
+ **relinker_info
}
- info.update(relinker_info)
-
- return info
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -676,20 +654,20 @@ class RaiIE(RaiBaseIE):
'twitter:player', 'jsonlink'), webpage, default=None)
if content_item_url:
content_item_id = self._search_regex(
- r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
+ rf'ContentItem-({self._UUID_RE})', content_item_url,
'content item id', default=None)
if not content_item_id:
content_item_id = self._search_regex(
- r'''(?x)
+ rf'''(?x)
(?:
(?:initEdizione|drawMediaRaiTV)\(|
<(?:[^>]+\bdata-id|var\s+uniquename)=|
<iframe[^>]+\bsrc=
)
(["\'])
- (?:(?!\1).)*\bContentItem-(?P<id>%s)
- ''' % self._UUID_RE,
+ (?:(?!\1).)*\bContentItem-(?P<id>{self._UUID_RE})
+ ''',
webpage, 'content item id', default=None, group='id')
content_item_ids = set()
@@ -722,18 +700,121 @@ class RaiIE(RaiBaseIE):
relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id)
- self._sort_formats(relinker_info['formats'])
title = self._search_regex(
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
webpage, 'title', group='title',
default=None) or self._og_search_title(webpage)
- info = {
+ return {
'id': video_id,
'title': title,
+ **relinker_info
}
- info.update(relinker_info)
- return info
+class RaiNewsIE(RaiIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
+ _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
+ _TESTS = [{
+ # new rainews player (#3911)
+ 'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html',
+ 'info_dict': {
+ 'id': '12cf645d-1ffd-4220-b27c-07c226dbdecf',
+ 'ext': 'mp4',
+ 'title': 'Puntata del 29/05/2022',
+ 'duration': 1589,
+ 'upload_date': '20220529',
+ 'uploader': 'rainews',
+ }
+ }, {
+ # old content with fallback method to extract media urls
+ 'url': 'https://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
+ 'info_dict': {
+ 'id': '1632c009-c843-4836-bb65-80c33084a64b',
+ 'ext': 'mp4',
+ 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
+ 'description': 'I film in uscita questa settimana.',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 833,
+ 'upload_date': '20161103'
+ },
+ 'expected_warnings': ['unable to extract player_data'],
+ }, {
+ # iframe + drm
+ 'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_data = self._search_json(
+ r'<rainews-player\s*data=\'', webpage, 'player_data', video_id,
+ transform_source=clean_html, fatal=False)
+ track_info = player_data.get('track_info')
+ relinker_url = traverse_obj(player_data, 'mediapolis', 'content_url')
+
+ if not relinker_url:
+ # fallback on old implementation for some old content
+ try:
+ return self._extract_from_content_id(video_id, url)
+ except GeoRestrictedError:
+ raise
+ except ExtractorError as e:
+ raise ExtractorError('Relinker URL not found', cause=e)
+
+ relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id)
+
+ return {
+ 'id': video_id,
+ 'title': track_info.get('title') or self._og_search_title(webpage),
+ 'upload_date': unified_strdate(track_info.get('date')),
+ 'uploader': strip_or_none(track_info.get('editor') or None),
+ **relinker_info
+ }
+
+
+class RaiSudtirolIE(RaiBaseIE):
+ _VALID_URL = r'https?://raisudtirol\.rai\.it/.+?media=(?P<id>[TP]tv\d+)'
+ _TESTS = [{
+ 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
+ 'info_dict': {
+ 'id': 'Ptv1619729460',
+ 'ext': 'mp4',
+ 'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51',
+ 'series': 'Euro: trasmisciun d\'economia',
+ 'upload_date': '20210429',
+ 'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+?\.jpg',
+ 'uploader': 'raisudtirol',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_date = self._html_search_regex(r'<span class="med_data">(.+?)</span>', webpage, 'video_date', fatal=False)
+ video_title = self._html_search_regex(r'<span class="med_title">(.+?)</span>', webpage, 'video_title', fatal=False)
+ video_url = self._html_search_regex(r'sources:\s*\[\{file:\s*"(.+?)"\}\]', webpage, 'video_url')
+ video_thumb = self._html_search_regex(r'image: \'(.+?)\'', webpage, 'video_thumb', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(video_title, video_date, delim=' - '),
+ 'series': video_title,
+ 'upload_date': unified_strdate(video_date),
+ 'thumbnail': urljoin('https://raisudtirol.rai.it/', video_thumb),
+ 'uploader': 'raisudtirol',
+ 'formats': [{
+ 'format_id': 'https-mp4',
+ 'url': self._proto_relative_url(video_url),
+ 'width': 1024,
+ 'height': 576,
+ 'fps': 25,
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ }],
+ }
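
Two mechanical patterns recur throughout the rai.py hunks above and across the rest of this patch: explicit self._sort_formats(...) calls are dropped (format sorting now happens once in the core, after extraction), and the info = {...}; info.update(relinker_info); return info idiom collapses into a single dict literal that unpacks **relinker_info. A minimal sketch of the shape change, with illustrative field names rather than code from this patch:

# Sketch only -- metadata/relinker_info are placeholder names.
def build_info_old(metadata, relinker_info):
    info = {
        'id': metadata.get('id'),
        'title': metadata.get('title'),
    }
    info.update(relinker_info)  # formats/subtitles merged after the fact
    return info

def build_info_new(metadata, relinker_info):
    # single expression; later keys win, so relinker_info can override
    return {
        'id': metadata.get('id'),
        'title': metadata.get('title'),
        **relinker_info,
    }
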
diff --git a/hypervideo_dl/extractor/raywenderlich.py b/hypervideo_dl/extractor/raywenderlich.py
index f04d51f..e0e3c3e 100644
--- a/hypervideo_dl/extractor/raywenderlich.py
+++ b/hypervideo_dl/extractor/raywenderlich.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/rbmaradio.py b/hypervideo_dl/extractor/rbmaradio.py
index 9642fbb..86c63db 100644
--- a/hypervideo_dl/extractor/rbmaradio.py
+++ b/hypervideo_dl/extractor/rbmaradio.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/rcs.py b/hypervideo_dl/extractor/rcs.py
index ace611b..b905f8d 100644
--- a/hypervideo_dl/extractor/rcs.py
+++ b/hypervideo_dl/extractor/rcs.py
@@ -1,14 +1,11 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
from ..utils import (
- clean_html,
ExtractorError,
- js_to_json,
base_url,
+ clean_html,
+ js_to_json,
url_basename,
urljoin,
)
@@ -199,7 +196,6 @@ class RCSBaseIE(InfoExtractor):
'format_id': 'http-mp4',
'url': urls['mp4']
})
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
@@ -284,6 +280,20 @@ class RCSEmbedsIE(RCSBaseIE):
(?:gazzanet\.)?gazzetta
)\.it)
/video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)'''
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])
+ (?P<url>(?:https?:)?//video\.
+ (?:
+ rcs|
+ (?:corriere\w+\.)?corriere|
+ (?:gazzanet\.)?gazzetta
+ )
+ \.it/video-embed/.+?)
+ \1''']
_TESTS = [{
'url': 'https://video.rcs.it/video-embed/iodonna-0001585037',
'md5': '623ecc8ffe7299b2d0c1046d8331a9df',
@@ -324,30 +334,9 @@ class RCSEmbedsIE(RCSBaseIE):
urls[i] = urljoin(base_url(e), url_basename(e))
return urls
- @staticmethod
- def _extract_urls(webpage):
- entries = [
- mobj.group('url')
- for mobj in re.finditer(r'''(?x)
- (?:
- data-frame-src=|
- <iframe[^\n]+src=
- )
- (["'])
- (?P<url>(?:https?:)?//video\.
- (?:
- rcs|
- (?:corriere\w+\.)?corriere|
- (?:gazzanet\.)?gazzetta
- )
- \.it/video-embed/.+?)
- \1''', webpage)]
- return RCSEmbedsIE._sanitize_urls(entries)
-
- @staticmethod
- def _extract_url(webpage):
- urls = RCSEmbedsIE._extract_urls(webpage)
- return urls[0] if urls else None
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ return cls._sanitize_urls(list(super()._extract_embed_urls(url, webpage)))
class RCSIE(RCSBaseIE):
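
The RCSEmbedsIE change above is one instance of a patch-wide migration (redtube.py below gets the same treatment): ad-hoc _extract_urls/_extract_url static methods are replaced by a declarative _EMBED_REGEX list of patterns with a named url group, which the common base class consumes; extractors that must post-process the matches, as RCSEmbedsIE does with _sanitize_urls, override _extract_embed_urls and delegate to super(). A rough sketch of what the base-class hook presumably does with _EMBED_REGEX (the real InfoExtractor implementation has more plumbing):

import re

class InfoExtractorSketch:
    _EMBED_REGEX = []  # subclasses declare patterns with a named 'url' group

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # yield every embed URL that any declared pattern finds in the page
        for pattern in cls._EMBED_REGEX:
            for mobj in re.finditer(pattern, webpage):
                yield mobj.group('url')
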
diff --git a/hypervideo_dl/extractor/rcti.py b/hypervideo_dl/extractor/rcti.py
index ac42e58..27b4ad7 100644
--- a/hypervideo_dl/extractor/rcti.py
+++ b/hypervideo_dl/extractor/rcti.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import random
import time
@@ -197,8 +194,6 @@ class RCTIPlusIE(RCTIPlusBaseIE):
if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs
- self._sort_formats(formats)
-
return {
'id': video_meta.get('product_id') or video_json.get('product_id'),
'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
diff --git a/hypervideo_dl/extractor/rds.py b/hypervideo_dl/extractor/rds.py
index 0c49785..9a2e0d9 100644
--- a/hypervideo_dl/extractor/rds.py
+++ b/hypervideo_dl/extractor/rds.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
diff --git a/hypervideo_dl/extractor/redbee.py b/hypervideo_dl/extractor/redbee.py
new file mode 100644
index 0000000..eb40a81
--- /dev/null
+++ b/hypervideo_dl/extractor/redbee.py
@@ -0,0 +1,379 @@
+import json
+import re
+import time
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
+ traverse_obj,
+ try_call,
+ unified_timestamp,
+)
+
+
+class RedBeeBaseIE(InfoExtractor):
+ _DEVICE_ID = str(uuid.uuid4())
+
+ @property
+ def _API_URL(self):
+ """
+ Ref: https://apidocs.emp.ebsd.ericsson.net
+ Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
+ """
+ return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'
+
+ def _get_bearer_token(self, asset_id, jwt=None):
+ request = {
+ 'deviceId': self._DEVICE_ID,
+ 'device': {
+ 'deviceId': self._DEVICE_ID,
+ 'name': 'Mozilla Firefox 102',
+ 'type': 'WEB',
+ },
+ }
+ if jwt:
+ request['jwt'] = jwt
+
+ return self._download_json(
+ f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
+ asset_id, data=json.dumps(request).encode('utf-8'), headers={
+ 'Content-Type': 'application/json;charset=utf-8'
+ })['sessionToken']
+
+ def _get_formats_and_subtitles(self, asset_id, **kwargs):
+ bearer_token = self._get_bearer_token(asset_id, **kwargs)
+ api_response = self._download_json(
+ f'{self._API_URL}/entitlement/{asset_id}/play',
+ asset_id, headers={
+ 'Authorization': f'Bearer {bearer_token}',
+ 'Accept': 'application/json, text/plain, */*'
+ })
+
+ formats, subtitles = [], {}
+ for format in api_response['formats']:
+ if not format.get('mediaLocator'):
+ continue
+
+ fmts, subs = [], {}
+ if format.get('format') == 'DASH':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format['mediaLocator'], asset_id, fatal=False)
+ elif format.get('format') == 'SMOOTHSTREAMING':
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ format['mediaLocator'], asset_id, fatal=False)
+ elif format.get('format') == 'HLS':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ format['mediaLocator'], asset_id, fatal=False)
+
+ if format.get('drm'):
+ for f in fmts:
+ f['has_drm'] = True
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return formats, subtitles
+
+
+class ParliamentLiveUKIE(RedBeeBaseIE):
+ IE_NAME = 'parliamentlive.tv'
+ IE_DESC = 'UK parliament videos'
+ _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _REDBEE_CUSTOMER = 'UKParliament'
+ _REDBEE_BUSINESS_UNIT = 'ParliamentLive'
+
+ _TESTS = [{
+ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+ 'info_dict': {
+ 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+ 'ext': 'mp4',
+ 'title': 'Home Affairs Committee',
+ 'timestamp': 1395153872,
+ 'upload_date': '20140318',
+ 'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
+ },
+ }, {
+ 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
+ 'info_dict': {
+ 'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
+ 'ext': 'mp4',
+ 'title': 'House of Commons',
+ 'timestamp': 1658392447,
+ 'upload_date': '20220721',
+ 'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ formats, subtitles = self._get_formats_and_subtitles(video_id)
+
+ video_info = self._download_json(
+ f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(video_info, ('event', 'title')),
+ 'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
+ 'timestamp': traverse_obj(
+ video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
+ '_format_sort_fields': ('res', 'proto'),
+ }
+
+
+class RTBFIE(RedBeeBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?rtbf\.be/
+ (?:
+ video/[^?]+\?.*\bid=|
+ ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
+ auvio/[^/]+\?.*\b(?P<live>l)?id=
+ )(?P<id>\d+)'''
+ _NETRC_MACHINE = 'rtbf'
+
+ _REDBEE_CUSTOMER = 'RTBF'
+ _REDBEE_BUSINESS_UNIT = 'Auvio'
+
+ _TESTS = [{
+ 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
+ 'md5': '8c876a1cceeb6cf31b476461ade72384',
+ 'info_dict': {
+ 'id': '1921274',
+ 'ext': 'mp4',
+ 'title': 'Les Diables au coeur (épisode 2)',
+ 'description': '(du 25/04/2014)',
+ 'duration': 3099.54,
+ 'upload_date': '20140425',
+ 'timestamp': 1398456300,
+ },
+ 'skip': 'No longer available',
+ }, {
+ # geo restricted
+ 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
+ 'only_matching': True,
+ }, {
+ # Live
+ 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
+ 'only_matching': True,
+ }, {
+ # Audio
+ 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
+ 'only_matching': True,
+ }, {
+ # With Subtitle
+ 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
+ 'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
+ 'info_dict': {
+ 'id': '2921926',
+ 'ext': 'mp4',
+ 'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
+ 'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
+ 'duration': 5258.8,
+ 'upload_date': '20220727',
+ 'timestamp': 1658934000,
+ 'series': '#Investigation',
+ 'thumbnail': r're:^https?://[^?&]+\.jpg$',
+ },
+ }, {
+ 'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
+ 'md5': '054f9f143bc79c89647c35e5a7d35fa8',
+ 'info_dict': {
+ 'id': '2920492',
+ 'ext': 'mp4',
+ 'title': '04 - Le crime de la rue Royale',
+ 'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
+ 'duration': 1574.6,
+ 'upload_date': '20220723',
+ 'timestamp': 1658596887,
+ 'series': 'La Belgique criminelle - TV',
+ 'thumbnail': r're:^https?://[^?&]+\.jpg$',
+ },
+ }]
+
+ _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
+ _PROVIDERS = {
+ 'YOUTUBE': 'Youtube',
+ 'DAILYMOTION': 'Dailymotion',
+ 'VIMEO': 'Vimeo',
+ }
+ _QUALITIES = [
+ ('mobile', 'SD'),
+ ('web', 'MD'),
+ ('high', 'HD'),
+ ]
+ _LOGIN_URL = 'https://login.rtbf.be/accounts.login'
+ _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
+ _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'
+
+ def _perform_login(self, username, password):
+ if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID):
+ return
+
+ self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)
+
+ login_response = self._download_json(
+ self._LOGIN_URL, None, data=urllib.parse.urlencode({
+ 'loginID': username,
+ 'password': password,
+ 'APIKey': self._GIGYA_API_KEY,
+ 'targetEnv': 'jssdk',
+ 'sessionExpiration': '-2',
+ }).encode('utf-8'), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ if login_response['statusCode'] != 200:
+ raise ExtractorError('Login failed. Server message: %s' % login_response['errorMessage'], expected=True)
+
+ self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'],
+ secure=True, expire_time=time.time() + 3600)
+
+ def _get_formats_and_subtitles(self, url, media_id):
+ login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
+ if not login_token:
+ self.raise_login_required()
+
+ session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value) or self._download_json(
+ 'https://login.rtbf.be/accounts.getJWT', media_id, query={
+ 'login_token': login_token.value,
+ 'APIKey': self._GIGYA_API_KEY,
+ 'sdk': 'js_latest',
+ 'authMode': 'cookie',
+ 'pageURL': url,
+ 'sdkBuild': '13273',
+ 'format': 'json',
+ })['id_token']
+
+ return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)
+
+ def _real_extract(self, url):
+ live, media_id = self._match_valid_url(url).groups()
+ embed_page = self._download_webpage(
+ 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
+ media_id, query={'id': media_id})
+
+ media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False)
+ if not media_data:
+ if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page):
+ raise ExtractorError('Livestream has ended.', expected=True)
+ if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page):
+ self.raise_login_required()
+
+ raise ExtractorError('Could not find media data')
+
+ data = self._parse_json(media_data, media_id)
+
+ error = data.get('error')
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ provider = data.get('provider')
+ if provider in self._PROVIDERS:
+ return self.url_result(data['url'], self._PROVIDERS[provider])
+
+ title = traverse_obj(data, 'subtitle', 'title')
+ is_live = data.get('isLive')
+ height_re = r'-(\d+)p\.'
+ formats, subtitles = [], {}
+
+ # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake'
+ # since all they contain is a 20s video that is completely unrelated.
+ # https://github.com/hypervideo/hypervideo/issues/4656#issuecomment-1214461092
+ m3u8_url = None if data.get('isLive') else traverse_obj(data, 'urlHlsAes128', 'urlHls')
+ if m3u8_url:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
+ http_url = data.get('url')
+ if formats and http_url and re.search(height_re, http_url):
+ http_url = fix_url(http_url)
+ for m3u8_f in formats[:]:
+ height = m3u8_f.get('height')
+ if not height:
+ continue
+ f = m3u8_f.copy()
+ del f['protocol']
+ f.update({
+ 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
+ 'url': re.sub(height_re, '-%dp.' % height, http_url),
+ })
+ formats.append(f)
+ else:
+ sources = data.get('sources') or {}
+ for key, format_id in self._QUALITIES:
+ format_url = sources.get(key)
+ if not format_url:
+ continue
+ height = int_or_none(self._search_regex(
+ height_re, format_url, 'height', default=None))
+ formats.append({
+ 'format_id': format_id,
+ 'url': fix_url(format_url),
+ 'height': height,
+ })
+
+ mpd_url = None if data.get('isLive') else data.get('urlDash')
+ if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ mpd_url, media_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ audio_url = data.get('urlAudio')
+ if audio_url:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+
+ for track in (data.get('tracks') or {}).values():
+ sub_url = track.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(track.get('lang') or 'fr', []).append({
+ 'url': sub_url,
+ })
+
+ if not formats:
+ fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'thumbnail': data.get('thumbnail'),
+ 'duration': float_or_none(data.get('realDuration')),
+ 'timestamp': int_or_none(data.get('liveFrom')),
+ 'series': data.get('programLabel'),
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ '_format_sort_fields': ('res', 'proto'),
+ }
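
The new redbee.py above centralizes the Red Bee Media (Ericsson exposure) API in RedBeeBaseIE: _get_bearer_token posts a device descriptor to /auth/anonymous (or /auth/gigyaLogin when a Gigya JWT is supplied) to obtain a session token, and _get_formats_and_subtitles then calls /entitlement/{asset_id}/play, dispatching each returned descriptor to the DASH, SmoothStreaming, or HLS manifest parser according to its format field. A hypothetical minimal subclass, to illustrate the contract the two site extractors above follow (the site, customer, and business-unit values are made up):

class SomeBroadcasterIE(RedBeeBaseIE):  # hypothetical example site
    _VALID_URL = r'https?://(?:www\.)?example\.com/asset/(?P<id>\d+)'
    _REDBEE_CUSTOMER = 'SomeBroadcaster'  # placeholder value
    _REDBEE_BUSINESS_UNIT = 'Web'         # placeholder value

    def _real_extract(self, url):
        asset_id = self._match_id(url)
        # base class: anonymous bearer token -> /entitlement/<asset_id>/play
        formats, subtitles = self._get_formats_and_subtitles(asset_id)
        return {
            'id': asset_id,
            'title': asset_id,  # a real extractor would fetch metadata
            'formats': formats,
            'subtitles': subtitles,
        }
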
diff --git a/hypervideo_dl/extractor/redbulltv.py b/hypervideo_dl/extractor/redbulltv.py
index 756a366..a01bc84 100644
--- a/hypervideo_dl/extractor/redbulltv.py
+++ b/hypervideo_dl/extractor/redbulltv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -84,7 +80,6 @@ class RedBullTVIE(InfoExtractor):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
for resource in video.get('resources', []):
if resource.startswith('closed_caption_'):
@@ -114,7 +109,7 @@ class RedBullTVIE(InfoExtractor):
return self.extract_info(video_id)
-class RedBullEmbedIE(RedBullTVIE):
+class RedBullEmbedIE(RedBullTVIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})'
_TESTS = [{
# HLS manifest accessible only using assetId
diff --git a/hypervideo_dl/extractor/reddit.py b/hypervideo_dl/extractor/reddit.py
index a042a59..f1a5c85 100644
--- a/hypervideo_dl/extractor/reddit.py
+++ b/hypervideo_dl/extractor/reddit.py
@@ -1,14 +1,15 @@
import random
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
+ int_or_none,
+ traverse_obj,
try_get,
unescapeHTML,
url_or_none,
- traverse_obj
)
@@ -19,6 +20,7 @@ class RedditIE(InfoExtractor):
'info_dict': {
'id': 'zv89llsvexdz',
'ext': 'mp4',
+ 'display_id': '6rrwyj',
'title': 'That small heart attack.',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:4',
@@ -35,6 +37,34 @@ class RedditIE(InfoExtractor):
'skip_download': True,
},
}, {
+ # 1080p fallback format
+ 'url': 'https://www.reddit.com/r/aww/comments/90bu6w/heat_index_was_110_degrees_so_we_offered_him_a/',
+ 'md5': '8b5902cfda3006bf90faea7adf765a49',
+ 'info_dict': {
+ 'id': 'gyh95hiqc0b11',
+ 'ext': 'mp4',
+ 'display_id': '90bu6w',
+ 'title': 'Heat index was 110 degrees so we offered him a cold drink. He went for a full body soak instead',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'thumbnails': 'count:7',
+ 'timestamp': 1532051078,
+ 'upload_date': '20180720',
+ 'uploader': 'FootLoosePickleJuice',
+ 'duration': 14,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # videos embedded in reddit text post
+ 'url': 'https://www.reddit.com/r/KamenRider/comments/wzqkxp/finale_kamen_rider_revice_episode_50_family_to/',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': 'wzqkxp',
+ 'title': 'md5:72d3d19402aa11eff5bd32fc96369b37',
+ },
+ }, {
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
'only_matching': True,
}, {
@@ -80,10 +110,6 @@ class RedditIE(InfoExtractor):
data = data[0]['data']['children'][0]['data']
video_url = data['url']
- # Avoid recursing into the same reddit URL
- if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
- raise ExtractorError('No media found', expected=True)
-
over_18 = data.get('over_18')
if over_18 is True:
age_limit = 18
@@ -126,6 +152,32 @@ class RedditIE(InfoExtractor):
'age_limit': age_limit,
}
+ parsed_url = urllib.parse.urlparse(video_url)
+
+ # Check for embeds in text posts, or else raise to avoid recursing into the same reddit URL
+ if 'reddit.com' in parsed_url.netloc and f'/{video_id}/' in parsed_url.path:
+ entries = []
+ for media in traverse_obj(data, ('media_metadata', ...), expected_type=dict):
+ if not media.get('id') or media.get('e') != 'RedditVideo':
+ continue
+ formats = []
+ if media.get('hlsUrl'):
+ formats.extend(self._extract_m3u8_formats(
+ unescapeHTML(media['hlsUrl']), video_id, 'mp4', m3u8_id='hls', fatal=False))
+ if media.get('dashUrl'):
+ formats.extend(self._extract_mpd_formats(
+ unescapeHTML(media['dashUrl']), video_id, mpd_id='dash', fatal=False))
+ if formats:
+ entries.append({
+ 'id': media['id'],
+ 'display_id': video_id,
+ 'formats': formats,
+ **info,
+ })
+ if entries:
+ return self.playlist_result(entries, video_id, info.get('title'))
+ raise ExtractorError('No media found', expected=True)
+
# Check if media is hosted on reddit:
reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False)
if reddit_video:
@@ -143,12 +195,21 @@ class RedditIE(InfoExtractor):
dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
- formats = self._extract_m3u8_formats(
- hls_playlist_url, display_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ formats = [{
+ 'url': unescapeHTML(reddit_video['fallback_url']),
+ 'height': int_or_none(reddit_video.get('height')),
+ 'width': int_or_none(reddit_video.get('width')),
+ 'tbr': int_or_none(reddit_video.get('bitrate_kbps')),
+ 'acodec': 'none',
+ 'vcodec': 'h264',
+ 'ext': 'mp4',
+ 'format_id': 'fallback',
+ 'format_note': 'DASH video, mp4_dash',
+ }]
+ formats.extend(self._extract_m3u8_formats(
+ hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
formats.extend(self._extract_mpd_formats(
dash_playlist_url, display_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
return {
**info,
@@ -158,6 +219,14 @@ class RedditIE(InfoExtractor):
'duration': int_or_none(reddit_video.get('duration')),
}
+ if parsed_url.netloc == 'v.redd.it':
+ self.raise_no_formats('This video is processing', expected=True, video_id=video_id)
+ return {
+ **info,
+ 'id': parsed_url.path.split('/')[1],
+ 'display_id': video_id,
+ }
+
# Not hosted on reddit, must continue extraction
return {
**info,
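
The reddit.py rework above covers three cases that used to fall through to a blanket 'No media found' error: self-referencing URLs are now scanned for RedditVideo entries in media_metadata (text-post embeds, returned as a playlist), a plain fallback_url MP4 is listed alongside the HLS/DASH manifests, and bare v.redd.it links whose manifests are not yet available raise 'This video is processing'. A small sketch of the media_metadata walk, with made-up sample data (traverse_obj lives in hypervideo_dl.utils; the Ellipsis path element iterates all values of the mapping):

from hypervideo_dl.utils import traverse_obj

data = {'media_metadata': {
    'abc123': {'id': 'abc123', 'e': 'RedditVideo',
               'hlsUrl': 'https://v.redd.it/abc123/HLSPlaylist.m3u8'},
    'img456': {'id': 'img456', 'e': 'Image'},  # skipped: not a video
}}
videos = [m for m in traverse_obj(data, ('media_metadata', ...), expected_type=dict)
          if m.get('id') and m.get('e') == 'RedditVideo']
assert [m['id'] for m in videos] == ['abc123']
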
diff --git a/hypervideo_dl/extractor/redgifs.py b/hypervideo_dl/extractor/redgifs.py
index 55196b7..098fb81 100644
--- a/hypervideo_dl/extractor/redgifs.py
+++ b/hypervideo_dl/extractor/redgifs.py
@@ -1,5 +1,5 @@
-# coding: utf-8
import functools
+import urllib.error
from .common import InfoExtractor
from ..compat import compat_parse_qs
@@ -19,6 +19,12 @@ class RedGifsBaseInfoExtractor(InfoExtractor):
'hd': None,
}
+ _API_HEADERS = {
+ 'referer': 'https://www.redgifs.com/',
+ 'origin': 'https://www.redgifs.com',
+ 'content-type': 'application/json',
+ }
+
def _parse_gif_data(self, gif_data):
video_id = gif_data.get('id')
quality = qualities(tuple(self._FORMATS.keys()))
@@ -39,12 +45,11 @@ class RedGifsBaseInfoExtractor(InfoExtractor):
'height': height,
'quality': quality(format_id),
})
- self._sort_formats(formats)
return {
'id': video_id,
'webpage_url': f'https://redgifs.com/watch/{video_id}',
- 'ie_key': RedGifsIE.ie_key(),
+ 'extractor_key': RedGifsIE.ie_key(),
'extractor': 'RedGifs',
'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
'timestamp': int_or_none(gif_data.get('createDate')),
@@ -58,9 +63,30 @@ class RedGifsBaseInfoExtractor(InfoExtractor):
'formats': formats,
}
+ def _fetch_oauth_token(self, video_id):
+ # https://github.com/Redgifs/api/wiki/Temporary-tokens
+ auth = self._download_json('https://api.redgifs.com/v2/auth/temporary',
+ video_id, note='Fetching temporary token')
+ if not auth.get('token'):
+ raise ExtractorError('Unable to get temporary token')
+ self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'
+
def _call_api(self, ep, video_id, *args, **kwargs):
- data = self._download_json(
- f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs)
+ for first_attempt in True, False:
+ if 'authorization' not in self._API_HEADERS:
+ self._fetch_oauth_token(video_id)
+ try:
+ headers = dict(self._API_HEADERS)
+ headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
+ data = self._download_json(
+ f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs)
+ break
+ except ExtractorError as e:
+ if first_attempt and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
+ del self._API_HEADERS['authorization'] # refresh the token
+ continue
+ raise
+
if 'error' in data:
raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
return data
@@ -103,6 +129,7 @@ class RedGifsIE(RedGifsBaseInfoExtractor):
'like_count': int,
'categories': list,
'age_limit': 18,
+ 'tags': list,
}
}, {
'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
@@ -118,13 +145,14 @@ class RedGifsIE(RedGifsBaseInfoExtractor):
'like_count': int,
'categories': list,
'age_limit': 18,
+ 'tags': list,
}
}]
def _real_extract(self, url):
video_id = self._match_id(url).lower()
video_info = self._call_api(
- f'gifs/{video_id}', video_id, note='Downloading video info')
+ f'gifs/{video_id}?views=yes', video_id, note='Downloading video info')
return self._parse_gif_data(video_info['gif'])
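
The redgifs.py change above replaces unauthenticated API access with temporary tokens (see the Redgifs wiki page linked in the code): _fetch_oauth_token stores a bearer token in the shared _API_HEADERS, and _call_api retries exactly once on HTTP 401 after discarding the stale token. The same retry-once shape, reduced to plain urllib (illustrative only; the extractor itself goes through _download_json):

import json
import urllib.error
import urllib.request

API = 'https://api.redgifs.com/v2'
headers = {}  # stands in for RedGifsBaseInfoExtractor._API_HEADERS

def fetch_token():
    with urllib.request.urlopen(f'{API}/auth/temporary') as resp:
        headers['authorization'] = 'Bearer ' + json.load(resp)['token']

def call_api(endpoint):
    for first_attempt in (True, False):
        if 'authorization' not in headers:
            fetch_token()
        try:
            req = urllib.request.Request(f'{API}/{endpoint}', headers=headers)
            with urllib.request.urlopen(req) as resp:
                return json.load(resp)
        except urllib.error.HTTPError as e:
            if first_attempt and e.code == 401:
                del headers['authorization']  # stale token: refresh and retry
                continue
            raise
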
diff --git a/hypervideo_dl/extractor/redtube.py b/hypervideo_dl/extractor/redtube.py
index 7fee54f..49076cc 100644
--- a/hypervideo_dl/extractor/redtube.py
+++ b/hypervideo_dl/extractor/redtube.py
@@ -1,7 +1,3 @@
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -16,6 +12,7 @@ from ..utils import (
class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
_TESTS = [{
'url': 'https://www.redtube.com/38864951',
'md5': '4fba70cbca3aefd25767ab4b523c9878',
@@ -39,12 +36,6 @@ class RedTubeIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
@@ -119,7 +110,6 @@ class RedTubeIE(InfoExtractor):
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
formats.append({'url': video_url, 'ext': 'mp4'})
- self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
diff --git a/hypervideo_dl/extractor/regiotv.py b/hypervideo_dl/extractor/regiotv.py
index e250a52..6114841 100644
--- a/hypervideo_dl/extractor/regiotv.py
+++ b/hypervideo_dl/extractor/regiotv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
diff --git a/hypervideo_dl/extractor/rentv.py b/hypervideo_dl/extractor/rentv.py
index 7c8909d..fdde317 100644
--- a/hypervideo_dl/extractor/rentv.py
+++ b/hypervideo_dl/extractor/rentv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -50,7 +47,6 @@ class RENTVIE(InfoExtractor):
formats.append({
'url': src,
})
- self._sort_formats(formats)
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/restudy.py b/hypervideo_dl/extractor/restudy.py
index d47fb45..6d03256 100644
--- a/hypervideo_dl/extractor/restudy.py
+++ b/hypervideo_dl/extractor/restudy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -34,7 +31,6 @@ class RestudyIE(InfoExtractor):
formats = self._extract_smil_formats(
'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
video_id)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/reuters.py b/hypervideo_dl/extractor/reuters.py
index 9dc482d..6919425 100644
--- a/hypervideo_dl/extractor/reuters.py
+++ b/hypervideo_dl/extractor/reuters.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -58,7 +55,6 @@ class ReutersIE(InfoExtractor):
'ext': ext,
'container': container if method != 'mobile' else None,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/reverbnation.py b/hypervideo_dl/extractor/reverbnation.py
index 4cb99c2..06b6c3c 100644
--- a/hypervideo_dl/extractor/reverbnation.py
+++ b/hypervideo_dl/extractor/reverbnation.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
qualities,
diff --git a/hypervideo_dl/extractor/rice.py b/hypervideo_dl/extractor/rice.py
index cf2bb1b..3dd4d31 100644
--- a/hypervideo_dl/extractor/rice.py
+++ b/hypervideo_dl/extractor/rice.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -91,7 +88,6 @@ class RICEIE(InfoExtractor):
'ext': 'flv',
})
formats.append(fmt)
- self._sort_formats(formats)
thumbnails = []
for content_asset in content_data.findall('.//contentAssets'):
diff --git a/hypervideo_dl/extractor/rmcdecouverte.py b/hypervideo_dl/extractor/rmcdecouverte.py
index 8bfce34..8d29b30 100644
--- a/hypervideo_dl/extractor/rmcdecouverte.py
+++ b/hypervideo_dl/extractor/rmcdecouverte.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
from ..compat import (
diff --git a/hypervideo_dl/extractor/ro220.py b/hypervideo_dl/extractor/ro220.py
deleted file mode 100644
index 69934ef..0000000
--- a/hypervideo_dl/extractor/ro220.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-
-
-class Ro220IE(InfoExtractor):
- IE_NAME = '220.ro'
- _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<id>[^/]+)'
- _TEST = {
- 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/',
- 'md5': '03af18b73a07b4088753930db7a34add',
- 'info_dict': {
- 'id': 'LYV6doKo7f',
- 'ext': 'mp4',
- 'title': 'Luati-le Banii sez 4 ep 1',
- 'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- url = compat_urllib_parse_unquote(self._search_regex(
- r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url'))
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- formats = [{
- 'format_id': 'sd',
- 'url': url,
- 'ext': 'mp4',
- }]
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- }
diff --git a/hypervideo_dl/extractor/rockstargames.py b/hypervideo_dl/extractor/rockstargames.py
index cd6904b..c491aaf 100644
--- a/hypervideo_dl/extractor/rockstargames.py
+++ b/hypervideo_dl/extractor/rockstargames.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -57,8 +54,6 @@ class RockstarGamesIE(InfoExtractor):
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/rokfin.py b/hypervideo_dl/extractor/rokfin.py
index 0fd65db..ade3cd0 100644
--- a/hypervideo_dl/extractor/rokfin.py
+++ b/hypervideo_dl/extractor/rokfin.py
@@ -1,26 +1,33 @@
-# coding: utf-8
import itertools
+import json
+import re
+import urllib.parse
from datetime import datetime
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
format_field,
int_or_none,
str_or_none,
traverse_obj,
+ try_get,
+ unescapeHTML,
unified_timestamp,
url_or_none,
+ urlencode_postdata,
)
-
_API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/'
class RokfinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)'
+ _NETRC_MACHINE = 'rokfin'
+ _AUTH_BASE = 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect'
+ _access_mgmt_tokens = {} # OAuth 2.0: RFC 6749, Sec. 1.4-5
_TESTS = [{
'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change',
'info_dict': {
@@ -84,8 +91,7 @@ class RokfinIE(InfoExtractor):
def _real_extract(self, url):
video_id, video_type = self._match_valid_url(url).group('id', 'type')
-
- metadata = self._download_json(f'{_API_BASE_URL}{video_id}', video_id)
+ metadata = self._download_json_using_access_token(f'{_API_BASE_URL}{video_id}', video_id)
scheduled = unified_timestamp(metadata.get('scheduledAt'))
live_status = ('was_live' if metadata.get('stoppedAt')
@@ -104,9 +110,8 @@ class RokfinIE(InfoExtractor):
self.raise_login_required('This video is only available to premium users', True, method='cookies')
elif scheduled:
self.raise_no_formats(
- f'Stream is offline; sheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
+ f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
video_id=video_id, expected=True)
- self._sort_formats(formats)
uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000)
@@ -140,7 +145,7 @@ class RokfinIE(InfoExtractor):
for page_n in itertools.count():
raw_comments = self._download_json(
f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
- video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}',
+ video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}',
fatal=False) or {}
for comment in raw_comments.get('content') or []:
@@ -160,6 +165,79 @@ class RokfinIE(InfoExtractor):
if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False):
return
+ def _perform_login(self, username, password):
+ # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 3.1)
+ login_page = self._download_webpage(
+ f'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid',
+ None, note='loading login page', errnote='error loading login page')
+ authentication_point_url = unescapeHTML(self._search_regex(
+ r'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"',
+ login_page, name='Authentication URL'))
+
+ resp_body = self._download_webpage(
+ authentication_point_url, None, note='logging in', fatal=False, expected_status=404,
+ data=urlencode_postdata({'username': username, 'password': password, 'rememberMe': 'off', 'credentialId': ''}))
+ if not self._authentication_active():
+ if re.search(r'(?i)(invalid\s+username\s+or\s+password)', resp_body or ''):
+ raise ExtractorError('invalid username/password', expected=True)
+ raise ExtractorError('Login failed')
+
+ urlh = self._request_webpage(
+ f'{self._AUTH_BASE}/auth', None,
+ note='granting user authorization', errnote='user authorization rejected by Rokfin',
+ query={
+ 'client_id': 'web',
+ 'prompt': 'none',
+ 'redirect_uri': 'https://rokfin.com/silent-check-sso.html',
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid',
+ })
+ self._access_mgmt_tokens = self._download_json(
+ f'{self._AUTH_BASE}/token', None,
+ note='getting access credentials', errnote='error getting access credentials',
+ data=urlencode_postdata({
+ 'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.geturl()).fragment).get('code')[0],
+ 'client_id': 'web',
+ 'grant_type': 'authorization_code',
+ 'redirect_uri': 'https://rokfin.com/silent-check-sso.html'
+ }))
+
+ def _authentication_active(self):
+ return not (
+ {'KEYCLOAK_IDENTITY', 'KEYCLOAK_IDENTITY_LEGACY', 'KEYCLOAK_SESSION', 'KEYCLOAK_SESSION_LEGACY'}
+ - set(self._get_cookies(self._AUTH_BASE)))
+
+ def _get_auth_token(self):
+ return try_get(self._access_mgmt_tokens, lambda x: ' '.join([x['token_type'], x['access_token']]))
+
+ def _download_json_using_access_token(self, url_or_request, video_id, headers={}, query={}):
+ assert 'authorization' not in headers
+ headers = headers.copy()
+ auth_token = self._get_auth_token()
+ refresh_token = self._access_mgmt_tokens.get('refresh_token')
+ if auth_token:
+ headers['authorization'] = auth_token
+
+ json_string, urlh = self._download_webpage_handle(
+ url_or_request, video_id, headers=headers, query=query, expected_status=401)
+ if not auth_token or urlh.code != 401 or refresh_token is None:
+ return self._parse_json(json_string, video_id)
+
+ self._access_mgmt_tokens = self._download_json(
+ f'{self._AUTH_BASE}/token', video_id,
+ note='User authorization expired or canceled by Rokfin. Re-authorizing ...', errnote='Failed to re-authorize',
+ data=urlencode_postdata({
+ 'grant_type': 'refresh_token',
+ 'refresh_token': refresh_token,
+ 'client_id': 'web'
+ }))
+ headers['authorization'] = self._get_auth_token()
+ if headers['authorization'] is None:
+ raise ExtractorError('User authorization lost', expected=True)
+
+ return self._download_json(url_or_request, video_id, headers=headers, query=query)
+
class RokfinPlaylistBaseIE(InfoExtractor):
_TYPES = {
@@ -183,6 +261,7 @@ class RokfinPlaylistBaseIE(InfoExtractor):
class RokfinStackIE(RokfinPlaylistBaseIE):
IE_NAME = 'rokfin:stack'
+ IE_DESC = 'Rokfin Stacks'
_VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020',
@@ -200,6 +279,7 @@ class RokfinStackIE(RokfinPlaylistBaseIE):
class RokfinChannelIE(RokfinPlaylistBaseIE):
IE_NAME = 'rokfin:channel'
+ IE_DESC = 'Rokfin Channels'
_VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$'
_TESTS = [{
'url': 'https://rokfin.com/TheConvoCouch',
@@ -237,7 +317,7 @@ class RokfinChannelIE(RokfinPlaylistBaseIE):
data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
metadata = self._download_json(
data_url, channel_name,
- note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}')
+ note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}')
yield from self._get_video_data(metadata)
pages_total = int_or_none(metadata.get('totalPages')) or None
@@ -254,3 +334,76 @@ class RokfinChannelIE(RokfinPlaylistBaseIE):
return self.playlist_result(
self._entries(channel_id, channel_name, self._TABS[tab]),
f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description')))
+
+
+class RokfinSearchIE(SearchInfoExtractor):
+ IE_NAME = 'rokfin:search'
+ IE_DESC = 'Rokfin Search'
+ _SEARCH_KEY = 'rkfnsearch'
+ _TYPES = {
+ 'video': (('id', 'raw'), 'post'),
+ 'audio': (('id', 'raw'), 'post'),
+ 'stream': (('content_id', 'raw'), 'stream'),
+ 'dead_stream': (('content_id', 'raw'), 'stream'),
+ 'stack': (('content_id', 'raw'), 'stack'),
+ }
+ _TESTS = [{
+ 'url': 'rkfnsearch5:"zelenko"',
+ 'playlist_count': 5,
+ 'info_dict': {
+ 'id': '"zelenko"',
+ 'title': '"zelenko"',
+ }
+ }]
+ _db_url = None
+ _db_access_key = None
+
+ def _real_initialize(self):
+ self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None))
+ if not self._db_url:
+ self._get_db_access_credentials()
+
+ def _search_results(self, query):
+ total_pages = None
+ for page_number in itertools.count(1):
+ search_results = self._run_search_query(
+ query, data={'query': query, 'page': {'size': 100, 'current': page_number}},
+ note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}')
+ total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none)
+
+ for result in search_results.get('results') or []:
+ video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None))
+ video_id = traverse_obj(result, video_id_key, expected_type=int_or_none)
+ if video_id and video_type:
+ yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}')
+ if not search_results.get('results'):
+ return
+
+ def _run_search_query(self, video_id, data, **kwargs):
+ data = json.dumps(data).encode()
+ for attempt in range(2):
+ search_results = self._download_json(
+ self._db_url, video_id, data=data, fatal=(attempt == 1),
+ headers={'authorization': self._db_access_key}, **kwargs)
+ if search_results:
+ return search_results
+ self.write_debug('Updating access credentials')
+ self._get_db_access_credentials(video_id)
+
+ def _get_db_access_credentials(self, video_id=None):
+ auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None}
+ notfound_err_page = self._download_webpage(
+ 'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page')
+ for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page):
+ js_content = self._download_webpage(
+ f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False)
+ auth_data.update(re.findall(
+ rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or ''))
+ if not all(auth_data.values()):
+ continue
+
+ self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json')
+ self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}'
+ self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
+ return
+ raise ExtractorError('Unable to extract access credentials')
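
rokfin.py above gains a full OpenID Connect login against Keycloak: _perform_login runs the authorization-code flow (RFC 6749), _authentication_active checks for the KEYCLOAK_* session cookies, and _download_json_using_access_token replays a request once after a 401 by exchanging the refresh token. The refresh-and-retry core, condensed into a standalone sketch (fetch and refresh are stand-ins for the _download_* helpers, not functions from this patch):

def get_json_with_reauth(fetch, refresh, tokens, url):
    # fetch(url, auth) -> (parsed_body, http_status); refresh(token) -> new
    # token dict. Token fields follow RFC 6749: 'token_type', 'access_token',
    # 'refresh_token'.
    auth = tokens and ' '.join([tokens['token_type'], tokens['access_token']])
    body, status = fetch(url, auth)
    if status != 401 or not auth or not tokens.get('refresh_token'):
        return body
    tokens.update(refresh(tokens['refresh_token']))  # grant_type=refresh_token
    body, _ = fetch(url, ' '.join([tokens['token_type'], tokens['access_token']]))
    return body
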
diff --git a/hypervideo_dl/extractor/roosterteeth.py b/hypervideo_dl/extractor/roosterteeth.py
index a55dd4f..776fbfb 100644
--- a/hypervideo_dl/extractor/roosterteeth.py
+++ b/hypervideo_dl/extractor/roosterteeth.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -147,7 +146,6 @@ class RoosterTeethIE(RoosterTeethBaseIE):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
episode = self._download_json(
api_episode_url, display_id,
diff --git a/hypervideo_dl/extractor/rottentomatoes.py b/hypervideo_dl/extractor/rottentomatoes.py
index 14c8e82..f133c85 100644
--- a/hypervideo_dl/extractor/rottentomatoes.py
+++ b/hypervideo_dl/extractor/rottentomatoes.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
diff --git a/hypervideo_dl/extractor/roxwel.py b/hypervideo_dl/extractor/roxwel.py
deleted file mode 100644
index 84bb1aa..0000000
--- a/hypervideo_dl/extractor/roxwel.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from __future__ import unicode_literals
-
-
-from .common import InfoExtractor
-from ..utils import unified_strdate, determine_ext
-
-
-class RoxwelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
-
- _TEST = {
- 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
- 'info_dict': {
- 'id': 'passionpittakeawalklive',
- 'ext': 'flv',
- 'title': 'Take A Walk (live)',
- 'uploader': 'Passion Pit',
- 'uploader_id': 'passionpit',
- 'upload_date': '20120928',
- 'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- filename = mobj.group('filename')
- info_url = 'http://www.roxwel.com/api/videos/%s' % filename
- info = self._download_json(info_url, filename)
-
- rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
- best_rate = rtmp_rates[-1]
- url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
- rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
- ext = determine_ext(rtmp_url)
- if ext == 'f4v':
- rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
-
- return {
- 'id': filename,
- 'title': info['title'],
- 'url': rtmp_url,
- 'ext': 'flv',
- 'description': info['description'],
- 'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
- 'uploader': info['artist'],
- 'uploader_id': info['artistname'],
- 'upload_date': unified_strdate(info['dbdate']),
- }
diff --git a/hypervideo_dl/extractor/rozhlas.py b/hypervideo_dl/extractor/rozhlas.py
index fccf694..a818967 100644
--- a/hypervideo_dl/extractor/rozhlas.py
+++ b/hypervideo_dl/extractor/rozhlas.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/rtbf.py b/hypervideo_dl/extractor/rtbf.py
deleted file mode 100644
index 4b61fdb..0000000
--- a/hypervideo_dl/extractor/rtbf.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- strip_or_none,
-)
-
-
-class RTBFIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://(?:www\.)?rtbf\.be/
- (?:
- video/[^?]+\?.*\bid=|
- ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
- auvio/[^/]+\?.*\b(?P<live>l)?id=
- )(?P<id>\d+)'''
- _TESTS = [{
- 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
- 'md5': '8c876a1cceeb6cf31b476461ade72384',
- 'info_dict': {
- 'id': '1921274',
- 'ext': 'mp4',
- 'title': 'Les Diables au coeur (épisode 2)',
- 'description': '(du 25/04/2014)',
- 'duration': 3099.54,
- 'upload_date': '20140425',
- 'timestamp': 1398456300,
- }
- }, {
- # geo restricted
- 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
- 'only_matching': True,
- }, {
- 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
- 'only_matching': True,
- }, {
- 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
- 'only_matching': True,
- }, {
- # Live
- 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
- 'only_matching': True,
- }, {
- # Audio
- 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
- 'only_matching': True,
- }, {
- # With Subtitle
- 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
- 'only_matching': True,
- }]
- _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
- _PROVIDERS = {
- 'YOUTUBE': 'Youtube',
- 'DAILYMOTION': 'Dailymotion',
- 'VIMEO': 'Vimeo',
- }
- _QUALITIES = [
- ('mobile', 'SD'),
- ('web', 'MD'),
- ('high', 'HD'),
- ]
-
- def _real_extract(self, url):
- live, media_id = self._match_valid_url(url).groups()
- embed_page = self._download_webpage(
- 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
- media_id, query={'id': media_id})
- data = self._parse_json(self._html_search_regex(
- r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
-
- error = data.get('error')
- if error:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
-
- provider = data.get('provider')
- if provider in self._PROVIDERS:
- return self.url_result(data['url'], self._PROVIDERS[provider])
-
- title = data['title']
- is_live = data.get('isLive')
- height_re = r'-(\d+)p\.'
- formats = []
-
- m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
-
- fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
- http_url = data.get('url')
- if formats and http_url and re.search(height_re, http_url):
- http_url = fix_url(http_url)
- for m3u8_f in formats[:]:
- height = m3u8_f.get('height')
- if not height:
- continue
- f = m3u8_f.copy()
- del f['protocol']
- f.update({
- 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
- 'url': re.sub(height_re, '-%dp.' % height, http_url),
- })
- formats.append(f)
- else:
- sources = data.get('sources') or {}
- for key, format_id in self._QUALITIES:
- format_url = sources.get(key)
- if not format_url:
- continue
- height = int_or_none(self._search_regex(
- height_re, format_url, 'height', default=None))
- formats.append({
- 'format_id': format_id,
- 'url': fix_url(format_url),
- 'height': height,
- })
-
- mpd_url = data.get('urlDash')
- if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
- formats.extend(self._extract_mpd_formats(
- mpd_url, media_id, mpd_id='dash', fatal=False))
-
- audio_url = data.get('urlAudio')
- if audio_url:
- formats.append({
- 'format_id': 'audio',
- 'url': audio_url,
- 'vcodec': 'none',
- })
- self._sort_formats(formats)
-
- subtitles = {}
- for track in (data.get('tracks') or {}).values():
- sub_url = track.get('url')
- if not sub_url:
- continue
- subtitles.setdefault(track.get('lang') or 'fr', []).append({
- 'url': sub_url,
- })
-
- return {
- 'id': media_id,
- 'formats': formats,
- 'title': title,
- 'description': strip_or_none(data.get('description')),
- 'thumbnail': data.get('thumbnail'),
- 'duration': float_or_none(data.get('realDuration')),
- 'timestamp': int_or_none(data.get('liveFrom')),
- 'series': data.get('programLabel'),
- 'subtitles': subtitles,
- 'is_live': is_live,
- }
diff --git a/hypervideo_dl/extractor/rte.py b/hypervideo_dl/extractor/rte.py
index 1fbc729..aedaa5b 100644
--- a/hypervideo_dl/extractor/rte.py
+++ b/hypervideo_dl/extractor/rte.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -97,8 +94,6 @@ class RteBaseIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
hds_url, item_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
-
info_dict['formats'] = formats
return info_dict
diff --git a/hypervideo_dl/extractor/rtl2.py b/hypervideo_dl/extractor/rtl2.py
index e291714..056cf87 100644
--- a/hypervideo_dl/extractor/rtl2.py
+++ b/hypervideo_dl/extractor/rtl2.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -97,8 +94,6 @@ class RTL2IE(InfoExtractor):
if m3u8_url:
formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
- self._sort_formats(formats)
-
return {
'id': display_id,
'title': title,
@@ -145,7 +140,6 @@ class RTL2YouIE(RTL2YouBaseIE):
raise ExtractorError('video not found', expected=True)
formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
video_data = self._download_json(
self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
diff --git a/hypervideo_dl/extractor/rtlnl.py b/hypervideo_dl/extractor/rtlnl.py
index 9eaa06f..724cb64 100644
--- a/hypervideo_dl/extractor/rtlnl.py
+++ b/hypervideo_dl/extractor/rtlnl.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -11,6 +8,7 @@ from ..utils import (
class RtlNlIE(InfoExtractor):
IE_NAME = 'rtl.nl'
IE_DESC = 'rtl.nl and rtlxl.nl'
+ _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
_VALID_URL = r'''(?x)
https?://(?:(?:www|static)\.)?
(?:
@@ -118,7 +116,6 @@ class RtlNlIE(InfoExtractor):
formats = self._extract_m3u8_formats(
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
thumbnails = []
@@ -144,3 +141,154 @@ class RtlNlIE(InfoExtractor):
'duration': parse_duration(material.get('duration')),
'thumbnails': thumbnails,
}
+
+
+class RTLLuBaseIE(InfoExtractor):
+ _MEDIA_REGEX = {
+ 'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
+ 'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
+ 'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
+ }
+
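+ # rtl.lu embeds media via custom <rtl-player>/<rtl-audioplayer> elements; pull stream URLs from their attributes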
+ def get_media_url(self, webpage, video_id, media_type):
+ return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)
+
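+ # A page may carry an HLS video player, a plain MP3 audio player, or both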
+ def get_formats_and_subtitles(self, webpage, video_id):
+ video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')
+
+ formats, subtitles = [], {}
+ if video_url is not None:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
+ if audio_url is not None:
+ formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
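+ # Live channels use fixed slugs as IDs: 'live'/'live-2' (TV) and 'lauschteren' (radio)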
+ is_live = video_id in ('live', 'live-2', 'lauschteren')
+
+ # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
+ # the context can be read from <rtl-comments context=<context>> in the webpage
+ webpage = self._download_webpage(url, video_id)
+
+ formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
+ 'is_live': is_live,
+ }
+
+
+class RTLLuTeleVODIE(RTLLuBaseIE):
+ IE_NAME = 'rtl.lu:tele-vod'
+ _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
+ _TESTS = [{
+ 'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
+ 'info_dict': {
+ 'id': '3266757',
+ 'title': 'Informatiounsversammlung Héichwaasser',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
+ 'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
+ }
+ }, {
+ 'url': 'https://www.rtl.lu/video/3295215',
+ 'info_dict': {
+ 'id': '3295215',
+ 'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
+ 'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
+ }
+ }]
+
+
+class RTLLuArticleIE(RTLLuBaseIE):
+ IE_NAME = 'rtl.lu:article'
+ _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
+ _TESTS = [{
+ # Audio-only
+ 'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
+ 'info_dict': {
+ 'id': '1934360',
+ 'ext': 'mp3',
+ 'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
+ 'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
+ 'title': 'md5:40aa85f135578fbd549d3c9370321f99',
+ }
+ }, {
+ # 5minutes
+ 'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
+ 'info_dict': {
+ 'id': '1853173',
+ 'ext': 'mp4',
+ 'description': 'md5:ac031da0740e997a5cf4633173634fee',
+ 'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
+ }
+ }, {
+ # today.lu
+ 'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
+ 'info_dict': {
+ 'id': '1936203',
+ 'ext': 'mp4',
+ 'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
+ 'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
+ 'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
+ }
+ }]
+
+
+class RTLLuLiveIE(RTLLuBaseIE):
+ _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
+ _TESTS = [{
+ # Tele:live
+ 'url': 'https://www.rtl.lu/tele/live',
+ 'info_dict': {
+ 'id': 'live',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
+ }
+ }, {
+ # Tele:live-2
+ 'url': 'https://www.rtl.lu/tele/live-2',
+ 'info_dict': {
+ 'id': 'live-2',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
+ }
+ }, {
+ # Radio:lauschteren
+ 'url': 'https://www.rtl.lu/radio/lauschteren',
+ 'info_dict': {
+ 'id': 'lauschteren',
+ 'ext': 'mp4',
+ 'live_status': 'is_live',
+ 'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
+ }
+ }]
+
+
+class RTLLuRadioIE(RTLLuBaseIE):
+ _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
+ _TESTS = [{
+ 'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
+ 'info_dict': {
+ 'id': '4033058',
+ 'ext': 'mp3',
+ 'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
+ 'title': '5 vir 12 - Stau um Stau',
+ 'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
+ }
+ }]
diff --git a/hypervideo_dl/extractor/rtnews.py b/hypervideo_dl/extractor/rtnews.py
index 68b6044..6be9945 100644
--- a/hypervideo_dl/extractor/rtnews.py
+++ b/hypervideo_dl/extractor/rtnews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/rtp.py b/hypervideo_dl/extractor/rtp.py
index c165ade..5928a20 100644
--- a/hypervideo_dl/extractor/rtp.py
+++ b/hypervideo_dl/extractor/rtp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import js_to_json
import re
diff --git a/hypervideo_dl/extractor/rtrfm.py b/hypervideo_dl/extractor/rtrfm.py
index 93d51e8..7381d82 100644
--- a/hypervideo_dl/extractor/rtrfm.py
+++ b/hypervideo_dl/extractor/rtrfm.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/rts.py b/hypervideo_dl/extractor/rts.py
index 865a730..81c4d7c 100644
--- a/hypervideo_dl/extractor/rts.py
+++ b/hypervideo_dl/extractor/rts.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .srgssr import SRGSSRIE
@@ -15,7 +12,7 @@ from ..utils import (
)
-class RTSIE(SRGSSRIE):
+class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE
IE_DESC = 'RTS.ch'
_VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
@@ -215,7 +212,6 @@ class RTSIE(SRGSSRIE):
})
self._check_formats(formats, media_id)
- self._sort_formats(formats)
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
if isinstance(duration, compat_str):
diff --git a/hypervideo_dl/extractor/rtve.py b/hypervideo_dl/extractor/rtve.py
index 7a1dc6f..a99a266 100644
--- a/hypervideo_dl/extractor/rtve.py
+++ b/hypervideo_dl/extractor/rtve.py
@@ -1,18 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import io
-import sys
+import struct
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_struct_unpack,
-)
+from ..compat import compat_b64decode
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
qualities,
remove_end,
@@ -20,8 +14,6 @@ from ..utils import (
try_get,
)
-_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
-
class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta'
@@ -79,7 +71,7 @@ class RTVEALaCartaIE(InfoExtractor):
def _decrypt_url(png):
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
while True:
- length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+ length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
@@ -90,7 +82,7 @@ class RTVEALaCartaIE(InfoExtractor):
alphabet = []
e = 0
d = 0
- for l in _bytes_to_chr(alphabet_data):
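+ # latin-1 maps each byte to the same code point, replacing the removed _bytes_to_chr py2/py3 shim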
+ for l in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
@@ -100,7 +92,7 @@ class RTVEALaCartaIE(InfoExtractor):
f = 0
e = 3
b = 1
- for letter in _bytes_to_chr(url_data):
+ for letter in url_data.decode('iso-8859-1'):
if f == 0:
l = int(letter) * 10
f = 1
@@ -138,7 +130,6 @@ class RTVEALaCartaIE(InfoExtractor):
'quality': q(quality),
'url': video_url,
})
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
@@ -178,7 +169,7 @@ class RTVEALaCartaIE(InfoExtractor):
for s in subs)
-class RTVEAudioIE(RTVEALaCartaIE):
+class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:audio'
IE_DESC = 'RTVE audio'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
@@ -246,7 +237,6 @@ class RTVEAudioIE(RTVEALaCartaIE):
'quality': q(quality),
'url': audio_url,
})
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
@@ -265,7 +255,7 @@ class RTVEAudioIE(RTVEALaCartaIE):
}
-class RTVEInfantilIE(RTVEALaCartaIE):
+class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:infantil'
IE_DESC = 'RTVE infantil'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
@@ -284,7 +274,7 @@ class RTVEInfantilIE(RTVEALaCartaIE):
}]
-class RTVELiveIE(RTVEALaCartaIE):
+class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
diff --git a/hypervideo_dl/extractor/rtvnh.py b/hypervideo_dl/extractor/rtvnh.py
index 6a00f70..7c61744 100644
--- a/hypervideo_dl/extractor/rtvnh.py
+++ b/hypervideo_dl/extractor/rtvnh.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -52,7 +49,6 @@ class RTVNHIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/rtvs.py b/hypervideo_dl/extractor/rtvs.py
index 3ea0f18..a84a78d 100644
--- a/hypervideo_dl/extractor/rtvs.py
+++ b/hypervideo_dl/extractor/rtvs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -75,7 +72,6 @@ class RTVSIE(InfoExtractor):
formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}]
else:
formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/rtvslo.py b/hypervideo_dl/extractor/rtvslo.py
new file mode 100644
index 0000000..05942b6
--- /dev/null
+++ b/hypervideo_dl/extractor/rtvslo.py
@@ -0,0 +1,150 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class RTVSLOIE(InfoExtractor):
+ IE_NAME = 'rtvslo.si'
+ _VALID_URL = r'''(?x)
+ https?://(?:
+ (?:365|4d)\.rtvslo\.si/arhiv/[^/?#&;]+|
+ (?:www\.)?rtvslo\.si/rtv365/arhiv
+ )/(?P<id>\d+)'''
+ _GEO_COUNTRIES = ['SI']
+
+ _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622'
+ SUB_LANGS_MAP = {'Slovenski': 'sl'}
+
+ _TESTS = [
+ {
+ 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
+ 'info_dict': {
+ 'id': '174842550',
+ 'ext': 'flv',
+ 'release_timestamp': 1643140032,
+ 'upload_date': '20220125',
+ 'series': 'Dnevnik',
+ 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg',
+ 'description': 'md5:76a18692757aeb8f0f51221106277dd2',
+ 'timestamp': 1643137046,
+ 'title': 'Dnevnik',
+ 'series_id': '92',
+ 'release_date': '20220125',
+ 'duration': 1789,
+ },
+ }, {
+ 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754',
+ 'info_dict': {
+ 'id': '174843754',
+ 'ext': 'mp4',
+ 'series_id': '94',
+ 'release_date': '20220129',
+ 'timestamp': 1643484455,
+ 'title': 'Utrip',
+ 'duration': 813,
+ 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg',
+ 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9',
+ 'release_timestamp': 1643485825,
+ 'upload_date': '20220129',
+ 'series': 'Utrip',
+ },
+ }, {
+ 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609',
+ 'info_dict': {
+ 'id': '174844609',
+ 'ext': 'mp3',
+ 'series_id': '106615841',
+ 'title': 'Il giornale della sera',
+ 'duration': 1328,
+ 'series': 'Il giornale della sera',
+ 'timestamp': 1643743800,
+ 'release_timestamp': 1643745424,
+ 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg',
+ 'upload_date': '20220201',
+ 'tbr': 128000,
+ 'release_date': '20220201',
+ },
+
+ }, {
+ 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ v_id = self._match_id(url)
+ meta = self._download_json(self._API_BASE.format('getRecordingDrm', v_id), v_id)['response']
+
+ thumbs = [{'id': k, 'url': v, 'http_headers': {'Accept': 'image/jpeg'}}
+ for k, v in (meta.get('images') or {}).items()]
+
+ subs = {}
+ for s in traverse_obj(meta, 'subs', 'subtitles', default=[]):
+ lang = self.SUB_LANGS_MAP.get(s.get('language'), s.get('language') or 'und')
+ subs.setdefault(lang, []).append({
+ 'url': s.get('file'),
+ 'ext': traverse_obj(s, 'format', expected_type=str.lower),
+ })
+
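+ # getRecordingDrm returns a JWT that authorises the subsequent getMedia call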
+ jwt = meta.get('jwt')
+ if not jwt:
+ raise ExtractorError('Site did not provide an authentication token; cannot proceed')
+
+ media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response']
+
+ formats = []
+ adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none)
+ if adaptive_url:
+ formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil'])
+
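+ # the 'addaptiveMedia_sl' (sic) variant carries a sign-language interpretation track; keep it but deprioritise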
+ adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none)
+ if adaptive_url:
+ for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=['smil']):
+ formats.append({
+ **f,
+ 'format_id': 'sign-' + f['format_id'],
+ 'format_note': 'Sign language interpretation', 'preference': -10,
+ 'language': (
+ 'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none'
+ else f.get('language'))
+ })
+
+ formats.extend(
+ {
+ 'url': f['streams'][strm],
+ 'ext': traverse_obj(f, 'mediaType', expected_type=str.lower),
+ 'width': f.get('width'),
+ 'height': f.get('height'),
+ 'tbr': f.get('bitrate'),
+ 'filesize': f.get('filesize'),
+ }
+ for strm in ('http', 'https')
+ for f in media.get('mediaFiles') or []
+ if traverse_obj(f, ('streams', strm))
+ )
+
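+ # the API serves placeholder clips instead of errors: intermission.mp4 for geo-blocks, dummy_720p.mp4 for missing clips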
+ if any('intermission.mp4' in x['url'] for x in formats):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+ if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error':
+ raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True)
+
+ return {
+ 'id': v_id,
+ 'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))),
+ 'title': meta.get('title'),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnails': thumbs,
+ 'description': meta.get('description'),
+ 'timestamp': unified_timestamp(traverse_obj(meta, 'broadcastDate', ('broadcastDates', 0))),
+ 'release_timestamp': unified_timestamp(meta.get('recordingDate')),
+ 'duration': meta.get('duration') or parse_duration(meta.get('length')),
+ 'tags': meta.get('genre'),
+ 'series': meta.get('showName'),
+ 'series_id': meta.get('showId'),
+ }
diff --git a/hypervideo_dl/extractor/ruhd.py b/hypervideo_dl/extractor/ruhd.py
index 3c8053a..abaa3f9 100644
--- a/hypervideo_dl/extractor/ruhd.py
+++ b/hypervideo_dl/extractor/ruhd.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/rule34video.py b/hypervideo_dl/extractor/rule34video.py
index a602a9f..9d15f4d 100644
--- a/hypervideo_dl/extractor/rule34video.py
+++ b/hypervideo_dl/extractor/rule34video.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
import re
from ..utils import parse_duration
@@ -53,8 +51,6 @@ class Rule34VideoIE(InfoExtractor):
thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/rumble.py b/hypervideo_dl/extractor/rumble.py
index a0d5f88..102615c 100644
--- a/hypervideo_dl/extractor/rumble.py
+++ b/hypervideo_dl/extractor/rumble.py
@@ -1,16 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import re
from .common import InfoExtractor
-from ..compat import compat_str, compat_HTTPError
+from ..compat import compat_HTTPError
from ..utils import (
- determine_ext,
int_or_none,
parse_iso8601,
- try_get,
+ traverse_obj,
unescapeHTML,
ExtractorError,
)
@@ -18,6 +14,7 @@ from ..utils import (
class RumbleEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
+ _EMBED_REGEX = [fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://rumble.com/embed/v5pv5f',
'md5': '36a18a049856720189f30977ccbb2c34',
@@ -27,6 +24,12 @@ class RumbleEmbedIE(InfoExtractor):
'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
'timestamp': 1571611968,
'upload_date': '20191020',
+ 'channel_url': 'https://rumble.com/c/WMAR',
+ 'channel': 'WMAR',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
+ 'duration': 234,
+ 'uploader': 'WMAR',
+ 'live_status': 'not_live',
}
}, {
'url': 'https://rumble.com/embed/vslb7v',
@@ -41,56 +44,194 @@ class RumbleEmbedIE(InfoExtractor):
'channel': 'CTNews',
'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
'duration': 901,
+ 'uploader': 'CTNews',
+ 'live_status': 'not_live',
}
}, {
+ 'url': 'https://rumble.com/embed/vunh1h',
+ 'info_dict': {
+ 'id': 'vunh1h',
+ 'ext': 'mp4',
+ 'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS',
+ 'timestamp': 1647197663,
+ 'upload_date': '20220313',
+ 'channel_url': 'https://rumble.com/user/BLCKBX',
+ 'channel': 'BLCKBX',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'duration': 5069,
+ 'uploader': 'BLCKBX',
+ 'live_status': 'not_live',
+ 'subtitles': {
+ 'en': [
+ {
+ 'url': r're:https://.+\.vtt',
+ 'name': 'English',
+ 'ext': 'vtt'
+ }
+ ]
+ },
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://rumble.com/embed/v1essrt',
+ 'info_dict': {
+ 'id': 'v1essrt',
+ 'ext': 'mp4',
+ 'title': 'startswith:lofi hip hop radio - beats to relax/study',
+ 'timestamp': 1661519399,
+ 'upload_date': '20220826',
+ 'channel_url': 'https://rumble.com/c/LofiGirl',
+ 'channel': 'Lofi Girl',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'duration': None,
+ 'uploader': 'Lofi Girl',
+ 'live_status': 'is_live',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://rumble.com/embed/v1amumr',
+ 'info_dict': {
+ 'id': 'v1amumr',
+ 'ext': 'webm',
+ 'fps': 60,
+ 'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live',
+ 'timestamp': 1658518457,
+ 'upload_date': '20220722',
+ 'channel_url': 'https://rumble.com/c/RumbleEvents',
+ 'channel': 'Rumble Events',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'duration': 16427,
+ 'uploader': 'Rumble Events',
+ 'live_status': 'was_live',
+ },
+ 'params': {'skip_download': True}
+ }, {
'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)' % RumbleEmbedIE._VALID_URL,
- webpage)]
+ _WEBPAGE_TESTS = [
+ {
+ 'note': 'Rumble embed',
+ 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
+ 'md5': '53af34098a7f92c4e51cf0bd1c33f009',
+ 'info_dict': {
+ 'id': 'vb0ofn',
+ 'ext': 'mp4',
+ 'timestamp': 1612662578,
+ 'uploader': 'LovingMontana',
+ 'channel': 'LovingMontana',
+ 'upload_date': '20210207',
+ 'title': 'Winter-loving dog helps girls dig a snow fort ',
+ 'channel_url': 'https://rumble.com/c/c-546523',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
+ 'duration': 103,
+ 'live_status': 'not_live',
+ }
+ },
+ {
+ 'note': 'Rumble JS embed',
+ 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
+ 'md5': '4701209ac99095592e73dbba21889690',
+ 'info_dict': {
+ 'id': 'v15eqxl',
+ 'ext': 'mp4',
+ 'channel': 'Mr Producer Media',
+ 'duration': 92,
+ 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
+ 'channel_url': 'https://rumble.com/c/RichSementa',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
+ 'timestamp': 1654892716,
+ 'uploader': 'Mr Producer Media',
+ 'upload_date': '20220610',
+ 'live_status': 'not_live',
+ }
+ },
+ ]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ embeds = tuple(super()._extract_embed_urls(url, webpage))
+ if embeds:
+ return embeds
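+ # no <script>/<iframe> src matched; fall back to JS-initialised Rumble("play", {...}) embeds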
+ return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
+ r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
- 'https://rumble.com/embedJS/', video_id,
- query={'request': 'video', 'v': video_id})
- title = unescapeHTML(video['title'])
+ 'https://rumble.com/embedJS/u3/', video_id,
+ query={'request': 'video', 'ver': 2, 'v': video_id})
+
+ sys_msg = traverse_obj(video, ('sys', 'msg'))
+ if sys_msg:
+ self.report_warning(sys_msg, video_id=video_id)
+
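+ # 'live': 0 = VOD (a DVR flag means it was live), 1 = upcoming or recently ended, 2 = live now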
+ if video.get('live') == 0:
+ live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live'
+ elif video.get('live') == 1:
+ live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live'
+ elif video.get('live') == 2:
+ live_status = 'is_live'
+ else:
+ live_status = None
formats = []
- for height, ua in (video.get('ua') or {}).items():
- for i in range(2):
- f_url = try_get(ua, lambda x: x[i], compat_str)
- if f_url:
- ext = determine_ext(f_url)
- f = {
- 'ext': ext,
- 'format_id': '%s-%sp' % (ext, height),
- 'height': int_or_none(height),
- 'url': f_url,
- }
- bitrate = try_get(ua, lambda x: x[i + 2]['bitrate'])
- if bitrate:
- f['tbr'] = int_or_none(bitrate)
- formats.append(f)
- self._sort_formats(formats)
+ for ext, ext_info in (video.get('ua') or {}).items():
+ for height, video_info in (ext_info or {}).items():
+ meta = video_info.get('meta') or {}
+ if not video_info.get('url'):
+ continue
+ if ext == 'hls':
+ if meta.get('live') is True and video.get('live') == 1:
+ live_status = 'post_live'
+ formats.extend(self._extract_m3u8_formats(
+ video_info['url'], video_id,
+ ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
+ continue
+ formats.append({
+ 'ext': ext,
+ 'url': video_info['url'],
+ 'format_id': '%s-%sp' % (ext, height),
+ 'height': int_or_none(height),
+ 'fps': video.get('fps'),
+ **traverse_obj(meta, {
+ 'tbr': 'bitrate',
+ 'filesize': 'size',
+ 'width': 'w',
+ 'height': 'h',
+ }, default={})
+ })
+
+ subtitles = {
+ lang: [{
+ 'url': sub_info['path'],
+ 'name': sub_info.get('language') or '',
+ }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path')
+ }
author = video.get('author') or {}
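+ # 't' lists thumbnail variants under short keys (i/w/h); fall back to the single 'i' image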
+ thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'}))
+ if not thumbnails and video.get('i'):
+ thumbnails = [{'url': video['i']}]
+
+ if live_status in {'is_live', 'post_live'}:
+ duration = None
+ else:
+ duration = int_or_none(video.get('duration'))
return {
'id': video_id,
- 'title': title,
+ 'title': unescapeHTML(video.get('title')),
'formats': formats,
- 'thumbnail': video.get('i'),
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
'timestamp': parse_iso8601(video.get('pubDate')),
'channel': author.get('name'),
'channel_url': author.get('url'),
- 'duration': int_or_none(video.get('duration')),
+ 'duration': duration,
+ 'uploader': author.get('name'),
+ 'live_status': live_status,
}
@@ -105,7 +246,7 @@ class RumbleChannelIE(InfoExtractor):
},
}, {
'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
- 'playlist_count': 4,
+ 'playlist_mincount': 4,
'info_dict': {
'id': 'goldenpoodleharleyeuna',
},
diff --git a/hypervideo_dl/extractor/rutube.py b/hypervideo_dl/extractor/rutube.py
index 2f753b4..5a4fd97 100644
--- a/hypervideo_dl/extractor/rutube.py
+++ b/hypervideo_dl/extractor/rutube.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
import itertools
from .common import InfoExtractor
@@ -85,7 +81,6 @@ class RutubeBaseIE(InfoExtractor):
'url': format_url,
'format_id': format_id,
})
- self._sort_formats(formats)
return formats
def _download_and_extract_formats(self, video_id, query=None):
@@ -97,6 +92,7 @@ class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
@@ -131,12 +127,6 @@ class RutubeIE(RutubeBaseIE):
def suitable(cls, url):
return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
- @staticmethod
- def _extract_urls(webpage):
- return [mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_and_extract_info(video_id)
@@ -249,7 +239,6 @@ class RutubeMovieIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:movie'
IE_DESC = 'Rutube movies'
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
- _TESTS = []
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
diff --git a/hypervideo_dl/extractor/rutv.py b/hypervideo_dl/extractor/rutv.py
index 0ea8253..d7f9a73 100644
--- a/hypervideo_dl/extractor/rutv.py
+++ b/hypervideo_dl/extractor/rutv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -23,6 +20,10 @@ class RUTVIE(InfoExtractor):
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1',
+ r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
+ ]
_TESTS = [
{
@@ -110,19 +111,6 @@ class RUTVIE(InfoExtractor):
},
]
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
- mobj = re.search(
- r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@@ -153,7 +141,7 @@ class RUTVIE(InfoExtractor):
if media['errors']:
raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
- view_count = playlist.get('count_views')
+ view_count = int_or_none(playlist.get('count_views'))
priority_transport = playlist['priority_transport']
thumbnail = media['picture']
@@ -164,6 +152,7 @@ class RUTVIE(InfoExtractor):
duration = int_or_none(media.get('duration'))
formats = []
+ subtitles = {}
for transport, links in media['sources'].items():
for quality, url in links.items():
@@ -183,8 +172,10 @@ class RUTVIE(InfoExtractor):
'vbr': str_to_int(quality),
}
elif transport == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- url, video_id, 'mp4', quality=preference, m3u8_id='hls'))
+ fmt, subs = self._extract_m3u8_formats_and_subtitles(
+ url, video_id, 'mp4', quality=preference, m3u8_id='hls')
+ formats.extend(fmt)
+ self._merge_subtitles(subs, target=subtitles)
continue
else:
fmt = {
@@ -198,8 +189,6 @@ class RUTVIE(InfoExtractor):
})
formats.append(fmt)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
@@ -208,5 +197,7 @@ class RUTVIE(InfoExtractor):
'view_count': view_count,
'duration': duration,
'formats': formats,
+ 'subtitles': subtitles,
'is_live': is_live,
+ '_format_sort_fields': ('source', ),
}
diff --git a/hypervideo_dl/extractor/ruutu.py b/hypervideo_dl/extractor/ruutu.py
index 5a30e33..33f6652 100644
--- a/hypervideo_dl/extractor/ruutu.py
+++ b/hypervideo_dl/extractor/ruutu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -41,6 +38,7 @@ class RuutuIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 114,
'age_limit': 0,
+ 'upload_date': '20150508',
},
},
{
@@ -54,6 +52,9 @@ class RuutuIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 40,
'age_limit': 0,
+ 'upload_date': '20150507',
+ 'series': 'Superpesis',
+ 'categories': ['Urheilu'],
},
},
{
@@ -66,6 +67,8 @@ class RuutuIE(InfoExtractor):
'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 0,
+ 'upload_date': '20151012',
+ 'series': 'Läpivalaisu',
},
},
# Episode where <SourceFile> is "NOT-USED", but has other
@@ -85,6 +88,9 @@ class RuutuIE(InfoExtractor):
'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 0,
+ 'upload_date': '20190320',
+ 'series': 'Mysteeritarinat',
+ 'duration': 1324,
},
'expected_warnings': [
'HTTP Error 502: Bad Gateway',
@@ -129,14 +135,30 @@ class RuutuIE(InfoExtractor):
_API_BASE = 'https://gatling.nelonenmedia.fi'
@classmethod
- def _extract_url(cls, webpage):
+ def _extract_embed_urls(cls, url, webpage):
+ # nelonen.fi
settings = try_call(
lambda: json.loads(re.search(
r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False))
- video_id = traverse_obj(settings, (
- 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value'))
- if video_id:
- return f'http://www.ruutu.fi/video/{video_id}'
+ if settings:
+ video_id = traverse_obj(settings, (
+ 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value'))
+ if video_id:
+ return [f'http://www.ruutu.fi/video/{video_id}']
+ # hs.fi and is.fi
+ settings = try_call(
+ lambda: json.loads(re.search(
+ '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+ webpage).group(1), strict=False))
+ if settings:
+ video_ids = set(traverse_obj(settings, (
+ 'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or [])
+ if video_ids:
+ return [f'http://www.ruutu.fi/video/{v}' for v in video_ids]
+ video_id = traverse_obj(settings, (
+ 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId'))
+ if video_id:
+ return [f'http://www.ruutu.fi/video/{video_id}']
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -209,10 +231,10 @@ class RuutuIE(InfoExtractor):
extract_formats(video_xml.find('./Clip'))
def pv(name):
- node = find_xpath_attr(
- video_xml, './Clip/PassthroughVariables/variable', 'name', name)
- if node is not None:
- return node.get('value')
+ value = try_call(lambda: find_xpath_attr(
+ video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value'))
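+ # passthrough variables use the literal string 'NA' for missing values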
+ if value != 'NA':
+ return value or None
if not formats:
if (not self.get_param('allow_unplayable_formats')
@@ -222,8 +244,6 @@ class RuutuIE(InfoExtractor):
if ns_st_cds != 'free':
raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
- self._sort_formats(formats)
-
themes = pv('themes')
return {
@@ -237,6 +257,6 @@ class RuutuIE(InfoExtractor):
'series': pv('series_name'),
'season_number': int_or_none(pv('season_number')),
'episode_number': int_or_none(pv('episode_number')),
- 'categories': themes.split(',') if themes else [],
+ 'categories': themes.split(',') if themes else None,
'formats': formats,
}
diff --git a/hypervideo_dl/extractor/ruv.py b/hypervideo_dl/extractor/ruv.py
index d806ed0..12499d6 100644
--- a/hypervideo_dl/extractor/ruv.py
+++ b/hypervideo_dl/extractor/ruv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
diff --git a/hypervideo_dl/extractor/safari.py b/hypervideo_dl/extractor/safari.py
index 7b4571d..450a661 100644
--- a/hypervideo_dl/extractor/safari.py
+++ b/hypervideo_dl/extractor/safari.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
diff --git a/hypervideo_dl/extractor/saitosan.py b/hypervideo_dl/extractor/saitosan.py
index 621335c..d2f60e9 100644
--- a/hypervideo_dl/extractor/saitosan.py
+++ b/hypervideo_dl/extractor/saitosan.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import ExtractorError, try_get
diff --git a/hypervideo_dl/extractor/samplefocus.py b/hypervideo_dl/extractor/samplefocus.py
index 806c3c3..e9f5c22 100644
--- a/hypervideo_dl/extractor/samplefocus.py
+++ b/hypervideo_dl/extractor/samplefocus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/sapo.py b/hypervideo_dl/extractor/sapo.py
index df202a3..beffaee 100644
--- a/hypervideo_dl/extractor/sapo.py
+++ b/hypervideo_dl/extractor/sapo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -101,8 +98,6 @@ class SapoIE(InfoExtractor):
'height': 720,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/savefrom.py b/hypervideo_dl/extractor/savefrom.py
index 98efdc2..9c9e74b 100644
--- a/hypervideo_dl/extractor/savefrom.py
+++ b/hypervideo_dl/extractor/savefrom.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import os.path
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/sbs.py b/hypervideo_dl/extractor/sbs.py
index 4090f63..4532033 100644
--- a/hypervideo_dl/extractor/sbs.py
+++ b/hypervideo_dl/extractor/sbs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
smuggle_url,
@@ -15,9 +12,16 @@ class SBSIE(InfoExtractor):
ondemand(?:
/video/(?:single/)?|
/movie/[^/]+/|
+ /(?:tv|news)-series/(?:[^/]+/){3}|
.*?\bplay=|/watch/
)|news/(?:embeds/)?video/
)(?P<id>[0-9]+)'''
+ _EMBED_REGEX = [r'''(?x)]
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''']
_TESTS = [{
# Original URL is handled by the generic IE which finds the iframe:
@@ -60,6 +64,12 @@ class SBSIE(InfoExtractor):
'note': 'Live stream',
'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw',
'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/news-series/dateline/dateline-2022/dateline-s2022-ep26/2072245827515',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/hypervideo_dl/extractor/screen9.py b/hypervideo_dl/extractor/screen9.py
new file mode 100644
index 0000000..5ab0b6c
--- /dev/null
+++ b/hypervideo_dl/extractor/screen9.py
@@ -0,0 +1,62 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class Screen9IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.screen9\.(?:tv|com)|play\.su\.se)/(?:embed|media)/(?P<id>[^?#/]+)'
+ _TESTS = [
+ {
+ 'url': 'https://api.screen9.com/embed/8kTNEjvoXGM33dmWwF0uDA',
+ 'md5': 'd60d23f8980583b930724b01fa6ddb41',
+ 'info_dict': {
+ 'id': '8kTNEjvoXGM33dmWwF0uDA',
+ 'ext': 'mp4',
+ 'title': 'Östersjön i förändrat klimat',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ },
+ {
+ 'url': 'https://folkhogskolekanalen.screen9.tv/media/gy35PKLHe-5K29RYHga2bw/ett-starkare-samhalle-en-snabbguide-om-sveriges-folkhogskolor',
+ 'md5': 'c9389806e78573ea34fc48b6f94465dc',
+ 'info_dict': {
+ 'id': 'gy35PKLHe-5K29RYHga2bw',
+ 'ext': 'mp4',
+ 'title': 'Ett starkare samhälle - en snabbguide om Sveriges folkhögskolor',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ },
+ {
+ 'url': 'https://play.su.se/media/H1YA0EYNCxiesrSU1kaRBQ/baltic-breakfast',
+ 'md5': '2b817647c3058002526269deff4c0683',
+ 'info_dict': {
+ 'id': 'H1YA0EYNCxiesrSU1kaRBQ',
+ 'ext': 'mp4',
+ 'title': 'Baltic Breakfast',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(f'https://api.screen9.com/embed/{video_id}', video_id)
+ config = self._search_json(r'var\s+config\s*=', webpage, 'config', video_id)
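+ # config['src'] lists both HLS (application/x-mpegURL) and progressive MP4 sources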
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ traverse_obj(config, ('src', lambda _, v: v['type'] == 'application/x-mpegURL', 'src'), get_all=False),
+ video_id, ext='mp4')
+ formats.append({
+ 'url': traverse_obj(config, ('src', lambda _, v: v['type'] == 'video/mp4', 'src'), get_all=False),
+ 'format': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(
+ config,
+ ('plugins', (('title', 'title'), ('googleAnalytics', 'title'), ('share', 'mediaTitle'))),
+ get_all=False),
+ 'description': traverse_obj(config, ('plugins', 'title', 'description')),
+ 'thumbnail': traverse_obj(config, 'poster'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/screencast.py b/hypervideo_dl/extractor/screencast.py
index 69a0d01..df5e79b 100644
--- a/hypervideo_dl/extractor/screencast.py
+++ b/hypervideo_dl/extractor/screencast.py
@@ -1,14 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_request,
-)
-from ..utils import (
- ExtractorError,
-)
+from ..compat import compat_parse_qs
+from ..utils import ExtractorError
class ScreencastIE(InfoExtractor):
@@ -78,7 +72,7 @@ class ScreencastIE(InfoExtractor):
flash_vars_s = flash_vars_s.replace(',', '&')
if flash_vars_s:
flash_vars = compat_parse_qs(flash_vars_s)
- video_url_raw = compat_urllib_request.quote(
+ video_url_raw = urllib.request.quote(
flash_vars['content'][0])
video_url = video_url_raw.replace('http%3A', 'http:')
diff --git a/hypervideo_dl/extractor/screencastify.py b/hypervideo_dl/extractor/screencastify.py
new file mode 100644
index 0000000..136b847
--- /dev/null
+++ b/hypervideo_dl/extractor/screencastify.py
@@ -0,0 +1,52 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import traverse_obj, update_url_query
+
+
+class ScreencastifyIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
+ 'info_dict': {
+ 'id': 'sYVkZip3quLKhHw4Ybk8',
+ 'ext': 'mp4',
+ 'title': 'Inserting and Aligning the Case Top and Bottom',
+ 'description': '',
+ 'uploader': 'Paul Gunn',
+ 'extra_param_to_segment_url': str,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(
+ f'https://umbrella.svc.screencastify.com/api/umbrellaService/watch/{video_id}', video_id)
+
+ query_string = traverse_obj(info, ('manifest', 'auth', 'query'))
+ query = urllib.parse.parse_qs(query_string)
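+ # the auth query must accompany the manifests as well as every format and segment URL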
+ formats = []
+ dash_manifest_url = traverse_obj(info, ('manifest', 'url'))
+ if dash_manifest_url:
+ formats.extend(
+ self._extract_mpd_formats(
+ dash_manifest_url, video_id, mpd_id='dash', query=query, fatal=False))
+ hls_manifest_url = traverse_obj(info, ('manifest', 'hlsUrl'))
+ if hls_manifest_url:
+ formats.extend(
+ self._extract_m3u8_formats(
+ hls_manifest_url, video_id, ext='mp4', m3u8_id='hls', query=query, fatal=False))
+ for f in formats:
+ f['url'] = update_url_query(f['url'], query)
+
+ return {
+ 'id': video_id,
+ 'title': info.get('title'),
+ 'description': info.get('description'),
+ 'uploader': info.get('userName'),
+ 'formats': formats,
+ 'extra_param_to_segment_url': query_string,
+ }
diff --git a/hypervideo_dl/extractor/screencastomatic.py b/hypervideo_dl/extractor/screencastomatic.py
index 0afdc17..28e25e9 100644
--- a/hypervideo_dl/extractor/screencastomatic.py
+++ b/hypervideo_dl/extractor/screencastomatic.py
@@ -1,13 +1,12 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
get_element_by_class,
int_or_none,
remove_start,
strip_or_none,
unified_strdate,
+ urlencode_postdata,
)
@@ -37,6 +36,28 @@ class ScreencastOMaticIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://screencast-o-matic.com/player/' + video_id, video_id)
+
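+ # protected videos serve a placeholder page; replay its hidden form with scPassword set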
+ if (self._html_extract_title(webpage) == 'Protected Content'
+ or 'This video is private and requires a password' in webpage):
+ password = self.get_param('videopassword')
+
+ if not password:
+ raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
+
+ form = self._search_regex(
+ r'(?is)<form[^>]*>(?P<form>.+?)</form>', webpage, 'login form', group='form')
+ form_data = self._hidden_inputs(form)
+ form_data.update({
+ 'scPassword': password,
+ })
+
+ webpage = self._download_webpage(
+ 'https://screencast-o-matic.com/player/password', video_id, 'Logging in',
+ data=urlencode_postdata(form_data))
+
+ if '<small class="text-danger">Invalid password</small>' in webpage:
+ raise ExtractorError('Unable to login: Invalid password', expected=True)
+
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
info.update({
'id': video_id,
diff --git a/hypervideo_dl/extractor/scrippsnetworks.py b/hypervideo_dl/extractor/scrippsnetworks.py
index 84918b6..c3cee6e 100644
--- a/hypervideo_dl/extractor/scrippsnetworks.py
+++ b/hypervideo_dl/extractor/scrippsnetworks.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import hashlib
diff --git a/hypervideo_dl/extractor/scrolller.py b/hypervideo_dl/extractor/scrolller.py
new file mode 100644
index 0000000..4f9fa14
--- /dev/null
+++ b/hypervideo_dl/extractor/scrolller.py
@@ -0,0 +1,102 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext, int_or_none
+
+
+class ScrolllerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
+ 'info_dict': {
+ 'id': 'a-helping-hand-1k9pxikxkw',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
+ 'title': 'A helping hand',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
+ 'info_dict': {
+ 'id': 'tigers-chasing-a-drone-c5d1f2so6j',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
+ 'title': 'Tigers chasing a drone',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
+ 'info_dict': {
+ 'id': 'baby-rhino-smells-something-9chhugsv9p',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
+ 'title': 'Baby rhino smells something',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
+ 'info_dict': {
+ 'id': 'its-all-fun-and-games-cco8jjmoh7',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
+ 'title': 'It\'s all fun and games...',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
+ 'info_dict': {
+ 'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
+ 'title': 'May the force be with you (Octokuro)',
+ 'age_limit': 18,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
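+ # Scrolller serves post data over a GraphQL endpoint; mediaSources mixes stills (thumbnails) and video renditions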
+ query = {
+ 'query': '''{
+ getSubredditPost(url:"/%s"){
+ id
+ title
+ isNsfw
+ mediaSources{
+ url
+ width
+ height
+ }
+ }
+ }''' % video_id
+ }
+
+ video_data = self._download_json(
+ 'https://api.scrolller.com/api/v2/graphql', video_id, data=json.dumps(query).encode(),
+ headers={'Content-Type': 'application/json'})['data']['getSubredditPost']
+
+ formats, thumbnails = [], []
+ for source in video_data['mediaSources']:
+ if determine_ext(source.get('url')) in ('jpg', 'png'):
+ thumbnails.append({
+ 'url': source['url'],
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ elif source.get('url'):
+ formats.append({
+ 'url': source['url'],
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+
+ if not formats:
+ self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
+
+ return {
+ 'id': video_id,
+ 'title': video_data.get('title'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'age_limit': 18 if video_data.get('isNsfw') else 0
+ }
diff --git a/hypervideo_dl/extractor/scte.py b/hypervideo_dl/extractor/scte.py
index 7215cf5..d839ffc 100644
--- a/hypervideo_dl/extractor/scte.py
+++ b/hypervideo_dl/extractor/scte.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/seeker.py b/hypervideo_dl/extractor/seeker.py
index e5c18c7..65eb16a 100644
--- a/hypervideo_dl/extractor/seeker.py
+++ b/hypervideo_dl/extractor/seeker.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/senategov.py b/hypervideo_dl/extractor/senategov.py
index b295184..7ff0cf5 100644
--- a/hypervideo_dl/extractor/senategov.py
+++ b/hypervideo_dl/extractor/senategov.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -52,6 +49,7 @@ _COMMITTEES = {
class SenateISVPIE(InfoExtractor):
_IE_NAME = 'senate.gov:isvp'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
+ _EMBED_REGEX = [r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]"]
_TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
@@ -90,14 +88,6 @@ class SenateISVPIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _search_iframe_url(webpage):
- mobj = re.search(
- r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -141,8 +131,6 @@ class SenateISVPIE(InfoExtractor):
entry['format_id'] += mobj.group('tag')
formats.append(entry)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
@@ -197,7 +185,6 @@ class SenateGovIE(InfoExtractor):
formats = self._extract_m3u8_formats(
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
display_id, ext='mp4')
- self._sort_formats(formats)
title = self._html_search_regex(
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title')
diff --git a/hypervideo_dl/extractor/senateisvp.py b/hypervideo_dl/extractor/senateisvp.py
deleted file mode 100644
index 8794d47..0000000
--- a/hypervideo_dl/extractor/senateisvp.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- unsmuggle_url,
-)
-from ..compat import (
- compat_parse_qs,
- compat_urlparse,
-)
-
-
-class SenateISVPIE(InfoExtractor):
- _COMM_MAP = [
- ['ag', '76440', 'http://ag-f.akamaihd.net'],
- ['aging', '76442', 'http://aging-f.akamaihd.net'],
- ['approps', '76441', 'http://approps-f.akamaihd.net'],
- ['armed', '76445', 'http://armed-f.akamaihd.net'],
- ['banking', '76446', 'http://banking-f.akamaihd.net'],
- ['budget', '76447', 'http://budget-f.akamaihd.net'],
- ['cecc', '76486', 'http://srs-f.akamaihd.net'],
- ['commerce', '80177', 'http://commerce1-f.akamaihd.net'],
- ['csce', '75229', 'http://srs-f.akamaihd.net'],
- ['dpc', '76590', 'http://dpc-f.akamaihd.net'],
- ['energy', '76448', 'http://energy-f.akamaihd.net'],
- ['epw', '76478', 'http://epw-f.akamaihd.net'],
- ['ethics', '76449', 'http://ethics-f.akamaihd.net'],
- ['finance', '76450', 'http://finance-f.akamaihd.net'],
- ['foreign', '76451', 'http://foreign-f.akamaihd.net'],
- ['govtaff', '76453', 'http://govtaff-f.akamaihd.net'],
- ['help', '76452', 'http://help-f.akamaihd.net'],
- ['indian', '76455', 'http://indian-f.akamaihd.net'],
- ['intel', '76456', 'http://intel-f.akamaihd.net'],
- ['intlnarc', '76457', 'http://intlnarc-f.akamaihd.net'],
- ['jccic', '85180', 'http://jccic-f.akamaihd.net'],
- ['jec', '76458', 'http://jec-f.akamaihd.net'],
- ['judiciary', '76459', 'http://judiciary-f.akamaihd.net'],
- ['rpc', '76591', 'http://rpc-f.akamaihd.net'],
- ['rules', '76460', 'http://rules-f.akamaihd.net'],
- ['saa', '76489', 'http://srs-f.akamaihd.net'],
- ['smbiz', '76461', 'http://smbiz-f.akamaihd.net'],
- ['srs', '75229', 'http://srs-f.akamaihd.net'],
- ['uscc', '76487', 'http://srs-f.akamaihd.net'],
- ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
- ['arch', '', 'http://ussenate-f.akamaihd.net/']
- ]
- _IE_NAME = 'senate.gov'
- _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
- _TESTS = [{
- 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
- 'info_dict': {
- 'id': 'judiciary031715',
- 'ext': 'mp4',
- 'title': 'Integrated Senate Video Player',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
- 'info_dict': {
- 'id': 'commerce011514',
- 'ext': 'mp4',
- 'title': 'Integrated Senate Video Player'
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
- # checksum differs each time
- 'info_dict': {
- 'id': 'intel090613',
- 'ext': 'mp4',
- 'title': 'Integrated Senate Video Player'
- }
- }, {
- # From http://www.c-span.org/video/?96791-1
- 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _search_iframe_url(webpage):
- mobj = re.search(
- r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
- webpage)
- if mobj:
- return mobj.group('url')
-
- def _get_info_for_comm(self, committee):
- for entry in self._COMM_MAP:
- if entry[0] == committee:
- return entry[1:]
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
-
- qs = compat_parse_qs(self._match_valid_url(url).group('qs'))
- if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
- raise ExtractorError('Invalid URL', expected=True)
-
- video_id = re.sub(r'.mp4$', '', qs['filename'][0])
-
- webpage = self._download_webpage(url, video_id)
-
- if smuggled_data.get('force_title'):
- title = smuggled_data['force_title']
- else:
- title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id)
- poster = qs.get('poster')
- thumbnail = poster[0] if poster else None
-
- video_type = qs['type'][0]
- committee = video_type if video_type == 'arch' else qs['comm'][0]
- stream_num, domain = self._get_info_for_comm(committee)
-
- formats = []
- if video_type == 'arch':
- filename = video_id if '.' in video_id else video_id + '.mp4'
- formats = [{
- # All parameters in the query string are necessary to prevent a 403 error
- 'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
- }]
- else:
- hdcore_sign = 'hdcore=3.1.0'
- url_params = (domain, video_id, stream_num)
- f4m_url = '%s/z/%s_1@%s/manifest.f4m?' % url_params + hdcore_sign
- m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
- for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
- # URLs without the extra param induce an 404 error
- entry.update({'extra_param_to_segment_url': hdcore_sign})
- formats.append(entry)
- for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
- mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
- if mobj:
- entry['format_id'] += mobj.group('tag')
- formats.append(entry)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': thumbnail,
- }
diff --git a/hypervideo_dl/extractor/sendtonews.py b/hypervideo_dl/extractor/sendtonews.py
index 858547b..3600e2e 100644
--- a/hypervideo_dl/extractor/sendtonews.py
+++ b/hypervideo_dl/extractor/sendtonews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -46,14 +43,14 @@ class SendtoNewsIE(InfoExtractor):
_URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'
@classmethod
- def _extract_url(cls, webpage):
+ def _extract_embed_urls(cls, url, webpage):
mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
(?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
.*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
\1>''', webpage)
if mobj:
sc = mobj.group('SC')
- return cls._URL_TEMPLATE % sc
+ yield cls._URL_TEMPLATE % sc
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -80,9 +77,6 @@ class SendtoNewsIE(InfoExtractor):
'format_id': '%s-%d' % (determine_protocol(f), tbr),
'tbr': tbr,
})
- # 'tbr' was explicitly set to be preferred over 'height' originally,
- # So this is being kept unless someone can confirm this is unnecessary
- self._sort_formats(info_dict['formats'], ('tbr', 'res'))
thumbnails = []
if video.get('thumbnailUrl'):
@@ -101,6 +95,9 @@ class SendtoNewsIE(InfoExtractor):
'thumbnails': thumbnails,
'duration': float_or_none(video.get('SM_length')),
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
+ # 'tbr' was explicitly set to be preferred over 'height' originally,
+ # so this is being kept unless someone can confirm it is unnecessary
+ '_format_sort_fields': ('tbr', 'res')
})
entries.append(info_dict)
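
Two mechanical changes meet in sendtonews.py: _extract_url becomes the generator classmethod _extract_embed_urls(cls, url, webpage), and the ('tbr', 'res') preference moves out of a self._sort_formats(...) call into the '_format_sort_fields' key of the info dict, so the core applies it when ranking formats. A rough model of the ordering that key requests, assuming a simplified two-field sort (the real logic lives in the core's format sorter):

    # Simplified model of '_format_sort_fields': ('tbr', 'res') -- bitrate
    # outranks resolution when the two disagree.
    formats = [
        {'format_id': 'hls-1200', 'tbr': 1200, 'height': 480},
        {'format_id': 'hls-800', 'tbr': 800, 'height': 720},
    ]

    def sort_key(f):
        # higher tbr first, then higher resolution
        return (f.get('tbr') or 0, f.get('height') or 0)

    print(max(formats, key=sort_key)['format_id'])  # hls-1200
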
diff --git a/hypervideo_dl/extractor/servus.py b/hypervideo_dl/extractor/servus.py
index 1610ddc..490d562 100644
--- a/hypervideo_dl/extractor/servus.py
+++ b/hypervideo_dl/extractor/servus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -107,7 +104,6 @@ class ServusIE(InfoExtractor):
'width': int_or_none(resource.get('width')),
'height': int_or_none(resource.get('height')),
})
- self._sort_formats(formats)
attrs = {}
for attribute in video['attributes']:
diff --git a/hypervideo_dl/extractor/sevenplus.py b/hypervideo_dl/extractor/sevenplus.py
index 9867961..222bf6c 100644
--- a/hypervideo_dl/extractor/sevenplus.py
+++ b/hypervideo_dl/extractor/sevenplus.py
@@ -1,10 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
-from .brightcove import BrightcoveNewIE
+from .brightcove import BrightcoveNewBaseIE
from ..compat import (
compat_HTTPError,
compat_str,
@@ -16,7 +13,7 @@ from ..utils import (
)
-class SevenPlusIE(BrightcoveNewIE):
+class SevenPlusIE(BrightcoveNewBaseIE):
IE_NAME = '7plus'
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/sexu.py b/hypervideo_dl/extractor/sexu.py
index 3df5152..3117f81 100644
--- a/hypervideo_dl/extractor/sexu.py
+++ b/hypervideo_dl/extractor/sexu.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -36,7 +34,6 @@ class SexuIE(InfoExtractor):
r'^(\d+)[pP]', source.get('label', ''), 'height',
default=None)),
} for source in sources if source.get('file')]
- self._sort_formats(formats)
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
diff --git a/hypervideo_dl/extractor/seznamzpravy.py b/hypervideo_dl/extractor/seznamzpravy.py
index eef4975..79e8885 100644
--- a/hypervideo_dl/extractor/seznamzpravy.py
+++ b/hypervideo_dl/extractor/seznamzpravy.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -23,6 +18,7 @@ def _raw_id(src_url):
class SeznamZpravyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1']
_TESTS = [{
'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy',
'info_dict': {
@@ -51,13 +47,6 @@ class SeznamZpravyIE(InfoExtractor):
},
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url') for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1',
- webpage)]
-
def _extract_sdn_formats(self, sdn_url, video_id):
sdn_data = self._download_json(sdn_url, video_id)
@@ -104,7 +93,6 @@ class SeznamZpravyIE(InfoExtractor):
urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
@@ -165,5 +153,5 @@ class SeznamZpravyArticleIE(InfoExtractor):
return self.playlist_result([
self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
- for entry_url in SeznamZpravyIE._extract_urls(webpage)],
+ for entry_url in SeznamZpravyIE._extract_embed_urls(url, webpage)],
article_id, title, description)
diff --git a/hypervideo_dl/extractor/shahid.py b/hypervideo_dl/extractor/shahid.py
index ab45d9c..26a0bff 100644
--- a/hypervideo_dl/extractor/shahid.py
+++ b/hypervideo_dl/extractor/shahid.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import math
import re
@@ -121,7 +118,6 @@ class ShahidIE(ShahidBaseIE):
# https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
r'aws\.manifestfilter=[\w:;,-]+&?',
'', playout['url']), video_id, 'mp4')
- self._sort_formats(formats)
# video = self._call_api(
# 'product/id', video_id, {
diff --git a/hypervideo_dl/extractor/shared.py b/hypervideo_dl/extractor/shared.py
index 93ab2a1..9a237b3 100644
--- a/hypervideo_dl/extractor/shared.py
+++ b/hypervideo_dl/extractor/shared.py
@@ -1,16 +1,13 @@
-from __future__ import unicode_literals
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_urllib_parse_unquote_plus,
-)
+from ..compat import compat_b64decode
from ..utils import (
- determine_ext,
+ KNOWN_EXTENSIONS,
ExtractorError,
+ determine_ext,
int_or_none,
js_to_json,
- KNOWN_EXTENSIONS,
parse_filesize,
rot47,
url_or_none,
@@ -132,7 +129,7 @@ class VivoIE(SharedBaseIE):
return stream_url
def decode_url(encoded_url):
- return rot47(compat_urllib_parse_unquote_plus(encoded_url))
+ return rot47(urllib.parse.unquote_plus(encoded_url))
return decode_url(self._parse_json(
self._search_regex(
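
The shared.py hunk swaps compat_urllib_parse_unquote_plus for the stdlib urllib.parse.unquote_plus; the Vivo stream URLs themselves are stored percent-encoded and ROT47-scrambled. A self-contained sketch of that decode path, with a local rot47 standing in for the helper imported from ..utils:

    import urllib.parse

    def rot47(s):
        # self-inverse rotation over the printable ASCII range 33..126
        return ''.join(
            chr(33 + (ord(c) - 33 + 47) % 94) if 33 <= ord(c) <= 126 else c
            for c in s)

    encoded = urllib.parse.quote_plus(rot47('https://example.com/video.mp4'))
    print(rot47(urllib.parse.unquote_plus(encoded)))
    # https://example.com/video.mp4
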
diff --git a/hypervideo_dl/extractor/sharevideos.py b/hypervideo_dl/extractor/sharevideos.py
new file mode 100644
index 0000000..3132c7a
--- /dev/null
+++ b/hypervideo_dl/extractor/sharevideos.py
@@ -0,0 +1,6 @@
+from .common import InfoExtractor
+
+
+class ShareVideosEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ _EMBED_REGEX = [r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1']
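
sharevideos.py is a new, embed-only extractor: _VALID_URL = False means it can never be selected by URL directly, and its sole job is to contribute _EMBED_REGEX when other pages are scanned for embeds. A toy illustration of why direct matching is off; the real gatekeeping happens in the common suitable() classmethod:

    # With _VALID_URL = False there is nothing to match a URL against, so
    # the extractor only ever fires via its embed regex.
    _VALID_URL = False

    def suitable(url):
        return bool(_VALID_URL)  # always False for embed-only extractors

    print(suitable('https://embed.share-videos.se/auto/embed/123?uid=4'))  # False
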
diff --git a/hypervideo_dl/extractor/shemaroome.py b/hypervideo_dl/extractor/shemaroome.py
index 45c1291..7a78c6e 100644
--- a/hypervideo_dl/extractor/shemaroome.py
+++ b/hypervideo_dl/extractor/shemaroome.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt, unpad_pkcs7
from ..compat import (
@@ -77,7 +74,6 @@ class ShemarooMeIE(InfoExtractor):
iv = [0] * 16
m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
- self._sort_formats(formats)
release_date = self._html_search_regex(
(r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),
diff --git a/hypervideo_dl/extractor/showroomlive.py b/hypervideo_dl/extractor/showroomlive.py
index 1aada69..ab18953 100644
--- a/hypervideo_dl/extractor/showroomlive.py
+++ b/hypervideo_dl/extractor/showroomlive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -69,7 +66,6 @@ class ShowRoomLiveIE(InfoExtractor):
'format_note': stream.get('label'),
'quality': int_or_none(stream.get('quality', 100)),
})
- self._sort_formats(formats)
return {
'id': compat_str(room.get('live_id') or broadcaster_id),
diff --git a/hypervideo_dl/extractor/simplecast.py b/hypervideo_dl/extractor/simplecast.py
index 857e941..ec349dd 100644
--- a/hypervideo_dl/extractor/simplecast.py
+++ b/hypervideo_dl/extractor/simplecast.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_podcast_url,
@@ -71,6 +66,11 @@ class SimplecastBaseIE(InfoExtractor):
class SimplecastIE(SimplecastBaseIE):
IE_NAME = 'simplecast'
_VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
+ _EMBED_REGEX = [rf'''(?x)<iframe[^>]+src=["\']
+ (?P<url>https?://(?:
+ embed\.simplecast\.com/[0-9a-f]{{8}}|
+ player\.simplecast\.com/{SimplecastBaseIE._UUID_REGEX}
+ ))''']
_COMMON_TEST_INFO = {
'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
@@ -97,15 +97,6 @@ class SimplecastIE(SimplecastBaseIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'''(?x)<iframe[^>]+src=["\']
- (
- https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
- player\.simplecast\.com/%s
- ))''' % SimplecastBaseIE._UUID_REGEX, webpage)
-
def _real_extract(self, url):
episode_id = self._match_id(url)
episode = self._call_api('episodes/%s', episode_id)
diff --git a/hypervideo_dl/extractor/sina.py b/hypervideo_dl/extractor/sina.py
index b62b0c3..aeba4e3 100644
--- a/hypervideo_dl/extractor/sina.py
+++ b/hypervideo_dl/extractor/sina.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
HEADRequest,
@@ -101,7 +97,6 @@ class SinaIE(InfoExtractor):
'quality': preference(quality_id),
'ext': 'mp4',
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/sixplay.py b/hypervideo_dl/extractor/sixplay.py
index fd747f5..a6fb6c1 100644
--- a/hypervideo_dl/extractor/sixplay.py
+++ b/hypervideo_dl/extractor/sixplay.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -108,7 +104,6 @@ class SixPlayIE(InfoExtractor):
'quality': quality_key(quality),
'ext': ext,
})
- self._sort_formats(formats)
def get(getter):
for src in (data, clip_data):
diff --git a/hypervideo_dl/extractor/skeb.py b/hypervideo_dl/extractor/skeb.py
index 81aecb3..e02f8ce 100644
--- a/hypervideo_dl/extractor/skeb.py
+++ b/hypervideo_dl/extractor/skeb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
diff --git a/hypervideo_dl/extractor/sky.py b/hypervideo_dl/extractor/sky.py
index ad1e62d..0a8b6cc 100644
--- a/hypervideo_dl/extractor/sky.py
+++ b/hypervideo_dl/extractor/sky.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/skyit.py b/hypervideo_dl/extractor/skyit.py
index ddb43c0..42d30f7 100644
--- a/hypervideo_dl/extractor/skyit.py
+++ b/hypervideo_dl/extractor/skyit.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
@@ -28,7 +25,6 @@ class SkyItPlayerIE(InfoExtractor):
'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
- 'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
}
@@ -45,12 +41,7 @@ class SkyItPlayerIE(InfoExtractor):
if not hls_url and video.get('geoblock' if is_live else 'geob'):
self.raise_geo_restricted(countries=['IT'])
- if is_live:
- formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
- else:
- formats = self._extract_akamai_formats(
- hls_url, video_id, {'http': 'videoplatform.sky.it'})
- self._sort_formats(formats)
+ formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
return {
'id': video_id,
@@ -78,19 +69,22 @@ class SkyItPlayerIE(InfoExtractor):
return self._parse_video(video, video_id)
-class SkyItVideoIE(SkyItPlayerIE):
+class SkyItVideoIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'video.sky.it'
_VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
- 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
+ 'md5': '5b858a62d9ffe2ab77b397553024184a',
'info_dict': {
'id': '631227',
'ext': 'mp4',
'title': 'Uomo ucciso da uno squalo in Australia',
'timestamp': 1606036192,
'upload_date': '20201122',
- }
+ 'duration': 26,
+ 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
'only_matching': True,
@@ -104,7 +98,7 @@ class SkyItVideoIE(SkyItPlayerIE):
return self._player_url_result(video_id)
-class SkyItVideoLiveIE(SkyItPlayerIE):
+class SkyItVideoLiveIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'video.sky.it:live'
_VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
_TEST = {
@@ -113,7 +107,8 @@ class SkyItVideoLiveIE(SkyItPlayerIE):
'id': '1',
'ext': 'mp4',
'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
- 'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
+ 'description': r're:(?:Clicca play e )?[Gg]uarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24\.',
+ 'live_status': 'is_live',
},
'params': {
# m3u8 download
@@ -131,19 +126,21 @@ class SkyItVideoLiveIE(SkyItPlayerIE):
return self._parse_video(livestream, asset_id)
-class SkyItIE(SkyItPlayerIE):
+class SkyItIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'sky.it'
_VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
_TESTS = [{
- 'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
+ 'url': 'https://sport.sky.it/calcio/serie-a/2022/11/03/brozovic-inter-news',
'info_dict': {
- 'id': '631201',
+ 'id': '789222',
'ext': 'mp4',
- 'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
- 'upload_date': '20201121',
- 'timestamp': 1605995753,
+ 'title': 'Brozovic con il gruppo: verso convocazione per Juve-Inter',
+ 'upload_date': '20221103',
+ 'timestamp': 1667484130,
+ 'duration': 22,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/03/1667480526353_brozovic_videostill_1.jpg',
},
- 'expected_warnings': ['Unable to download f4m manifest'],
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
@@ -153,7 +150,10 @@ class SkyItIE(SkyItPlayerIE):
'title': 'Uomo ucciso da uno squalo in Australia',
'timestamp': 1606036192,
'upload_date': '20201122',
+ 'duration': 26,
+ 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg',
},
+ 'params': {'skip_download': 'm3u8'},
}]
_VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
@@ -165,43 +165,28 @@ class SkyItIE(SkyItPlayerIE):
return self._player_url_result(video_id)
-class SkyItAcademyIE(SkyItIE):
- IE_NAME = 'skyacademy.it'
- _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
- _TESTS = [{
- 'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
- 'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
- 'info_dict': {
- 'id': '523458',
- 'ext': 'mp4',
- 'title': 'Sky Academy "The Best CineCamp 2019"',
- 'timestamp': 1562843784,
- 'upload_date': '20190711',
- }
- }]
- _DOMAIN = 'skyacademy'
- _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
-
-
-class SkyItArteIE(SkyItIE):
+class SkyItArteIE(SkyItIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'arte.sky.it'
_VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
_TESTS = [{
- 'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
+ 'url': 'https://arte.sky.it/video/oliviero-toscani-torino-galleria-mazzoleni-788962',
'md5': '515aee97b87d7a018b6c80727d3e7e17',
'info_dict': {
- 'id': '627926',
+ 'id': '788962',
'ext': 'mp4',
- 'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
- 'upload_date': '20201106',
- 'timestamp': 1604664493,
- }
+ 'title': 'La fotografia di Oliviero Toscani conquista Torino',
+ 'upload_date': '20221102',
+ 'timestamp': 1667399996,
+ 'duration': 12,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/02/1667396388552_oliviero-toscani-torino-galleria-mazzoleni_videostill_1.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
}]
_DOMAIN = 'skyarte'
- _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
+ _VIDEO_ID_REGEX = r'"embedUrl"\s*:\s*"(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
-class CieloTVItIE(SkyItIE):
+class CieloTVItIE(SkyItIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'cielotv.it'
_VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
_TESTS = [{
@@ -213,17 +198,20 @@ class CieloTVItIE(SkyItIE):
'title': 'Il lunedì è sempre un dramma',
'upload_date': '20190329',
'timestamp': 1553862178,
- }
+ 'duration': 30,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2019/03/29/1553858575610_lunedi_dramma_mant_videostill_1.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
}]
_DOMAIN = 'cielo'
_VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
-class TV8ItIE(SkyItVideoIE):
+class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'tv8.it'
- _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
+ 'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529',
'md5': '9ab906a3f75ea342ed928442f9dabd21',
'info_dict': {
'id': '630529',
@@ -231,6 +219,9 @@ class TV8ItIE(SkyItVideoIE):
'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
'timestamp': 1605721374,
'upload_date': '20201118',
- }
+ 'duration': 114,
+ 'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg',
+ },
+ 'params': {'skip_download': 'm3u8'},
}]
_DOMAIN = 'mtv8'
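
Every subclass in skyit.py now carries the "# XXX: Do not subclass from concrete IE" marker. One hazard it flags: a child extractor inherits every class attribute it does not override, so a concrete parent's configuration can leak through silently. A minimal illustration with invented attribute values:

    class ParentIE:
        _VALID_URL = r'https?://video\.sky\.it/.+'
        _DOMAIN = 'sky'

    class ChildIE(ParentIE):
        _VALID_URL = r'https?://arte\.sky\.it/.+'
        # _DOMAIN not overridden, so the child silently reuses the
        # parent's token domain, which may be wrong for its site

    print(ChildIE._DOMAIN)  # sky
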
diff --git a/hypervideo_dl/extractor/skylinewebcams.py b/hypervideo_dl/extractor/skylinewebcams.py
index 47bbb76..4292bb2 100644
--- a/hypervideo_dl/extractor/skylinewebcams.py
+++ b/hypervideo_dl/extractor/skylinewebcams.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/skynewsarabia.py b/hypervideo_dl/extractor/skynewsarabia.py
index fffc9aa..6264b04 100644
--- a/hypervideo_dl/extractor/skynewsarabia.py
+++ b/hypervideo_dl/extractor/skynewsarabia.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/skynewsau.py b/hypervideo_dl/extractor/skynewsau.py
index 8e079ee..43a9c82 100644
--- a/hypervideo_dl/extractor/skynewsau.py
+++ b/hypervideo_dl/extractor/skynewsau.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
try_get,
diff --git a/hypervideo_dl/extractor/slideshare.py b/hypervideo_dl/extractor/slideshare.py
index 9b3ad0a..ab9dad0 100644
--- a/hypervideo_dl/extractor/slideshare.py
+++ b/hypervideo_dl/extractor/slideshare.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/slideslive.py b/hypervideo_dl/extractor/slideslive.py
index df60846..9a60a79 100644
--- a/hypervideo_dl/extractor/slideslive.py
+++ b/hypervideo_dl/extractor/slideslive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
bool_or_none,
@@ -12,6 +9,7 @@ from ..utils import (
class SlidesLiveIE(InfoExtractor):
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
+ _WORKING = False
_TESTS = [{
# video_service_name = YOUTUBE
'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
@@ -87,7 +85,6 @@ class SlidesLiveIE(InfoExtractor):
formats.extend(self._extract_mpd_formats(
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
mpd_id='dash', fatal=False))
- self._sort_formats(formats)
info.update({
'id': service_id,
'formats': formats,
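
slideslive.py gains _WORKING = False, the flag used to mark an extractor as known-broken while keeping it registered, so the frontend can warn before attempting it. Roughly:

    class SlidesLiveIE:
        _WORKING = False  # still registered, but flagged as broken

    if not SlidesLiveIE._WORKING:
        print('WARNING: SlidesLive extractor is marked as not working')
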
diff --git a/hypervideo_dl/extractor/slutload.py b/hypervideo_dl/extractor/slutload.py
index 661f9e5..8e6e89c 100644
--- a/hypervideo_dl/extractor/slutload.py
+++ b/hypervideo_dl/extractor/slutload.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/smotrim.py b/hypervideo_dl/extractor/smotrim.py
new file mode 100644
index 0000000..d3f1b69
--- /dev/null
+++ b/hypervideo_dl/extractor/smotrim.py
@@ -0,0 +1,65 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SmotrimIE(InfoExtractor):
+ _VALID_URL = r'https?://smotrim\.ru/(?P<type>brand|video|article|live)/(?P<id>[0-9]+)'
+ _TESTS = [{ # video
+ 'url': 'https://smotrim.ru/video/1539617',
+ 'md5': 'b1923a533c8cab09679789d720d0b1c5',
+ 'info_dict': {
+ 'id': '1539617',
+ 'ext': 'mp4',
+ 'title': 'Полиглот. Китайский с нуля за 16 часов! Урок №16',
+ 'description': '',
+ },
+ 'add_ie': ['RUTV'],
+ }, { # article (geo-restricted? plays fine from the US and JP)
+ 'url': 'https://smotrim.ru/article/2813445',
+ 'md5': 'e0ac453952afbc6a2742e850b4dc8e77',
+ 'info_dict': {
+ 'id': '2431846',
+ 'ext': 'mp4',
+ 'title': 'Новости культуры. Съёмки первой программы "Большие и маленькие"',
+ 'description': 'md5:94a4a22472da4252bf5587a4ee441b99',
+ },
+ 'add_ie': ['RUTV'],
+ }, { # brand, redirect
+ 'url': 'https://smotrim.ru/brand/64356',
+ 'md5': '740472999ccff81d7f6df79cecd91c18',
+ 'info_dict': {
+ 'id': '2354523',
+ 'ext': 'mp4',
+ 'title': 'Большие и маленькие. Лучшее. 4-й выпуск',
+ 'description': 'md5:84089e834429008371ea41ea3507b989',
+ },
+ 'add_ie': ['RUTV'],
+ }, { # live
+ 'url': 'https://smotrim.ru/live/19201',
+ 'info_dict': {
+ 'id': '19201',
+ 'ext': 'mp4',
+ # this looks like a TV channel name
+ 'title': 'Россия Культура. Прямой эфир',
+ 'description': '',
+ },
+ 'add_ie': ['RUTV'],
+ }]
+
+ def _real_extract(self, url):
+ video_id, typ = self._match_valid_url(url).group('id', 'type')
+ rutv_type = 'video'
+ if typ not in ('video', 'live'):
+ webpage = self._download_webpage(url, video_id, f'Resolving {typ} link')
+ # there are two cases matching the regex:
+ # 1. "embedUrl" in JSON LD (/brand/)
+ # 2. "src" attribute from iframe (/article/)
+ video_id = self._search_regex(
+ r'"https://player.smotrim.ru/iframe/video/id/(?P<video_id>\d+)/',
+ webpage, 'video_id', default=None)
+ if not video_id:
+ raise ExtractorError('There is no video on this page.', expected=True)
+ elif typ == 'live':
+ rutv_type = 'live'
+
+ return self.url_result(f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}')
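
The flow of the new SmotrimIE, condensed: /video/ and /live/ IDs are forwarded as-is, while /brand/ and /article/ pages are fetched once to scrape the inner player ID before delegating to the VGTRK player URL handled by the RUTV extractor. A standalone sketch of that resolution (the sample page string is illustrative):

    import re

    def resolve(page_type, page_id, webpage=''):
        rutv_type = 'live' if page_type == 'live' else 'video'
        video_id = page_id
        if page_type not in ('video', 'live'):
            m = re.search(
                r'"https://player\.smotrim\.ru/iframe/video/id/(\d+)/', webpage)
            if not m:
                raise ValueError('There is no video on this page.')
            video_id = m.group(1)
        return f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}'

    page = '... "https://player.smotrim.ru/iframe/video/id/2431846/sid/x" ...'
    print(resolve('article', '2813445', page))
    # https://player.vgtrk.com/iframe/video/id/2431846
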
diff --git a/hypervideo_dl/extractor/snotr.py b/hypervideo_dl/extractor/snotr.py
index 0bb5482..6889f19 100644
--- a/hypervideo_dl/extractor/snotr.py
+++ b/hypervideo_dl/extractor/snotr.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
diff --git a/hypervideo_dl/extractor/sohu.py b/hypervideo_dl/extractor/sohu.py
index 3bff5c5..a8f1e46 100644
--- a/hypervideo_dl/extractor/sohu.py
+++ b/hypervideo_dl/extractor/sohu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -179,7 +176,6 @@ class SohuIE(InfoExtractor):
'height': int_or_none(data.get('height')),
'fps': int_or_none(data.get('fps')),
})
- self._sort_formats(formats)
playlist.append({
'id': '%s_part%d' % (video_id, i + 1),
diff --git a/hypervideo_dl/extractor/sonyliv.py b/hypervideo_dl/extractor/sonyliv.py
index 5b6849f..aaad420 100644
--- a/hypervideo_dl/extractor/sonyliv.py
+++ b/hypervideo_dl/extractor/sonyliv.py
@@ -1,7 +1,5 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import datetime
+import json
import math
import random
import time
@@ -85,21 +83,32 @@ class SonyLIVIE(InfoExtractor):
raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')
self.report_login()
- data = '''{"mobileNumber":"%s","channelPartnerID":"MSMIND","country":"IN","timestamp":"%s",
- "otpSize":6,"loginType":"REGISTERORSIGNIN","isMobileMandatory":true}
- ''' % (username, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ"))
otp_request_json = self._download_json(
'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2',
- None, note='Sending OTP', data=data.encode(), headers=self._HEADERS)
+ None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({
+ 'mobileNumber': username,
+ 'channelPartnerID': 'MSMIND',
+ 'country': 'IN',
+ 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+ 'otpSize': 6,
+ 'loginType': 'REGISTERORSIGNIN',
+ 'isMobileMandatory': True,
+ }).encode())
if otp_request_json['resultCode'] == 'KO':
raise ExtractorError(otp_request_json['message'], expected=True)
- otp_code = self._get_tfa_info('OTP')
- data = '''{"channelPartnerID":"MSMIND","mobileNumber":"%s","country":"IN","otp":"%s",
- "dmaId":"IN","ageConfirmation":true,"timestamp":"%s","isMobileMandatory":true}
- ''' % (username, otp_code, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ"))
+
otp_verify_json = self._download_json(
'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2',
- None, note='Verifying OTP', data=data.encode(), headers=self._HEADERS)
+ None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({
+ 'channelPartnerID': 'MSMIND',
+ 'mobileNumber': username,
+ 'country': 'IN',
+ 'otp': self._get_tfa_info('OTP'),
+ 'dmaId': 'IN',
+ 'ageConfirmation': True,
+ 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+ 'isMobileMandatory': True,
+ }).encode())
if otp_verify_json['resultCode'] == 'KO':
raise ExtractorError(otp_request_json['message'], expected=True)
self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken']
@@ -141,7 +150,6 @@ class SonyLIVIE(InfoExtractor):
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
for f in formats:
f.setdefault('http_headers', {}).update(headers)
- self._sort_formats(formats)
metadata = self._call_api(
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
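
The sonyliv.py login change replaces hand-templated JSON strings (triple-quoted, with stray indentation and newlines ending up inside the payload) with json.dumps, which guarantees valid quoting and escaping. A sketch of the OTP-request payload as now built; the phone number is an illustrative value, and note that '%M' in the timestamp format is strftime minutes, preserved here exactly as the diff has it:

    import datetime
    import json

    username = '9991234567'  # illustrative number
    payload = json.dumps({
        'mobileNumber': username,
        'channelPartnerID': 'MSMIND',
        'country': 'IN',
        # '%M' is minutes, kept verbatim from the extractor
        'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
        'otpSize': 6,
        'loginType': 'REGISTERORSIGNIN',
        'isMobileMandatory': True,
    }).encode()
    print(payload.decode()[:60])
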
diff --git a/hypervideo_dl/extractor/soundcloud.py b/hypervideo_dl/extractor/soundcloud.py
index 92535f7..c2344dd 100644
--- a/hypervideo_dl/extractor/soundcloud.py
+++ b/hypervideo_dl/extractor/soundcloud.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import re
import json
@@ -12,7 +9,6 @@ from .common import (
)
from ..compat import (
compat_HTTPError,
- compat_kwargs,
compat_str,
)
from ..utils import (
@@ -23,7 +19,6 @@ from ..utils import (
int_or_none,
KNOWN_EXTENSIONS,
mimetype2ext,
- remove_end,
parse_qs,
str_or_none,
try_get,
@@ -37,18 +32,13 @@ from ..utils import (
class SoundcloudEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1']
_TEST = {
# from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
'only_matching': True,
}
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
- webpage)]
-
def _real_extract(self, url):
query = parse_qs(url)
api_url = query['url'][0]
@@ -70,8 +60,23 @@ class SoundcloudBaseIE(InfoExtractor):
_access_token = None
_HEADERS = {}
+ _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
+
+ _ARTWORK_MAP = {
+ 'mini': 16,
+ 'tiny': 20,
+ 'small': 32,
+ 'badge': 47,
+ 't67x67': 67,
+ 'large': 100,
+ 't300x300': 300,
+ 'crop': 400,
+ 't500x500': 500,
+ 'original': 0,
+ }
+
def _store_client_id(self, client_id):
- self._downloader.cache.store('soundcloud', 'client_id', client_id)
+ self.cache.store('soundcloud', 'client_id', client_id)
def _update_client_id(self):
webpage = self._download_webpage('https://soundcloud.com/', None)
@@ -96,7 +101,7 @@ class SoundcloudBaseIE(InfoExtractor):
query['client_id'] = self._CLIENT_ID
kwargs['query'] = query
try:
- return super()._download_json(*args, **compat_kwargs(kwargs))
+ return super()._download_json(*args, **kwargs)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
self._store_client_id(None)
@@ -108,7 +113,7 @@ class SoundcloudBaseIE(InfoExtractor):
raise
def _initialize_pre_login(self):
- self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
+ self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
def _perform_login(self, username, password):
if username != 'oauth':
@@ -189,6 +194,157 @@ class SoundcloudBaseIE(InfoExtractor):
return out
+ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
+ track_id = compat_str(info['id'])
+ title = info['title']
+
+ format_urls = set()
+ formats = []
+ query = {'client_id': self._CLIENT_ID}
+ if secret_token:
+ query['secret_token'] = secret_token
+
+ if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'):
+ download_url = update_url_query(
+ self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
+ redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
+ if redirect_url:
+ urlh = self._request_webpage(
+ HEADRequest(redirect_url), track_id, fatal=False)
+ if urlh:
+ format_url = urlh.geturl()
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': 'download',
+ 'ext': urlhandle_detect_ext(urlh) or 'mp3',
+ 'filesize': int_or_none(urlh.headers.get('Content-Length')),
+ 'url': format_url,
+ 'quality': 10,
+ })
+
+ def invalid_url(url):
+ return not url or url in format_urls
+
+ def add_format(f, protocol, is_preview=False):
+ mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
+ if mobj:
+ for k, v in mobj.groupdict().items():
+ if not f.get(k):
+ f[k] = v
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ ext = f.get('ext')
+ if ext == 'aac':
+ f['abr'] = '256'
+ for k in ('ext', 'abr'):
+ v = f.get(k)
+ if v:
+ format_id_list.append(v)
+ preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
+ if preview:
+ format_id_list.append('preview')
+ abr = f.get('abr')
+ if abr:
+ f['abr'] = int(abr)
+ if protocol == 'hls':
+ protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
+ else:
+ protocol = 'http'
+ f.update({
+ 'format_id': '_'.join(format_id_list),
+ 'protocol': protocol,
+ 'preference': -10 if preview else None,
+ })
+ formats.append(f)
+
+ # New API
+ transcodings = try_get(
+ info, lambda x: x['media']['transcodings'], list) or []
+ for t in transcodings:
+ if not isinstance(t, dict):
+ continue
+ format_url = url_or_none(t.get('url'))
+ if not format_url:
+ continue
+ stream = None if extract_flat else self._download_json(
+ format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ stream_format = t.get('format') or {}
+ protocol = stream_format.get('protocol')
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ ext = None
+ preset = str_or_none(t.get('preset'))
+ if preset:
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(stream_format.get('mime_type'))
+ add_format({
+ 'url': stream_url,
+ 'ext': ext,
+ }, 'http' if protocol == 'progressive' else protocol,
+ t.get('snipped') or '/preview/' in format_url)
+
+ for f in formats:
+ f['vcodec'] = 'none'
+
+ if not formats and info.get('policy') == 'BLOCK':
+ self.raise_geo_restricted(metadata_available=True)
+
+ user = info.get('user') or {}
+
+ thumbnails = []
+ artwork_url = info.get('artwork_url')
+ thumbnail = artwork_url or user.get('avatar_url')
+ if isinstance(thumbnail, compat_str):
+ if re.search(self._IMAGE_REPL_RE, thumbnail):
+ for image_id, size in self._ARTWORK_MAP.items():
+ i = {
+ 'id': image_id,
+ 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
+ }
+ if image_id == 'tiny' and not artwork_url:
+ size = 18
+ elif image_id == 'original':
+ i['preference'] = 10
+ if size:
+ i.update({
+ 'width': size,
+ 'height': size,
+ })
+ thumbnails.append(i)
+ else:
+ thumbnails = [{'url': thumbnail}]
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ return {
+ 'id': track_id,
+ 'uploader': user.get('username'),
+ 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
+ 'uploader_url': user.get('permalink_url'),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': extract_count('favoritings') or extract_count('likes'),
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
+ 'formats': formats if not extract_flat else None
+ }
+
@classmethod
def _resolv_url(cls, url):
return cls._API_V2_BASE + 'resolve?url=' + url
@@ -387,173 +543,6 @@ class SoundcloudIE(SoundcloudBaseIE):
},
]
- _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
-
- _ARTWORK_MAP = {
- 'mini': 16,
- 'tiny': 20,
- 'small': 32,
- 'badge': 47,
- 't67x67': 67,
- 'large': 100,
- 't300x300': 300,
- 'crop': 400,
- 't500x500': 500,
- 'original': 0,
- }
-
- def _extract_info_dict(self, info, full_title=None, secret_token=None):
- track_id = compat_str(info['id'])
- title = info['title']
-
- format_urls = set()
- formats = []
- query = {'client_id': self._CLIENT_ID}
- if secret_token:
- query['secret_token'] = secret_token
-
- if info.get('downloadable') and info.get('has_downloads_left'):
- download_url = update_url_query(
- self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
- redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
- if redirect_url:
- urlh = self._request_webpage(
- HEADRequest(redirect_url), track_id, fatal=False)
- if urlh:
- format_url = urlh.geturl()
- format_urls.add(format_url)
- formats.append({
- 'format_id': 'download',
- 'ext': urlhandle_detect_ext(urlh) or 'mp3',
- 'filesize': int_or_none(urlh.headers.get('Content-Length')),
- 'url': format_url,
- 'quality': 10,
- })
-
- def invalid_url(url):
- return not url or url in format_urls
-
- def add_format(f, protocol, is_preview=False):
- mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
- if mobj:
- for k, v in mobj.groupdict().items():
- if not f.get(k):
- f[k] = v
- format_id_list = []
- if protocol:
- format_id_list.append(protocol)
- ext = f.get('ext')
- if ext == 'aac':
- f['abr'] = '256'
- for k in ('ext', 'abr'):
- v = f.get(k)
- if v:
- format_id_list.append(v)
- preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
- if preview:
- format_id_list.append('preview')
- abr = f.get('abr')
- if abr:
- f['abr'] = int(abr)
- if protocol == 'hls':
- protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
- else:
- protocol = 'http'
- f.update({
- 'format_id': '_'.join(format_id_list),
- 'protocol': protocol,
- 'preference': -10 if preview else None,
- })
- formats.append(f)
-
- # New API
- transcodings = try_get(
- info, lambda x: x['media']['transcodings'], list) or []
- for t in transcodings:
- if not isinstance(t, dict):
- continue
- format_url = url_or_none(t.get('url'))
- if not format_url:
- continue
- stream = self._download_json(
- format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
- if not isinstance(stream, dict):
- continue
- stream_url = url_or_none(stream.get('url'))
- if invalid_url(stream_url):
- continue
- format_urls.add(stream_url)
- stream_format = t.get('format') or {}
- protocol = stream_format.get('protocol')
- if protocol != 'hls' and '/hls' in format_url:
- protocol = 'hls'
- ext = None
- preset = str_or_none(t.get('preset'))
- if preset:
- ext = preset.split('_')[0]
- if ext not in KNOWN_EXTENSIONS:
- ext = mimetype2ext(stream_format.get('mime_type'))
- add_format({
- 'url': stream_url,
- 'ext': ext,
- }, 'http' if protocol == 'progressive' else protocol,
- t.get('snipped') or '/preview/' in format_url)
-
- for f in formats:
- f['vcodec'] = 'none'
-
- if not formats and info.get('policy') == 'BLOCK':
- self.raise_geo_restricted(metadata_available=True)
- self._sort_formats(formats)
-
- user = info.get('user') or {}
-
- thumbnails = []
- artwork_url = info.get('artwork_url')
- thumbnail = artwork_url or user.get('avatar_url')
- if isinstance(thumbnail, compat_str):
- if re.search(self._IMAGE_REPL_RE, thumbnail):
- for image_id, size in self._ARTWORK_MAP.items():
- i = {
- 'id': image_id,
- 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
- }
- if image_id == 'tiny' and not artwork_url:
- size = 18
- elif image_id == 'original':
- i['preference'] = 10
- if size:
- i.update({
- 'width': size,
- 'height': size,
- })
- thumbnails.append(i)
- else:
- thumbnails = [{'url': thumbnail}]
-
- def extract_count(key):
- return int_or_none(info.get('%s_count' % key))
-
- return {
- 'id': track_id,
- 'uploader': user.get('username'),
- 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
- 'uploader_url': user.get('permalink_url'),
- 'timestamp': unified_timestamp(info.get('created_at')),
- 'title': title,
- 'description': info.get('description'),
- 'thumbnails': thumbnails,
- 'duration': float_or_none(info.get('duration'), 1000),
- 'webpage_url': info.get('permalink_url'),
- 'license': info.get('license'),
- 'view_count': extract_count('playback'),
- 'like_count': extract_count('favoritings') or extract_count('likes'),
- 'comment_count': extract_count('comment'),
- 'repost_count': extract_count('reposts'),
- 'genre': info.get('genre'),
- 'formats': formats
- }
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
@@ -670,25 +659,20 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
'offset': 0,
}
- retries = self.get_param('extractor_retries', 3)
-
for i in itertools.count():
- attempt, last_error = -1, None
- while attempt < retries:
- attempt += 1
- if last_error:
- self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'), playlist_id)
+ for retry in self.RetryManager():
try:
response = self._download_json(
url, playlist_id, query=query, headers=self._HEADERS,
- note='Downloading track page %s%s' % (i + 1, f' (retry #{attempt})' if attempt else ''))
+ note=f'Downloading track page {i + 1}')
break
except ExtractorError as e:
# Downloading page may result in intermittent 502 HTTP error
# See https://github.com/hypervideo/hypervideo/issues/872
- if attempt >= retries or not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502:
+ if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502:
raise
- last_error = str(e.cause or e.msg)
+ retry.error = e
+ continue
def resolve_entry(*candidates):
for cand in candidates:
@@ -906,6 +890,7 @@ class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor):
_TESTS = [{
'url': 'scsearch15:post-avant jazzcore',
'info_dict': {
+ 'id': 'post-avant jazzcore',
'title': 'post-avant jazzcore',
},
'playlist_count': 15,
@@ -932,7 +917,8 @@ class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor):
for item in response.get('collection') or []:
if item:
- yield self.url_result(item['uri'], SoundcloudIE.ie_key())
+ yield self.url_result(
+ item['uri'], SoundcloudIE.ie_key(), **self._extract_info_dict(item, extract_flat=True))
next_url = response.get('next_href')
if not next_url:
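
The paging loop above is the clearest example of the new retry idiom: the hand-rolled attempt counter and warning plumbing collapse into "for retry in self.RetryManager(): ... retry.error = e". A minimal stand-in that reproduces the shape of that contract; this is not the real class, which also handles warning output and retry counts from user options:

    import random

    class RetryManager:
        # toy stand-in: yield until the body stops setting .error,
        # re-raise the last error once attempts run out
        def __init__(self, retries=3):
            self.retries, self.error = retries, None

        def __iter__(self):
            for _ in range(self.retries + 1):
                self.error = None
                yield self
                if self.error is None:
                    return
            raise self.error

    def flaky_download():  # stands in for the 502-prone page fetch
        if random.random() < 0.5:
            raise OSError('HTTP Error 502')
        return {'collection': []}

    for retry in RetryManager():
        try:
            response = flaky_download()
            break
        except OSError as e:
            retry.error = e
    print(response)
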
diff --git a/hypervideo_dl/extractor/soundgasm.py b/hypervideo_dl/extractor/soundgasm.py
index d608eb7..9e59c7c 100644
--- a/hypervideo_dl/extractor/soundgasm.py
+++ b/hypervideo_dl/extractor/soundgasm.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/southpark.py b/hypervideo_dl/extractor/southpark.py
index 942a52d..e23f192 100644
--- a/hypervideo_dl/extractor/southpark.py
+++ b/hypervideo_dl/extractor/southpark.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .mtv import MTVServicesInfoExtractor
@@ -37,7 +34,7 @@ class SouthParkIE(MTVServicesInfoExtractor):
}
-class SouthParkEsIE(SouthParkIE):
+class SouthParkEsIE(SouthParkIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'southpark.cc.com:español'
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/es/episodios/(?P<id>.+?)(\?|#|$))'
_LANG = 'es'
@@ -53,7 +50,7 @@ class SouthParkEsIE(SouthParkIE):
}]
-class SouthParkDeIE(SouthParkIE):
+class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'southpark.de'
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:(en/(videoclip|collections|episodes|video-clips))|(videoclip|collections|folgen))/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
_TESTS = [{
@@ -112,7 +109,50 @@ class SouthParkDeIE(SouthParkIE):
return
-class SouthParkNlIE(SouthParkIE):
+class SouthParkLatIE(SouthParkIE): # XXX: Do not subclass from concrete IE
+ IE_NAME = 'southpark.lat'
+ _VALID_URL = r'https?://(?:www\.)?southpark\.lat/(?:en/)?(?:video-?clips?|collections|episod(?:e|io)s)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/en/collections/29ve08/south-park-heating-up/lydbrc',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'info_dict': {
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'Tooth Fairy Cartman',
+ 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68',
+ },
+ }, {
+ # episode
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'info_dict': {
+ 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'South Park',
+ 'description': 'md5:ae0d875eff169dcbed16b21531857ac1',
+ },
+ }]
+
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}',
+ video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _get_feed_query(self, uri):
+ return
+
+
+class SouthParkNlIE(SouthParkIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'southpark.nl'
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
@@ -127,7 +167,7 @@ class SouthParkNlIE(SouthParkIE):
}]
-class SouthParkDkIE(SouthParkIE):
+class SouthParkDkIE(SouthParkIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'southparkstudios.dk'
_VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
diff --git a/hypervideo_dl/extractor/sovietscloset.py b/hypervideo_dl/extractor/sovietscloset.py
index 4bc2263..453016c 100644
--- a/hypervideo_dl/extractor/sovietscloset.py
+++ b/hypervideo_dl/extractor/sovietscloset.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
try_get,
@@ -47,7 +44,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
_TESTS = [
{
'url': 'https://sovietscloset.com/video/1337',
- 'md5': '11e58781c4ca5b283307aa54db5b3f93',
+ 'md5': 'bd012b04b261725510ca5383074cdd55',
'info_dict': {
'id': '1337',
'ext': 'mp4',
@@ -72,11 +69,11 @@ class SovietsClosetIE(SovietsClosetBaseIE):
},
{
'url': 'https://sovietscloset.com/video/1105',
- 'md5': '578b1958a379e7110ba38697042e9efb',
+ 'md5': '89fa928f183893cb65a0b7be846d8a90',
'info_dict': {
'id': '1105',
'ext': 'mp4',
- 'title': 'Arma 3 - Zeus Games #3',
+ 'title': 'Arma 3 - Zeus Games #5',
'uploader': 'SovietWomble',
'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$',
'uploader': 'SovietWomble',
@@ -92,8 +89,8 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'availability': 'public',
'series': 'Arma 3',
'season': 'Zeus Games',
- 'episode_number': 3,
- 'episode': 'Episode 3',
+ 'episode_number': 5,
+ 'episode': 'Episode 5',
},
},
]
@@ -107,7 +104,6 @@ class SovietsClosetIE(SovietsClosetBaseIE):
thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
- self._sort_formats(m3u8_formats)
if not m3u8_formats:
duration = None
@@ -125,7 +121,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
+ static_assets_base = self._search_regex(r'(/_nuxt/static/\d+)', webpage, 'staticAssetsBase')
static_assets_base = f'https://sovietscloset.com{static_assets_base}'
stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
@@ -184,7 +180,7 @@ class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
webpage = self._download_webpage(url, playlist_id)
- static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
+ static_assets_base = self._search_regex(r'(/_nuxt/static/\d+)', webpage, 'staticAssetsBase')
static_assets_base = f'https://sovietscloset.com{static_assets_base}'
sovietscloset = self.parse_nuxt_jsonp(f'{static_assets_base}/payload.js', playlist_id, 'global')['games']
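
Both sovietscloset.py lookups switch from the vanished staticAssetsBase:"..." page literal to grabbing any /_nuxt/static/<timestamp> path straight from the HTML. Condensed, with an illustrative timestamp:

    import re

    webpage = '<link href="/_nuxt/static/1664391610/payload.js" rel="preload">'
    base = re.search(r'(/_nuxt/static/\d+)', webpage).group(1)
    print(f'https://sovietscloset.com{base}/video/1337/payload.js')
    # https://sovietscloset.com/_nuxt/static/1664391610/video/1337/payload.js
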
diff --git a/hypervideo_dl/extractor/spankbang.py b/hypervideo_dl/extractor/spankbang.py
index dd849ae..f242d33 100644
--- a/hypervideo_dl/extractor/spankbang.py
+++ b/hypervideo_dl/extractor/spankbang.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -130,8 +128,6 @@ class SpankBangIE(InfoExtractor):
format_url = format_url[0]
extract_format(format_id, format_url)
- self._sort_formats(formats)
-
info = self._search_json_ld(webpage, video_id, default={})
title = self._html_search_regex(
diff --git a/hypervideo_dl/extractor/spankwire.py b/hypervideo_dl/extractor/spankwire.py
index e97c1d2..334b297 100644
--- a/hypervideo_dl/extractor/spankwire.py
+++ b/hypervideo_dl/extractor/spankwire.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -23,6 +21,7 @@ class SpankwireIE(InfoExtractor):
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)']
_TESTS = [{
# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
@@ -67,12 +66,6 @@ class SpankwireIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -108,7 +101,6 @@ class SpankwireIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
view_count = str_to_int(video.get('viewed'))
diff --git a/hypervideo_dl/extractor/spiegel.py b/hypervideo_dl/extractor/spiegel.py
index 58f2ed3..3701e29 100644
--- a/hypervideo_dl/extractor/spiegel.py
+++ b/hypervideo_dl/extractor/spiegel.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
diff --git a/hypervideo_dl/extractor/spiegeltv.py b/hypervideo_dl/extractor/spiegeltv.py
deleted file mode 100644
index 6ccf4c3..0000000
--- a/hypervideo_dl/extractor/spiegeltv.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .nexx import NexxIE
-
-
-class SpiegeltvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
- 'only_matching': True,
- }
-
- def _real_extract(self, url):
- return self.url_result(
- 'https://api.nexx.cloud/v3/748/videos/byid/%s'
- % self._match_id(url), ie=NexxIE.ie_key())
diff --git a/hypervideo_dl/extractor/spike.py b/hypervideo_dl/extractor/spike.py
index 5805f3d..5c1c78d 100644
--- a/hypervideo_dl/extractor/spike.py
+++ b/hypervideo_dl/extractor/spike.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .mtv import MTVServicesInfoExtractor
diff --git a/hypervideo_dl/extractor/sport5.py b/hypervideo_dl/extractor/sport5.py
index 35c57d6..44b4067 100644
--- a/hypervideo_dl/extractor/sport5.py
+++ b/hypervideo_dl/extractor/sport5.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -78,7 +74,6 @@ class Sport5IE(InfoExtractor):
'width': int(fmt.get('width')),
'height': int(fmt.get('height')),
} for fmt in metadata.findall('./PlaybackLinks/FileURL')]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/sportbox.py b/hypervideo_dl/extractor/sportbox.py
index b9017fd..ccbb0e8 100644
--- a/hypervideo_dl/extractor/sportbox.py
+++ b/hypervideo_dl/extractor/sportbox.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -14,6 +9,7 @@ from ..utils import (
class SportBoxIE(InfoExtractor):
_VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"']
_TESTS = [{
'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
'info_dict': {
@@ -45,12 +41,6 @@ class SportBoxIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -75,7 +65,6 @@ class SportBoxIE(InfoExtractor):
formats.append({
'url': src,
})
- self._sort_formats(formats)
player = self._parse_json(
self._search_regex(
diff --git a/hypervideo_dl/extractor/sportdeutschland.py b/hypervideo_dl/extractor/sportdeutschland.py
index 15b488a..75074b3 100644
--- a/hypervideo_dl/extractor/sportdeutschland.py
+++ b/hypervideo_dl/extractor/sportdeutschland.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/spotify.py b/hypervideo_dl/extractor/spotify.py
index 826f98c..55ce36a 100644
--- a/hypervideo_dl/extractor/spotify.py
+++ b/hypervideo_dl/extractor/spotify.py
@@ -1,34 +1,36 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import functools
import json
import re
from .common import InfoExtractor
from ..utils import (
+ OnDemandPagedList,
clean_podcast_url,
float_or_none,
int_or_none,
strip_or_none,
+ traverse_obj,
try_get,
unified_strdate,
)
class SpotifyBaseIE(InfoExtractor):
+ _WORKING = False
_ACCESS_TOKEN = None
_OPERATION_HASHES = {
'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
}
- _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
+ _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P<id>[^/?&#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://open\.spotify.com/embed/[^"]+)"']
def _real_initialize(self):
self._ACCESS_TOKEN = self._download_json(
'https://open.spotify.com/get_access_token', None)['accessToken']
- def _call_api(self, operation, video_id, variables):
+ def _call_api(self, operation, video_id, variables, **kwargs):
return self._download_json(
'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
'operationName': 'query' + operation,
@@ -38,7 +40,8 @@ class SpotifyBaseIE(InfoExtractor):
'sha256Hash': self._OPERATION_HASHES[operation],
},
})
- }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
+ }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN},
+ **kwargs)['data']
def _extract_episode(self, episode, series):
episode_id = episode['id']
@@ -99,8 +102,9 @@ class SpotifyBaseIE(InfoExtractor):
class SpotifyIE(SpotifyBaseIE):
IE_NAME = 'spotify'
+ IE_DESC = 'Spotify episodes'
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
- _TEST = {
+ _TESTS = [{
'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
'info_dict': {
@@ -112,7 +116,10 @@ class SpotifyIE(SpotifyBaseIE):
'release_date': '20201217',
'series': "The Guardian's Audio Long Reads",
}
- }
+ }, {
+ 'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
episode_id = self._match_id(url)
@@ -125,6 +132,7 @@ class SpotifyIE(SpotifyBaseIE):
class SpotifyShowIE(SpotifyBaseIE):
IE_NAME = 'spotify:show'
+ IE_DESC = 'Spotify shows'
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
_TEST = {
'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
@@ -135,22 +143,25 @@ class SpotifyShowIE(SpotifyBaseIE):
},
'playlist_mincount': 36,
}
+ _PER_PAGE = 100
+
+ def _fetch_page(self, show_id, page=0):
+ return self._call_api('ShowEpisodes', show_id, {
+ 'limit': 100,
+ 'offset': page * self._PER_PAGE,
+ 'uri': f'spotify:show:{show_id}',
+ }, note=f'Downloading page {page + 1} JSON metadata')['podcast']
def _real_extract(self, url):
show_id = self._match_id(url)
- podcast = self._call_api('ShowEpisodes', show_id, {
- 'limit': 1000000000,
- 'offset': 0,
- 'uri': 'spotify:show:' + show_id,
- })['podcast']
- podcast_name = podcast.get('name')
-
- entries = []
- for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
- episode = item.get('episode')
- if not episode:
- continue
- entries.append(self._extract_episode(episode, podcast_name))
+ first_page = self._fetch_page(show_id)
+
+ def _entries(page):
+ podcast = self._fetch_page(show_id, page) if page else first_page
+ yield from map(
+ functools.partial(self._extract_episode, series=podcast.get('name')),
+ traverse_obj(podcast, ('episodes', 'items', ..., 'episode')))
return self.playlist_result(
- entries, show_id, podcast_name, podcast.get('description'))
+ OnDemandPagedList(_entries, self._PER_PAGE),
+ show_id, first_page.get('name'), first_page.get('description'))
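
The SpotifyShow rewrite above swaps the old request for a billion episodes at once for lazy pagination: `OnDemandPagedList` takes a callable yielding the entries of a given zero-indexed page plus a page size, and only fetches pages as the playlist is actually consumed. A self-contained sketch under the same contract, with `fetch` standing in for one `_call_api('ShowEpisodes', ...)` round trip:

    from hypervideo_dl.utils import OnDemandPagedList

    PER_PAGE = 100

    def fetch(page):  # stand-in for one ShowEpisodes API request
        return [f'episode-{page * PER_PAGE + i}' for i in range(PER_PAGE)]

    def entries(page):
        yield from fetch(page)

    playlist = OnDemandPagedList(entries, PER_PAGE)
    print(playlist.getslice(0, 3))  # only page 0 is ever requested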
diff --git a/hypervideo_dl/extractor/spreaker.py b/hypervideo_dl/extractor/spreaker.py
index 6c7e40a..36a9bd2 100644
--- a/hypervideo_dl/extractor/spreaker.py
+++ b/hypervideo_dl/extractor/spreaker.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/springboardplatform.py b/hypervideo_dl/extractor/springboardplatform.py
index 49ac1f5..a98584a 100644
--- a/hypervideo_dl/extractor/springboardplatform.py
+++ b/hypervideo_dl/extractor/springboardplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -24,6 +21,7 @@ class SpringboardPlatformIE(InfoExtractor):
xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
)
'''
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1']
_TESTS = [{
'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
'md5': '5c3cb7b5c55740d482561099e920f192',
@@ -48,14 +46,6 @@ class SpringboardPlatformIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1',
- webpage)]
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2')
@@ -112,8 +102,6 @@ class SpringboardPlatformIE(InfoExtractor):
})
formats.append(m3u8_format)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/sprout.py b/hypervideo_dl/extractor/sprout.py
index e243732..444a6c2 100644
--- a/hypervideo_dl/extractor/sprout.py
+++ b/hypervideo_dl/extractor/sprout.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .adobepass import AdobePassIE
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/srgssr.py b/hypervideo_dl/extractor/srgssr.py
index f991981..145f25e 100644
--- a/hypervideo_dl/extractor/srgssr.py
+++ b/hypervideo_dl/extractor/srgssr.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -132,7 +128,6 @@ class SRGSSRIE(InfoExtractor):
'url': podcast_url,
'quality': q(quality),
})
- self._sort_formats(formats)
if media_type == 'video':
for sub in (media_data.get('subtitleList') or []):
diff --git a/hypervideo_dl/extractor/srmediathek.py b/hypervideo_dl/extractor/srmediathek.py
index 359dada..3cc3987 100644
--- a/hypervideo_dl/extractor/srmediathek.py
+++ b/hypervideo_dl/extractor/srmediathek.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .ard import ARDMediathekBaseIE
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/stanfordoc.py b/hypervideo_dl/extractor/stanfordoc.py
index 0003075..be0f4af 100644
--- a/hypervideo_dl/extractor/stanfordoc.py
+++ b/hypervideo_dl/extractor/stanfordoc.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/startrek.py b/hypervideo_dl/extractor/startrek.py
new file mode 100644
index 0000000..e92122f
--- /dev/null
+++ b/hypervideo_dl/extractor/startrek.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, urljoin
+
+
+class StarTrekIE(InfoExtractor):
+ _VALID_URL = r'(?P<base>https?://(?:intl|www)\.startrek\.com)/videos/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
+ 'md5': '491df5035c9d4dc7f63c79caaf9c839e',
+ 'info_dict': {
+ 'id': 'watch-welcoming-jess-bush-to-the-ready-room',
+ 'ext': 'mp4',
+ 'title': 'WATCH: Welcoming Jess Bush to The Ready Room',
+ 'duration': 1888,
+ 'timestamp': 1655388000,
+ 'upload_date': '20220616',
+ 'description': 'md5:1ffee884e3920afbdd6dd04e926a1221',
+ 'thumbnail': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14794_rr_thumb_107_yt_16x9\.jpg(?:\?.+)?',
+ 'subtitles': {'en-US': [{
+ 'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_107_v4\.vtt',
+ }, {
+ 'url': 'https://media.startrek.com/2022/06/16/2043801155561/1069981_hls/trr_snw_107_v4-c4bfc25d/stream_vtt.m3u8',
+ }]},
+ }
+ }, {
+ 'url': 'https://www.startrek.com/videos/watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
+ 'md5': 'f5ad74fbb86e91e0882fc0a333178d1d',
+ 'info_dict': {
+ 'id': 'watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
+ 'ext': 'mp4',
+ 'title': 'WATCH: Ethan Peck and Gia Sandhu Beam Down to The Ready Room',
+ 'duration': 1986,
+ 'timestamp': 1654221600,
+ 'upload_date': '20220603',
+ 'description': 'md5:b3aa0edacfe119386567362dec8ed51b',
+ 'thumbnail': r're:https://www\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14792_rr_thumb_105_yt_16x9_1.jpg(?:\?.+)?',
+ 'subtitles': {'en-US': [{
+ 'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_105_v5\.vtt',
+ }]},
+ }
+ }]
+
+ def _real_extract(self, url):
+ urlbase, video_id = self._match_valid_url(url).group('base', 'id')
+ webpage = self._download_webpage(url, video_id)
+
+ player = self._search_regex(
+ r'(<\s*div\s+id\s*=\s*"cvp-player-[^<]+<\s*/div\s*>)', webpage, 'player')
+
+ hls = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player, 'HLS URL')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls, video_id, 'mp4')
+
+ captions = self._html_search_regex(
+ r'\bdata-captions-url\s*=\s*"([^"]+)"', player, 'captions URL', fatal=False)
+ if captions:
+ subtitles.setdefault('en-US', [])[:0] = [{'url': urljoin(urlbase, captions)}]
+
+ # NB: Most of the data in the json_ld is undesirable
+ json_ld = self._search_json_ld(webpage, video_id, fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_regex(
+ r'\bdata-title\s*=\s*"([^"]+)"', player, 'title', json_ld.get('title')),
+ 'description': self._html_search_regex(
+ r'(?s)<\s*div\s+class\s*=\s*"header-body"\s*>(.+?)<\s*/div\s*>',
+ webpage, 'description', fatal=False),
+ 'duration': int_or_none(self._html_search_regex(
+ r'\bdata-duration\s*=\s*"(\d+)"', player, 'duration', fatal=False)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': urljoin(urlbase, self._html_search_regex(
+ r'\bdata-poster-url\s*=\s*"([^"]+)"', player, 'thumbnail', fatal=False)),
+ 'timestamp': json_ld.get('timestamp'),
+ }
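
One detail in the new StarTrek extractor is easy to miss: the standalone `.vtt` caption file is spliced in ahead of the HLS-derived subtitle entries with the slice assignment `setdefault('en-US', [])[:0] = [...]`, so the direct track sorts first. The idiom in isolation (URLs are placeholders):

    subtitles = {'en-US': [{'url': 'https://example.com/stream_vtt.m3u8'}]}
    # prepend without clobbering whatever the m3u8 extraction already found
    subtitles.setdefault('en-US', [])[:0] = [{'url': 'https://example.com/captions.vtt'}]
    assert subtitles['en-US'][0]['url'].endswith('.vtt')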
diff --git a/hypervideo_dl/extractor/startv.py b/hypervideo_dl/extractor/startv.py
index 411320e..bb6e8f1 100644
--- a/hypervideo_dl/extractor/startv.py
+++ b/hypervideo_dl/extractor/startv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_str,
diff --git a/hypervideo_dl/extractor/steam.py b/hypervideo_dl/extractor/steam.py
index 4ed0fb5..7daee2f 100644
--- a/hypervideo_dl/extractor/steam.py
+++ b/hypervideo_dl/extractor/steam.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -111,7 +109,6 @@ class SteamIE(InfoExtractor):
'format_id': ext + quality,
'url': video_url,
})
- self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
@@ -129,3 +126,49 @@ class SteamIE(InfoExtractor):
raise ExtractorError('Could not find any videos')
return self.playlist_result(entries, playlist_id, playlist_title)
+
+
+class SteamCommunityBroadcastIE(InfoExtractor):
+ _VALID_URL = r'https?://steamcommunity\.(?:com)/broadcast/watch/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://steamcommunity.com/broadcast/watch/76561199073851486',
+ 'info_dict': {
+ 'id': '76561199073851486',
+ 'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
+ 'ext': 'mp4',
+ 'uploader_id': 1113585758,
+ 'uploader': 'pepperm!nt',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'Stream has ended',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._download_json(
+ 'https://steamcommunity.com/broadcast/getbroadcastmpd/',
+ video_id, query={'steamid': f'{video_id}'})
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(json_data['hls_url'], video_id)
+
+ ''' # We cannot download live dash atm
+ mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(json_data['url'], video_id)
+ formats.extend(mpd_formats)
+ self._merge_subtitles(mpd_subs, target=subs)
+ '''
+
+ uploader_json = self._download_json(
+ 'https://steamcommunity.com/actions/ajaxresolveusers',
+ video_id, query={'steamids': video_id})[0]
+
+ return {
+ 'id': video_id,
+ 'title': self._generic_title('', webpage),
+ 'formats': formats,
+ 'live_status': 'is_live',
+ 'view_count': json_data.get('num_view'),
+ 'uploader': uploader_json.get('persona_name'),
+ 'uploader_id': uploader_json.get('accountid'),
+ 'subtitles': subs,
+ }
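
The commented-out DASH block in `SteamCommunityBroadcastIE` documents the intended pattern once live DASH becomes downloadable: extract a second format list and fold its subtitles into the first with `_merge_subtitles(..., target=subs)`. Behaviourally the merge concatenates per-language track lists; a plain-dict sketch of that contract (the real helper also deduplicates entries, omitted here):

    def merge_subtitles(*dicts, target):
        # simplified stand-in for InfoExtractor._merge_subtitles
        for d in dicts:
            for lang, tracks in d.items():
                target.setdefault(lang, []).extend(tracks)
        return target

    subs = {'en': [{'url': 'a.vtt'}]}
    merge_subtitles({'en': [{'url': 'b.vtt'}], 'de': [{'url': 'c.vtt'}]}, target=subs)
    # subs == {'en': [{'url': 'a.vtt'}, {'url': 'b.vtt'}], 'de': [{'url': 'c.vtt'}]}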
diff --git a/hypervideo_dl/extractor/stitcher.py b/hypervideo_dl/extractor/stitcher.py
index 8227825..2fd200f 100644
--- a/hypervideo_dl/extractor/stitcher.py
+++ b/hypervideo_dl/extractor/stitcher.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/storyfire.py b/hypervideo_dl/extractor/storyfire.py
index e18a59a..035747c 100644
--- a/hypervideo_dl/extractor/storyfire.py
+++ b/hypervideo_dl/extractor/storyfire.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
from .common import InfoExtractor
@@ -47,7 +44,7 @@ class StoryFireBaseIE(InfoExtractor):
'timestamp': int_or_none(video.get('publishDate')),
'uploader': video.get('username'),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://storyfire.com/user/%s/video'),
+ 'uploader_url': format_field(uploader_id, None, 'https://storyfire.com/user/%s/video'),
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
}
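
The one-line storyfire change tracks a signature change in `utils.format_field`: the template moved to the third positional slot, with an optional field/key lookup in between. Roughly, and only as a sketch of the contract rather than the real implementation:

    def format_field(obj, field=None, template='%s', default=''):
        # simplified: the real helper also supports ignore tuples and a func hook
        value = obj if field is None else (obj or {}).get(field)
        return default if value in (None, '') else template % value

    format_field('33628', None, 'https://storyfire.com/user/%s/video')
    # -> 'https://storyfire.com/user/33628/video'
    format_field(None, None, 'https://storyfire.com/user/%s/video')
    # -> ''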
diff --git a/hypervideo_dl/extractor/streamable.py b/hypervideo_dl/extractor/streamable.py
index 8081296..462861e 100644
--- a/hypervideo_dl/extractor/streamable.py
+++ b/hypervideo_dl/extractor/streamable.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -15,6 +10,7 @@ from ..utils import (
class StreamableIE(InfoExtractor):
_VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//streamable\.com/.+?)(?P=q1)']
_TESTS = [
{
'url': 'https://streamable.com/dnd1',
@@ -56,14 +52,6 @@ class StreamableIE(InfoExtractor):
}
]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)',
- webpage)
- if mobj:
- return mobj.group('src')
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -101,7 +89,6 @@ class StreamableIE(InfoExtractor):
'vcodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['video_codec_name'])).get('vcodec'),
'acodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['audio_codec_name'])).get('acodec'),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/streamanity.py b/hypervideo_dl/extractor/streamanity.py
index 2e2d5ee..6eaee52 100644
--- a/hypervideo_dl/extractor/streamanity.py
+++ b/hypervideo_dl/extractor/streamanity.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
@@ -38,7 +35,6 @@ class StreamanityIE(InfoExtractor):
formats = self._extract_m3u8_formats(
f'https://stream.mux.com/{video_info["play_id"]}.m3u8?token={video_info["token"]}',
video_id, ext='mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/streamcloud.py b/hypervideo_dl/extractor/streamcloud.py
index b97bb43..7289809 100644
--- a/hypervideo_dl/extractor/streamcloud.py
+++ b/hypervideo_dl/extractor/streamcloud.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/streamcz.py b/hypervideo_dl/extractor/streamcz.py
index 4cb9923..c4537ba 100644
--- a/hypervideo_dl/extractor/streamcz.py
+++ b/hypervideo_dl/extractor/streamcz.py
@@ -1,4 +1,3 @@
-# coding: utf-8
import json
from .common import InfoExtractor
@@ -53,8 +52,8 @@ class StreamCZIE(InfoExtractor):
def _extract_formats(self, spl_url, video):
for ext, pref, streams in (
- ('ts', -1, traverse_obj(video, ('http_stream', 'qualities'))),
- ('mp4', 1, video.get('mp4'))):
+ ('ts', -1, traverse_obj(video, ('http_stream', 'qualities')) or {}),
+ ('mp4', 1, video.get('mp4') or {})):
for format_id, stream in streams.items():
if not stream.get('url'):
continue
@@ -110,7 +109,6 @@ class StreamCZIE(InfoExtractor):
})
formats = list(self._extract_formats(spl_url, video))
- self._sort_formats(formats)
return {
'id': video_id,
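
The two-line streamcz fix is a small defensive-default pattern: when a source dict may be absent, `traverse_obj(...) or {}` and `video.get('mp4') or {}` keep the following `.items()` loop from raising on `None`:

    video = {'http_stream': {}}  # neither 'qualities' nor 'mp4' present
    for format_id, stream in (video.get('mp4') or {}).items():
        pass  # iterates zero times instead of raising AttributeError on None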
diff --git a/hypervideo_dl/extractor/streamff.py b/hypervideo_dl/extractor/streamff.py
index 6b190bb..93c4294 100644
--- a/hypervideo_dl/extractor/streamff.py
+++ b/hypervideo_dl/extractor/streamff.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
from ..utils import int_or_none, parse_iso8601
diff --git a/hypervideo_dl/extractor/streetvoice.py b/hypervideo_dl/extractor/streetvoice.py
index f21681a..a32c8bc 100644
--- a/hypervideo_dl/extractor/streetvoice.py
+++ b/hypervideo_dl/extractor/streetvoice.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/stretchinternet.py b/hypervideo_dl/extractor/stretchinternet.py
index ec08eae..e438dee 100644
--- a/hypervideo_dl/extractor/stretchinternet.py
+++ b/hypervideo_dl/extractor/stretchinternet.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/stripchat.py b/hypervideo_dl/extractor/stripchat.py
index 0d4a0ce..4229a0b 100644
--- a/hypervideo_dl/extractor/stripchat.py
+++ b/hypervideo_dl/extractor/stripchat.py
@@ -1,37 +1,28 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- lowercase_escape,
- try_get,
-)
+from ..utils import ExtractorError, lowercase_escape, traverse_obj
class StripchatIE(InfoExtractor):
- _VALID_URL = r'https?://stripchat\.com/(?P<id>[0-9A-Za-z-_]+)'
+ _VALID_URL = r'https?://stripchat\.com/(?P<id>[^/?#]+)'
_TESTS = [{
- 'url': 'https://stripchat.com/feel_me',
+ 'url': 'https://stripchat.com/Joselin_Flower',
'info_dict': {
- 'id': 'feel_me',
+ 'id': 'Joselin_Flower',
'ext': 'mp4',
- 'title': 're:^feel_me [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'title': 're:^Joselin_Flower [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': str,
'is_live': True,
'age_limit': 18,
},
'skip': 'Room is offline',
+ }, {
+ 'url': 'https://stripchat.com/Rakhijaan@xh',
+ 'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'https://stripchat.com/%s/' % video_id, video_id,
- headers=self.geo_verification_headers())
+ webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())
data = self._parse_json(
self._search_regex(
@@ -41,19 +32,24 @@ class StripchatIE(InfoExtractor):
if not data:
raise ExtractorError('Unable to find configuration for stream.')
- if try_get(data, lambda x: x['viewCam']['show'], dict):
+ if traverse_obj(data, ('viewCam', 'show'), expected_type=dict):
raise ExtractorError('Model is in private show', expected=True)
- elif not try_get(data, lambda x: x['viewCam']['model']['isLive'], bool):
+ elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool):
raise ExtractorError('Model is offline', expected=True)
- server = try_get(data, lambda x: x['viewCam']['viewServers']['flashphoner-hls'], compat_str)
- host = try_get(data, lambda x: x['config']['data']['hlsStreamHost'], compat_str)
- model_id = try_get(data, lambda x: x['viewCam']['model']['id'], int)
-
- formats = self._extract_m3u8_formats(
- 'https://b-%s.%s/hls/%d/%d.m3u8' % (server, host, model_id, model_id),
- video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
- self._sort_formats(formats)
+ server = traverse_obj(data, ('viewCam', 'viewServers', 'flashphoner-hls'), expected_type=str)
+ model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int)
+
+ formats = []
+ for host in traverse_obj(data, (
+ 'config', 'data', (('featuresV2', 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))):
+ formats = self._extract_m3u8_formats(
+ f'https://b-{server}.{host}/hls/{model_id}/{model_id}.m3u8',
+ video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
+ if formats:
+ break
+ if not formats:
+ self.raise_no_formats('No active streams found', expected=True)
return {
'id': video_id,
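
The stripchat rewrite shows off `traverse_obj` branching: a nested tuple in the path means "try each alternative", and `...` fans out over every list element, so a single expression yields all fallback domains followed by the primary host. Illustrated on a toy dict shaped like the keys visible in the diff above:

    from hypervideo_dl.utils import traverse_obj

    data = {'config': {'data': {
        'featuresV2': {'hlsFallback': {'fallbackDomains': ['cdn1.example', 'cdn2.example']}},
        'hlsStreamHost': 'main.example',
    }}}

    hosts = traverse_obj(data, (
        'config', 'data', (('featuresV2', 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost')))
    # -> ['cdn1.example', 'cdn2.example', 'main.example']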
diff --git a/hypervideo_dl/extractor/stv.py b/hypervideo_dl/extractor/stv.py
index ba5661d..c879fb5 100644
--- a/hypervideo_dl/extractor/stv.py
+++ b/hypervideo_dl/extractor/stv.py
@@ -1,10 +1,6 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- compat_str,
float_or_none,
int_or_none,
smuggle_url,
diff --git a/hypervideo_dl/extractor/substack.py b/hypervideo_dl/extractor/substack.py
new file mode 100644
index 0000000..fa38263
--- /dev/null
+++ b/hypervideo_dl/extractor/substack.py
@@ -0,0 +1,100 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import str_or_none, traverse_obj
+
+
+class SubstackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<username>[\w-]+)\.substack\.com/p/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://haleynahman.substack.com/p/i-made-a-vlog?s=r',
+ 'md5': 'f27e4fc6252001d48d479f45e65cdfd5',
+ 'info_dict': {
+ 'id': '47660949',
+ 'ext': 'mp4',
+ 'title': 'I MADE A VLOG',
+ 'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6',
+ 'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://haleynahman.substack.com/p/-dear-danny-i-found-my-boyfriends?s=r',
+ 'md5': '0a63eacec877a1171a62cfa69710fcea',
+ 'info_dict': {
+ 'id': '51045592',
+ 'ext': 'mpga',
+ 'title': "🎧 Dear Danny: I found my boyfriend's secret Twitter account",
+ 'description': 'md5:a57f2439319e56e0af92dd0c95d75797',
+ 'thumbnail': 'md5:daa40b6b79249417c14ff8103db29639',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://andrewzimmern.substack.com/p/mussels-with-black-bean-sauce-recipe',
+ 'md5': 'fd3c07077b02444ff0130715b5f632bb',
+ 'info_dict': {
+ 'id': '47368578',
+ 'ext': 'mp4',
+ 'title': 'Mussels with Black Bean Sauce: Recipe of the Week #7',
+ 'description': 'md5:b96234a2906c7d854d5229818d889515',
+ 'thumbnail': 'md5:e30bfaa9da40e82aa62354263a9dd232',
+ 'uploader': "Andrew Zimmern's Spilled Milk ",
+ 'uploader_id': '577659',
+ }
+ }]
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
+ return
+
+ mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage)
+ if mobj:
+ parsed = urllib.parse.urlparse(url)
+ yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
+ raise cls.StopExtraction()
+
+ def _extract_video_formats(self, video_id, username):
+ formats, subtitles = [], {}
+ for video_format in ('hls', 'mp4'):
+ video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}'
+
+ if video_format == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': video_url,
+ 'ext': video_format,
+ })
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id, username = self._match_valid_url(url).group('id', 'username')
+ webpage = self._download_webpage(url, display_id)
+
+ webpage_info = self._search_json(r'<script[^>]*>\s*window\._preloads\s*=', webpage, 'preloads', display_id)
+
+ post_type = webpage_info['post']['type']
+ formats, subtitles = [], {}
+ if post_type == 'podcast':
+ formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
+ elif post_type == 'video':
+ formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username)
+ else:
+ self.raise_no_formats(f'Page type "{post_type}" is not supported')
+
+ return {
+ 'id': str(webpage_info['post']['id']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(webpage_info, ('post', 'title')),
+ 'description': traverse_obj(webpage_info, ('post', 'description')),
+ 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')),
+ 'uploader': traverse_obj(webpage_info, ('pub', 'name')),
+ 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))),
+ }
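
`SubstackIE` demonstrates the other half of the new embed API: `_extract_embed_urls` is a generator classmethod rather than a static list-returning `_extract_urls`, and it may `raise cls.StopExtraction()` to signal that no other extractor needs to scan the page. The canonical-URL reconstruction it performs is plain stdlib:

    import urllib.parse

    # hypothetical page served from a custom domain but backed by Substack
    url = 'https://example.com/p/i-made-a-vlog'
    parsed = urllib.parse.urlparse(url)
    print(parsed._replace(netloc='haleynahman.substack.com').geturl())
    # -> 'https://haleynahman.substack.com/p/i-made-a-vlog'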
diff --git a/hypervideo_dl/extractor/sunporno.py b/hypervideo_dl/extractor/sunporno.py
index 59b77bf..708873a 100644
--- a/hypervideo_dl/extractor/sunporno.py
+++ b/hypervideo_dl/extractor/sunporno.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -63,7 +61,6 @@ class SunPornoIE(InfoExtractor):
'format_id': video_ext,
'quality': quality(video_ext),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/sverigesradio.py b/hypervideo_dl/extractor/sverigesradio.py
index aa0691f..65da615 100644
--- a/hypervideo_dl/extractor/sverigesradio.py
+++ b/hypervideo_dl/extractor/sverigesradio.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -61,7 +58,6 @@ class SverigesRadioBaseIE(InfoExtractor):
'vcodec': 'none',
'url': audio_url,
})
- self._sort_formats(formats)
return {
'id': audio_id,
diff --git a/hypervideo_dl/extractor/svt.py b/hypervideo_dl/extractor/svt.py
index 8ca62e3..31bf7f9 100644
--- a/hypervideo_dl/extractor/svt.py
+++ b/hypervideo_dl/extractor/svt.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -54,7 +51,6 @@ class SVTBaseIE(InfoExtractor):
self.raise_geo_restricted(
'This video is only available in Sweden',
countries=self._GEO_COUNTRIES, metadata_available=True)
- self._sort_formats(formats)
subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
if isinstance(subtitle_references, list):
@@ -104,6 +100,7 @@ class SVTBaseIE(InfoExtractor):
class SVTIE(SVTBaseIE):
_VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
+ _EMBED_REGEX = [r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % _VALID_URL]
_TEST = {
'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
@@ -116,13 +113,6 @@ class SVTIE(SVTBaseIE):
},
}
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
widget_id = mobj.group('widget_id')
diff --git a/hypervideo_dl/extractor/swearnet.py b/hypervideo_dl/extractor/swearnet.py
new file mode 100644
index 0000000..6e216a2
--- /dev/null
+++ b/hypervideo_dl/extractor/swearnet.py
@@ -0,0 +1,73 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class SwearnetEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
+ 'info_dict': {
+ 'id': '232819',
+ 'ext': 'mp4',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'duration': 719,
+ 'description': 'md5:c48ef71440ce466284c07085cd7bd761',
+ 'season': 'Season 1',
+ 'title': 'Episode 1 - Grilled Cheese Sammich',
+ 'season_number': 1,
+ 'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg',
+ }
+ }]
+
+ def _get_formats_and_subtitle(self, video_source, video_id):
+ video_source = video_source or {}
+ formats, subtitles = [], {}
+ for key, value in video_source.items():
+ if key == 'hls':
+ for video_hls in value:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_hls.get('url'), video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.extend({
+ 'url': video_mp4.get('url'),
+ 'ext': 'mp4'
+ } for video_mp4 in value)
+
+ return formats, subtitles
+
+ def _get_direct_subtitle(self, caption_json):
+ subs = {}
+ for caption in caption_json:
+ subs.setdefault(caption.get('language') or 'und', []).append({
+ 'url': caption.get('vttUrl'),
+ 'name': caption.get('name')
+ })
+
+ return subs
+
+ def _real_extract(self, url):
+ display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
+ webpage = self._download_webpage(url, display_id)
+
+ external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+ json_data = self._download_json(
+ f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
+
+ formats, subtitles = self._get_formats_and_subtitle(json_data['sources'], display_id)
+ self._merge_subtitles(self._get_direct_subtitle(json_data.get('captions')), target=subtitles)
+
+ return {
+ 'id': str(json_data['videoId']),
+ 'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ 'description': (json_data.get('description')
+ or self._html_search_meta(['og:description', 'twitter:description'], webpage)),
+ 'duration': int_or_none(json_data.get('seconds')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'season_number': int_or_none(season_number),
+ 'episode_number': int_or_none(episode_number),
+ 'thumbnails': [{'url': thumbnail_url}
+ for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))]
+ }
diff --git a/hypervideo_dl/extractor/swrmediathek.py b/hypervideo_dl/extractor/swrmediathek.py
index 0f61597..38bdfce 100644
--- a/hypervideo_dl/extractor/swrmediathek.py
+++ b/hypervideo_dl/extractor/swrmediathek.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
@@ -95,7 +92,6 @@ class SWRMediathekIE(InfoExtractor):
'vcodec': codec if media_type == 'Video' else 'none',
'acodec': codec if media_type == 'Audio' else None,
})
- self._sort_formats(formats)
upload_date = None
entry_pdatet = attr.get('entry_pdatet')
diff --git a/hypervideo_dl/extractor/syfy.py b/hypervideo_dl/extractor/syfy.py
index def7e5a..c79d27a 100644
--- a/hypervideo_dl/extractor/syfy.py
+++ b/hypervideo_dl/extractor/syfy.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
diff --git a/hypervideo_dl/extractor/syvdk.py b/hypervideo_dl/extractor/syvdk.py
new file mode 100644
index 0000000..287fb26
--- /dev/null
+++ b/hypervideo_dl/extractor/syvdk.py
@@ -0,0 +1,33 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class SYVDKIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P<id>[\w-]+)'
+
+ _TESTS = [{
+ 'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2',
+ 'md5': '429ce5a423dd4b1e1d0bf3a569558089',
+ 'info_dict': {
+ 'id': '12215',
+ 'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2',
+ 'ext': 'mp3',
+ 'title': 'Isabella Arendt stiller op for De Konservative',
+ 'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ info_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['episodeDetails'][0]
+
+ return {
+ 'id': str(info_data['id']),
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ 'url': info_data['details']['enclosure'],
+ 'display_id': video_id,
+ 'title': traverse_obj(info_data, ('title', 'rendered')),
+ 'description': traverse_obj(info_data, ('details', 'post_title')),
+ }
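
The new 24syv extractor leans entirely on `_search_nextjs_data`, the helper that pulls the JSON blob Next.js sites embed in a `<script id="__NEXT_DATA__">` tag. A standalone approximation of what it does (the real helper lives on `InfoExtractor` and has richer error handling):

    import json
    import re

    def search_nextjs_data(webpage):
        mobj = re.search(
            r'<script[^>]+id=["\']__NEXT_DATA__["\'][^>]*>(.+?)</script>',
            webpage, re.DOTALL)
        return json.loads(mobj.group(1)) if mobj else None

    page = '<script id="__NEXT_DATA__" type="application/json">{"props": {"pageProps": {}}}</script>'
    print(search_nextjs_data(page))  # {'props': {'pageProps': {}}}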
diff --git a/hypervideo_dl/extractor/sztvhu.py b/hypervideo_dl/extractor/sztvhu.py
index cfad331..1cbc2a3 100644
--- a/hypervideo_dl/extractor/sztvhu.py
+++ b/hypervideo_dl/extractor/sztvhu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/tagesschau.py b/hypervideo_dl/extractor/tagesschau.py
index 6e03d0a..ea0532c 100644
--- a/hypervideo_dl/extractor/tagesschau.py
+++ b/hypervideo_dl/extractor/tagesschau.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -142,8 +139,6 @@ class TagesschauIE(InfoExtractor):
timestamp = video_info.get('timestamp')
title = title or video_info.get('description')
- self._sort_formats(formats)
-
return {
'id': display_id,
'title': title,
diff --git a/hypervideo_dl/extractor/tass.py b/hypervideo_dl/extractor/tass.py
index 6d336da..67e544a 100644
--- a/hypervideo_dl/extractor/tass.py
+++ b/hypervideo_dl/extractor/tass.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -51,7 +48,6 @@ class TassIE(InfoExtractor):
'format_id': label,
'quality': quality(label),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/tastytrade.py b/hypervideo_dl/extractor/tastytrade.py
deleted file mode 100644
index 7fe96bd..0000000
--- a/hypervideo_dl/extractor/tastytrade.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-
-
-class TastyTradeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
- 'info_dict': {
- 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
- 'ext': 'mp4',
- 'title': 'A History of Teaming',
- 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
- 'duration': 422.255,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Ooyala'],
- }, {
- 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- ooyala_code = self._search_regex(
- r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
- webpage, 'ooyala code', group='code')
-
- info = self._search_json_ld(webpage, display_id, fatal=False)
- info.update({
- '_type': 'url_transparent',
- 'ie_key': OoyalaIE.ie_key(),
- 'url': 'ooyala:%s' % ooyala_code,
- 'display_id': display_id,
- })
- return info
diff --git a/hypervideo_dl/extractor/tbs.py b/hypervideo_dl/extractor/tbs.py
index c7d62ff..808c6c7 100644
--- a/hypervideo_dl/extractor/tbs.py
+++ b/hypervideo_dl/extractor/tbs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .turner import TurnerBaseIE
diff --git a/hypervideo_dl/extractor/tdslifeway.py b/hypervideo_dl/extractor/tdslifeway.py
index 101c6ee..3623a68 100644
--- a/hypervideo_dl/extractor/tdslifeway.py
+++ b/hypervideo_dl/extractor/tdslifeway.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/teachable.py b/hypervideo_dl/extractor/teachable.py
index 232eaa5..c212a49 100644
--- a/hypervideo_dl/extractor/teachable.py
+++ b/hypervideo_dl/extractor/teachable.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -142,12 +140,12 @@ class TeachableIE(TeachableBaseIE):
r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
webpage)
- @staticmethod
- def _extract_url(webpage, source_url):
- if not TeachableIE._is_teachable(webpage):
- return
- if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
- return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ if cls._is_teachable(webpage):
+ if re.match(r'https?://[^/]+/(?:courses|p)', url):
+ yield f'{cls._URL_PREFIX}{url}'
+ raise cls.StopExtraction()
def _real_extract(self, url):
mobj = self._match_valid_url(url)
@@ -162,7 +160,7 @@ class TeachableIE(TeachableBaseIE):
webpage = self._download_webpage(url, video_id)
- wistia_urls = WistiaIE._extract_urls(webpage)
+ wistia_urls = WistiaIE._extract_embed_urls(url, webpage)
if not wistia_urls:
if any(re.search(p, webpage) for p in (
r'class=["\']lecture-contents-locked',
diff --git a/hypervideo_dl/extractor/teachertube.py b/hypervideo_dl/extractor/teachertube.py
index e22f011..c3eec27 100644
--- a/hypervideo_dl/extractor/teachertube.py
+++ b/hypervideo_dl/extractor/teachertube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -76,8 +73,6 @@ class TeacherTubeIE(InfoExtractor):
} for media_url in set(media_urls)
]
- self._sort_formats(formats)
-
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'thumbnail', webpage)
diff --git a/hypervideo_dl/extractor/teachingchannel.py b/hypervideo_dl/extractor/teachingchannel.py
index 624cdb3..275f6d1 100644
--- a/hypervideo_dl/extractor/teachingchannel.py
+++ b/hypervideo_dl/extractor/teachingchannel.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/teamcoco.py b/hypervideo_dl/extractor/teamcoco.py
index 5793b71..a822b67 100644
--- a/hypervideo_dl/extractor/teamcoco.py
+++ b/hypervideo_dl/extractor/teamcoco.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .turner import TurnerBaseIE
@@ -199,7 +196,6 @@ class TeamcocoIE(TurnerBaseIE):
'format_id': format_id,
'quality': get_quality(format_id),
})
- self._sort_formats(formats)
info['formats'] = formats
return info
diff --git a/hypervideo_dl/extractor/teamtreehouse.py b/hypervideo_dl/extractor/teamtreehouse.py
index 64522ec..dd802db 100644
--- a/hypervideo_dl/extractor/teamtreehouse.py
+++ b/hypervideo_dl/extractor/teamtreehouse.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/techtalks.py b/hypervideo_dl/extractor/techtalks.py
index 78f0731..d37de36 100644
--- a/hypervideo_dl/extractor/techtalks.py
+++ b/hypervideo_dl/extractor/techtalks.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/ted.py b/hypervideo_dl/extractor/ted.py
index b5c7e35..c28a154 100644
--- a/hypervideo_dl/extractor/ted.py
+++ b/hypervideo_dl/extractor/ted.py
@@ -125,8 +125,6 @@ class TedTalkIE(TedBaseIE):
ext_url = external.get('code') if service.lower() == 'youtube' else None
return self.url_result(ext_url or external['uri'])
- self._sort_formats(formats)
-
thumbnail = playerData.get('thumb') or self._og_search_property('image', webpage)
if thumbnail:
# trim thumbnail resize parameters
@@ -215,6 +213,7 @@ class TedPlaylistIE(TedBaseIE):
class TedEmbedIE(InfoExtractor):
_VALID_URL = r'https?://embed(?:-ssl)?\.ted\.com/'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL}.+?)\1']
_TESTS = [{
'url': 'https://embed.ted.com/talks/janet_stovall_how_to_get_serious_about_diversity_and_inclusion_in_the_workplace',
@@ -233,10 +232,5 @@ class TedEmbedIE(InfoExtractor):
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- return [mobj.group('url') for mobj in re.finditer(
- fr'<iframe[^>]+?src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1', webpage)]
-
def _real_extract(self, url):
return self.url_result(re.sub(r'://embed(-ssl)?', '://www', url), TedTalkIE.ie_key())
diff --git a/hypervideo_dl/extractor/tele13.py b/hypervideo_dl/extractor/tele13.py
index f8a2755..212af37 100644
--- a/hypervideo_dl/extractor/tele13.py
+++ b/hypervideo_dl/extractor/tele13.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
@@ -74,7 +71,6 @@ class Tele13IE(InfoExtractor):
'ext': ext,
})
urls.append(format_url)
- self._sort_formats(formats)
return {
'id': display_id,
diff --git a/hypervideo_dl/extractor/tele5.py b/hypervideo_dl/extractor/tele5.py
index c7beee1..9260db2 100644
--- a/hypervideo_dl/extractor/tele5.py
+++ b/hypervideo_dl/extractor/tele5.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .dplay import DPlayIE
from ..compat import compat_urlparse
from ..utils import (
@@ -9,7 +6,7 @@ from ..utils import (
)
-class Tele5IE(DPlayIE):
+class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_GEO_COUNTRIES = ['DE']
_TESTS = [{
diff --git a/hypervideo_dl/extractor/telebruxelles.py b/hypervideo_dl/extractor/telebruxelles.py
index 9e8c89b..2c50a67 100644
--- a/hypervideo_dl/extractor/telebruxelles.py
+++ b/hypervideo_dl/extractor/telebruxelles.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -62,7 +59,6 @@ class TeleBruxellesIE(InfoExtractor):
rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
- self._sort_formats(formats)
is_live = 'stream/live' in rtmp_url
diff --git a/hypervideo_dl/extractor/telecinco.py b/hypervideo_dl/extractor/telecinco.py
index eecd6a5..20bb824 100644
--- a/hypervideo_dl/extractor/telecinco.py
+++ b/hypervideo_dl/extractor/telecinco.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -105,7 +102,6 @@ class TelecincoIE(InfoExtractor):
}).encode(), headers=headers)['tokens']['1']['cdn']
formats = self._extract_m3u8_formats(
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/telegraaf.py b/hypervideo_dl/extractor/telegraaf.py
index 2dc0205..13e9515 100644
--- a/hypervideo_dl/extractor/telegraaf.py
+++ b/hypervideo_dl/extractor/telegraaf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -34,7 +31,9 @@ class TelegraafIE(InfoExtractor):
article_id = self._match_id(url)
video_id = self._download_json(
- 'https://www.telegraaf.nl/graphql', article_id, query={
+ 'https://app.telegraaf.nl/graphql', article_id,
+ headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
+ query={
'query': '''{
article(uid: %s) {
videos {
@@ -76,8 +75,6 @@ class TelegraafIE(InfoExtractor):
'format_id': 'http' + ('-%s' % label if label else ''),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/telegram.py b/hypervideo_dl/extractor/telegram.py
index 2dfa261..5ec5485 100644
--- a/hypervideo_dl/extractor/telegram.py
+++ b/hypervideo_dl/extractor/telegram.py
@@ -1,37 +1,136 @@
+import re
+
from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ format_field,
+ get_element_by_class,
+ parse_duration,
+ parse_qs,
+ traverse_obj,
+ unified_timestamp,
+ update_url_query,
+ url_basename,
+)
class TelegramEmbedIE(InfoExtractor):
IE_NAME = 'telegram:embed'
- _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
+ _VALID_URL = r'https?://t\.me/(?P<channel_id>[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://t.me/europa_press/613',
+ 'md5': 'dd707708aea958c11a590e8068825f22',
'info_dict': {
'id': '613',
'ext': 'mp4',
- 'title': 'Europa Press',
- 'description': '6ce2d7e8d56eda16d80607b23db7b252',
- 'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
+ 'title': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
+ 'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
+ 'channel_id': 'europa_press',
+ 'channel': 'Europa Press ✔',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1635631203,
+ 'upload_date': '20211030',
+ 'duration': 61,
+ },
+ }, {
+ # 2-video post
+ 'url': 'https://t.me/vorposte/29342',
+ 'info_dict': {
+ 'id': 'vorposte-29342',
+ 'title': 'Форпост 29342',
+ 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ },
+ 'playlist_count': 2,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # 2-video post with --no-playlist
+ 'url': 'https://t.me/vorposte/29343',
+ 'md5': '1724e96053c18e788c8464038876e245',
+ 'info_dict': {
+ 'id': '29343',
+ 'ext': 'mp4',
+ 'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'channel_id': 'vorposte',
+ 'channel': 'Форпост',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1666384480,
+ 'upload_date': '20221021',
+ 'duration': 35,
+ },
+ 'params': {
+ 'noplaylist': True,
+ }
+ }, {
+ # 2-video post with 'single' query param
+ 'url': 'https://t.me/vorposte/29342?single',
+ 'md5': 'd20b202f1e41400a9f43201428add18f',
+ 'info_dict': {
+ 'id': '29342',
+ 'ext': 'mp4',
+ 'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
+ 'channel_id': 'vorposte',
+ 'channel': 'Форпост',
+ 'thumbnail': r're:^https?://.+',
+ 'timestamp': 1666384480,
+ 'upload_date': '20221021',
+ 'duration': 33,
},
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- webpage_embed = self._download_webpage(f'{url}?embed=1', video_id)
+ channel_id, msg_id = self._match_valid_url(url).group('channel_id', 'id')
+ embed = self._download_webpage(
+ url, msg_id, query={'embed': '1', 'single': []}, note='Downloading embed frame')
- formats = [{
- 'url': self._proto_relative_url(self._search_regex(
- '<video[^>]+src="([^"]+)"', webpage_embed, 'source')),
- 'ext': 'mp4',
- }]
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True),
- 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True),
- 'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
- webpage_embed, 'thumbnail'),
- 'formats': formats,
+ def clean_text(html_class, html):
+ text = clean_html(get_element_by_class(html_class, html))
+ return text.replace('\n', ' ') if text else None
+
+ description = clean_text('tgme_widget_message_text', embed)
+ message = {
+ 'title': description or '',
+ 'description': description,
+ 'channel': clean_text('tgme_widget_message_author', embed),
+ 'channel_id': channel_id,
+ 'timestamp': unified_timestamp(self._search_regex(
+ r'<time[^>]*datetime="([^"]*)"', embed, 'timestamp', fatal=False)),
}
+
+ videos = []
+ for video in re.findall(r'<a class="tgme_widget_message_video_player(?s:.+?)</time>', embed):
+ video_url = self._search_regex(
+ r'<video[^>]+src="([^"]+)"', video, 'video URL', fatal=False)
+ webpage_url = self._search_regex(
+ r'<a class="tgme_widget_message_video_player[^>]+href="([^"]+)"',
+ video, 'webpage URL', fatal=False)
+ if not video_url or not webpage_url:
+ continue
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ }]
+ videos.append({
+ 'id': url_basename(webpage_url),
+ 'webpage_url': update_url_query(webpage_url, {'single': True}),
+ 'duration': parse_duration(self._search_regex(
+ r'<time[^>]+duration[^>]*>([\d:]+)</time>', video, 'duration', fatal=False)),
+ 'thumbnail': self._search_regex(
+ r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
+ video, 'thumbnail', fatal=False),
+ 'formats': formats,
+ **message,
+ })
+
+ playlist_id = None
+ if len(videos) > 1 and 'single' not in parse_qs(url, keep_blank_values=True):
+ playlist_id = f'{channel_id}-{msg_id}'
+
+ if self._yes_playlist(playlist_id, msg_id):
+ return self.playlist_result(
+ videos, playlist_id, format_field(message, 'channel', f'%s {msg_id}'), description)
+ else:
+ return traverse_obj(videos, lambda _, x: x['id'] == msg_id, get_all=False)
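
The rewritten Telegram extractor now handles multi-video posts: a playlist is only built when the post carries several videos and the page was not requested with the bare `?single` flag. Since `single` carries no value, the query must be parsed with `keep_blank_values=True`; the check in isolation, using the stdlib directly:

    import urllib.parse

    def has_single_flag(url):
        query = urllib.parse.urlparse(url).query
        return 'single' in urllib.parse.parse_qs(query, keep_blank_values=True)

    print(has_single_flag('https://t.me/vorposte/29342?single'))  # True
    print(has_single_flag('https://t.me/vorposte/29342'))         # False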
diff --git a/hypervideo_dl/extractor/telemb.py b/hypervideo_dl/extractor/telemb.py
index ac2d603..3d29dac 100644
--- a/hypervideo_dl/extractor/telemb.py
+++ b/hypervideo_dl/extractor/telemb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -60,7 +57,6 @@ class TeleMBIE(InfoExtractor):
'preference': -10,
})
formats.append(fmt)
- self._sort_formats(formats)
title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
description = self._html_search_regex(
diff --git a/hypervideo_dl/extractor/telemundo.py b/hypervideo_dl/extractor/telemundo.py
index ebcecf5..88f29cb 100644
--- a/hypervideo_dl/extractor/telemundo.py
+++ b/hypervideo_dl/extractor/telemundo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
try_get,
@@ -43,7 +40,6 @@ class TelemundoIE(InfoExtractor):
redirect_url + '?format=redirect&manifest=m3u&format=redirect&Tracking=true&Embedded=true&formats=MPEG4'),
video_id, 'Processing m3u8').geturl()
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
- self._sort_formats(formats)
date = unified_timestamp(try_get(
metadata, lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0]['datePublished'].split(' ', 1)[1]))
return {
diff --git a/hypervideo_dl/extractor/telequebec.py b/hypervideo_dl/extractor/telequebec.py
index 4bef2fe..e891372 100644
--- a/hypervideo_dl/extractor/telequebec.py
+++ b/hypervideo_dl/extractor/telequebec.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/teletask.py b/hypervideo_dl/extractor/teletask.py
index b9e2ef8..a73dd68 100644
--- a/hypervideo_dl/extractor/teletask.py
+++ b/hypervideo_dl/extractor/teletask.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/telewebion.py b/hypervideo_dl/extractor/telewebion.py
index 1207b1a..550549f 100644
--- a/hypervideo_dl/extractor/telewebion.py
+++ b/hypervideo_dl/extractor/telewebion.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/tempo.py b/hypervideo_dl/extractor/tempo.py
new file mode 100644
index 0000000..1cfb956
--- /dev/null
+++ b/hypervideo_dl/extractor/tempo.py
@@ -0,0 +1,53 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_iso8601, str_or_none, traverse_obj
+
+
+class TempoIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.tempo\.co/\w+/\d+/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://video.tempo.co/read/30058/anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
+ 'info_dict': {
+ 'id': '2144438',
+ 'ext': 'mp4',
+ 'title': 'Anies Baswedan Ajukan Banding Putusan PTUN Batalkan UMP DKI',
+ 'display_id': 'anies-baswedan-ajukan-banding-putusan-ptun-batalkan-ump-dki',
+ 'duration': 84,
+ 'description': 'md5:a6822b7c4c874fa7e5bd63e96a387b66',
+ 'thumbnail': 'https://statik.tempo.co/data/2022/07/27/id_1128287/1128287_720.jpg',
+ 'timestamp': 1658911277,
+ 'upload_date': '20220727',
+ 'tags': ['Anies Baswedan', ' PTUN', ' PTUN | Pengadilan Tata Usaha Negara', ' PTUN Batalkan UMP DKI', ' UMP DKI'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ player_key, widget_id = self._search_regex(
+ r'<ivs-player\s*[^>]+data-ivs-key\s*=\s*"(?P<player_key>[\w]+)[^>]+\bdata-ivs-wid="(?P<widget_id>[\w-]+)',
+ webpage, 'player_key, widget_id', group=('player_key', 'widget_id'))
+
+ json_ld_data = self._search_json_ld(webpage, display_id)
+
+ json_data = self._download_json(
+ f'https://ivxplayer.ivideosmart.com/prod/widget/{widget_id}',
+ display_id, query={'key': player_key})
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ json_data['player']['video_url'], display_id, ext='mp4')
+
+ return {
+ 'id': str(json_data['ivx']['id']),
+ 'display_id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': (self._html_search_meta('twitter:title', webpage) or self._og_search_title(webpage)
+ or traverse_obj(json_data, ('ivx', 'name'))),
+ 'duration': int_or_none(traverse_obj(json_data, ('ivx', 'duration'))),
+ 'thumbnail': (self._html_search_meta('twitter:image:src', webpage) or self._og_search_thumbnail(webpage)
+ or traverse_obj(json_data, ('ivx', 'thumbnail_url'))),
+ 'description': (json_ld_data.get('description') or self._html_search_meta(['description', 'twitter:description'], webpage)
+ or self._og_search_description(webpage)),
+ 'timestamp': parse_iso8601(traverse_obj(json_data, ('ivx', 'created_at'))),
+ 'tags': str_or_none(self._html_search_meta('keywords', webpage), '').split(','),
+ }
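
`TempoIE` pulls two values out of one regex in a single call by passing `group=('player_key', 'widget_id')` to `_search_regex`, which then returns a tuple. The same effect with plain `re`, against a made-up player tag:

    import re

    webpage = '<ivs-player data-ivs-key="abc123" data-ivs-wid="wid-9"></ivs-player>'
    mobj = re.search(
        r'<ivs-player\s*[^>]+data-ivs-key\s*=\s*"(?P<player_key>\w+)[^>]+\bdata-ivs-wid="(?P<widget_id>[\w-]+)',
        webpage)
    player_key, widget_id = mobj.group('player_key', 'widget_id')
    print(player_key, widget_id)  # abc123 wid-9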
diff --git a/hypervideo_dl/extractor/tencent.py b/hypervideo_dl/extractor/tencent.py
new file mode 100644
index 0000000..ff8bf99
--- /dev/null
+++ b/hypervideo_dl/extractor/tencent.py
@@ -0,0 +1,452 @@
+import functools
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_encrypt_bytes
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ urljoin,
+)
+
+
+class TencentBaseIE(InfoExtractor):
+ """Subclasses must set _API_URL, _APP_VERSION, _PLATFORM, _HOST, _REFERER"""
+
+ def _get_ckey(self, video_id, url, guid):
+ ua = self.get_param('http_headers')['User-Agent']
+
+ payload = (f'{video_id}|{int(time.time())}|mg3c3b04ba|{self._APP_VERSION}|{guid}|'
+ f'{self._PLATFORM}|{url[:48]}|{ua.lower()[:48]}||Mozilla|Netscape|Windows x86_64|00|')
+
+ return aes_cbc_encrypt_bytes(
+ bytes(f'|{sum(map(ord, payload))}|{payload}', 'utf-8'),
+ b'Ok\xda\xa3\x9e/\x8c\xb0\x7f^r-\x9e\xde\xf3\x14',
+ b'\x01PJ\xf3V\xe6\x19\xcf.B\xbb\xa6\x8c?p\xf9',
+ padding_mode='whitespace').hex().upper()
+
+ def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality):
+ guid = ''.join([random.choice(string.digits + string.ascii_lowercase) for _ in range(16)])
+ ckey = self._get_ckey(video_id, video_url, guid)
+ query = {
+ 'vid': video_id,
+ 'cid': series_id,
+ 'cKey': ckey,
+ 'encryptVer': '8.1',
+ 'spcaptiontype': '1' if subtitle_format == 'vtt' else '0',
+ 'sphls': '2' if video_format == 'hls' else '0',
+ 'dtype': '3' if video_format == 'hls' else '0',
+ 'defn': video_quality,
+ 'spsrt': '2', # Enable subtitles
+ 'sphttps': '1', # Enable HTTPS
+ 'otype': 'json',
+ 'spwm': '1',
+ # For SHD
+ 'host': self._HOST,
+ 'referer': self._REFERER,
+ 'ehost': video_url,
+ 'appVer': self._APP_VERSION,
+ 'platform': self._PLATFORM,
+ # For VQQ
+ 'guid': guid,
+ 'flowid': ''.join(random.choice(string.digits + string.ascii_lowercase) for _ in range(32)),
+ }
+
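+        # The endpoint answers with JSONP-style text, roughly
+        # `QZOutputJson={...};` (shape assumed), so the JSON object is carved
+        # out with _search_json instead of being parsed directly.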
+ return self._search_json(r'QZOutputJson=', self._download_webpage(
+ self._API_URL, video_id, query=query), 'api_response', video_id)
+
+ def _extract_video_formats_and_subtitles(self, api_response, video_id):
+ video_response = api_response['vl']['vi'][0]
+ video_width, video_height = video_response.get('vw'), video_response.get('vh')
+
+ formats, subtitles = [], {}
+ for video_format in video_response['ul']['ui']:
+ if video_format.get('hls') or determine_ext(video_format['url']) == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_format['url'] + traverse_obj(video_format, ('hls', 'pt'), default=''),
+ video_id, 'mp4', fatal=False)
+ for f in fmts:
+ f.update({'width': video_width, 'height': video_height})
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': f'{video_format["url"]}{video_response["fn"]}?vkey={video_response["fvkey"]}',
+ 'width': video_width,
+ 'height': video_height,
+ 'ext': 'mp4',
+ })
+
+ return formats, subtitles
+
+ def _extract_video_native_subtitles(self, api_response, subtitles_format):
+ subtitles = {}
+ for subtitle in traverse_obj(api_response, ('sfl', 'fi')) or ():
+ subtitles.setdefault(subtitle['lang'].lower(), []).append({
+ 'url': subtitle['url'],
+ 'ext': subtitles_format,
+ 'protocol': 'm3u8_native' if determine_ext(subtitle['url']) == 'm3u8' else 'http',
+ })
+
+ return subtitles
+
+ def _extract_all_video_formats_and_subtitles(self, url, video_id, series_id):
+ formats, subtitles = [], {}
+ for video_format, subtitle_format, video_quality in (
+ # '': 480p, 'shd': 720p, 'fhd': 1080p
+ ('mp4', 'srt', ''), ('hls', 'vtt', 'shd'), ('hls', 'vtt', 'fhd')):
+ api_response = self._get_video_api_response(
+ url, video_id, series_id, subtitle_format, video_format, video_quality)
+
+ if api_response.get('em') != 0 and api_response.get('exem') != 0:
+            if '您所在区域暂无此内容版权' in (api_response.get('msg') or ''):
+ self.raise_geo_restricted()
+ raise ExtractorError(f'Tencent said: {api_response.get("msg")}')
+
+ fmts, subs = self._extract_video_formats_and_subtitles(api_response, video_id)
+ native_subtitles = self._extract_video_native_subtitles(api_response, subtitle_format)
+
+ formats.extend(fmts)
+ self._merge_subtitles(subs, native_subtitles, target=subtitles)
+
+ return formats, subtitles
+
+ def _get_clean_title(self, title):
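+        # Strips site-name suffixes, e.g. (illustrative):
+        # '鸡毛飞上天 第01集 - 腾讯视频' -> '鸡毛飞上天 第01集'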
+ return re.sub(
+ r'\s*[_\-]\s*(?:Watch online|腾讯视频|(?:高清)?1080P在线观看平台).*?$',
+ '', title or '').strip() or None
+
+
+class VQQBaseIE(TencentBaseIE):
+ _VALID_URL_BASE = r'https?://v\.qq\.com'
+
+ _API_URL = 'https://h5vv6.video.qq.com/getvinfo'
+ _APP_VERSION = '3.5.57'
+ _PLATFORM = '10901'
+ _HOST = 'v.qq.com'
+ _REFERER = 'v.qq.com'
+
+ def _get_webpage_metadata(self, webpage, video_id):
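+        # VQQ pages hydrate their state via an inline `window.__pinia = {...}`
+        # <script>; the value is JavaScript rather than strict JSON, hence
+        # the js_to_json pass.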
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)<script[^>]*>[^<]*window\.__pinia\s*=\s*([^<]+)</script>',
+ webpage, 'pinia data', fatal=False),
+ video_id, transform_source=js_to_json, fatal=False)
+
+
+class VQQVideoIE(VQQBaseIE):
+ IE_NAME = 'vqq:video'
+ _VALID_URL = VQQBaseIE._VALID_URL_BASE + r'/x/(?:page|cover/(?P<series_id>\w+))/(?P<id>\w+)'
+
+ _TESTS = [{
+ 'url': 'https://v.qq.com/x/page/q326831cny0.html',
+ 'md5': '826ef93682df09e3deac4a6e6e8cdb6e',
+ 'info_dict': {
+ 'id': 'q326831cny0',
+ 'ext': 'mp4',
+ 'title': '我是选手:雷霆裂阵,终极时刻',
+ 'description': 'md5:e7ed70be89244017dac2a835a10aeb1e',
+ 'thumbnail': r're:^https?://[^?#]+q326831cny0',
+ },
+ }, {
+ 'url': 'https://v.qq.com/x/page/o3013za7cse.html',
+ 'md5': 'b91cbbeada22ef8cc4b06df53e36fa21',
+ 'info_dict': {
+ 'id': 'o3013za7cse',
+ 'ext': 'mp4',
+ 'title': '欧阳娜娜VLOG',
+ 'description': 'md5:29fe847497a98e04a8c3826e499edd2e',
+ 'thumbnail': r're:^https?://[^?#]+o3013za7cse',
+ },
+ }, {
+ 'url': 'https://v.qq.com/x/cover/7ce5noezvafma27/a00269ix3l8.html',
+ 'md5': '71459c5375c617c265a22f083facce67',
+ 'info_dict': {
+ 'id': 'a00269ix3l8',
+ 'ext': 'mp4',
+ 'title': '鸡毛飞上天 第01集',
+ 'description': 'md5:8cae3534327315b3872fbef5e51b5c5b',
+ 'thumbnail': r're:^https?://[^?#]+7ce5noezvafma27',
+ 'series': '鸡毛飞上天',
+ },
+ }, {
+ 'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html',
+ 'md5': '96b9fd4a189fdd4078c111f21d7ac1bc',
+ 'info_dict': {
+ 'id': 's0043cwsgj0',
+ 'ext': 'mp4',
+ 'title': '第1集:如何快乐吃糖?',
+ 'description': 'md5:1d8c3a0b8729ae3827fa5b2d3ebd5213',
+ 'thumbnail': r're:^https?://[^?#]+s0043cwsgj0',
+ 'series': '青年理工工作者生活研究所',
+ },
+ }, {
+ # Geo-restricted to China
+ 'url': 'https://v.qq.com/x/cover/mcv8hkc8zk8lnov/x0036x5qqsr.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, series_id = self._match_valid_url(url).group('id', 'series_id')
+ webpage = self._download_webpage(url, video_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, video_id)
+
+ formats, subtitles = self._extract_all_video_formats_and_subtitles(url, video_id, series_id)
+ return {
+ 'id': video_id,
+ 'title': self._get_clean_title(self._og_search_title(webpage)
+ or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'title'))),
+ 'description': (self._og_search_description(webpage)
+ or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'desc'))),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': (self._og_search_thumbnail(webpage)
+ or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'pic160x90'))),
+ 'series': traverse_obj(webpage_metadata, ('global', 'coverInfo', 'title')),
+ }
+
+
+class VQQSeriesIE(VQQBaseIE):
+ IE_NAME = 'vqq:series'
+ _VALID_URL = VQQBaseIE._VALID_URL_BASE + r'/x/cover/(?P<id>\w+)\.html/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://v.qq.com/x/cover/7ce5noezvafma27.html',
+ 'info_dict': {
+ 'id': '7ce5noezvafma27',
+ 'title': '鸡毛飞上天',
+ 'description': 'md5:8cae3534327315b3872fbef5e51b5c5b',
+ },
+ 'playlist_count': 55,
+ }, {
+ 'url': 'https://v.qq.com/x/cover/oshd7r0vy9sfq8e.html',
+ 'info_dict': {
+ 'id': 'oshd7r0vy9sfq8e',
+ 'title': '恋爱细胞2',
+ 'description': 'md5:9d8a2245679f71ca828534b0f95d2a03',
+ },
+ 'playlist_count': 12,
+ }]
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+ webpage = self._download_webpage(url, series_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, series_id)
+
+ episode_paths = [f'/x/cover/{series_id}/{video_id}.html' for video_id in re.findall(
+ r'<div[^>]+data-vid="(?P<video_id>[^"]+)"[^>]+class="[^"]+episode-item-rect--number',
+ webpage)]
+
+ return self.playlist_from_matches(
+ episode_paths, series_id, ie=VQQVideoIE, getter=functools.partial(urljoin, url),
+ title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
+ or self._og_search_title(webpage)),
+ description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
+ or self._og_search_description(webpage)))
+
+
+class WeTvBaseIE(TencentBaseIE):
+ _VALID_URL_BASE = r'https?://(?:www\.)?wetv\.vip/(?:[^?#]+/)?play'
+
+ _API_URL = 'https://play.wetv.vip/getvinfo'
+ _APP_VERSION = '3.5.57'
+ _PLATFORM = '4830201'
+ _HOST = 'wetv.vip'
+ _REFERER = 'wetv.vip'
+
+ def _get_webpage_metadata(self, webpage, video_id):
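+        # WeTV is a Next.js app: the page metadata sits under
+        # props.pageProps.data inside the __NEXT_DATA__ JSON blob.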
+ return self._parse_json(
+ traverse_obj(self._search_nextjs_data(webpage, video_id), ('props', 'pageProps', 'data')),
+ video_id, fatal=False)
+
+ def _extract_episode(self, url):
+ video_id, series_id = self._match_valid_url(url).group('id', 'series_id')
+ webpage = self._download_webpage(url, video_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, video_id)
+
+ formats, subtitles = self._extract_all_video_formats_and_subtitles(url, video_id, series_id)
+ return {
+ 'id': video_id,
+ 'title': self._get_clean_title(self._og_search_title(webpage)
+ or traverse_obj(webpage_metadata, ('coverInfo', 'title'))),
+ 'description': (traverse_obj(webpage_metadata, ('coverInfo', 'description'))
+ or self._og_search_description(webpage)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'duration'))),
+ 'series': traverse_obj(webpage_metadata, ('coverInfo', 'title')),
+ 'episode_number': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'episode'))),
+ }
+
+ def _extract_series(self, url, ie):
+ series_id = self._match_id(url)
+ webpage = self._download_webpage(url, series_id)
+ webpage_metadata = self._get_webpage_metadata(webpage, series_id)
+
+        episode_paths = ([f'/play/{series_id}/{episode["vid"]}' for episode in traverse_obj(webpage_metadata, 'videoList') or []]
+ or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage))
+
+ return self.playlist_from_matches(
+ episode_paths, series_id, ie=ie, getter=functools.partial(urljoin, url),
+ title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
+ or self._og_search_title(webpage)),
+ description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
+ or self._og_search_description(webpage)))
+
+
+class WeTvEpisodeIE(WeTvBaseIE):
+ IE_NAME = 'wetv:episode'
+ _VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<series_id>\w+)(?:-[^?#]+)?/(?P<id>\w+)(?:-[^?#]+)?'
+
+ _TESTS = [{
+ 'url': 'https://wetv.vip/en/play/air11ooo2rdsdi3-Cute-Programmer/v0040pr89t9-EP1-Cute-Programmer',
+ 'md5': '0c70fdfaa5011ab022eebc598e64bbbe',
+ 'info_dict': {
+ 'id': 'v0040pr89t9',
+ 'ext': 'mp4',
+ 'title': 'EP1: Cute Programmer',
+ 'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
+ 'thumbnail': r're:^https?://[^?#]+air11ooo2rdsdi3',
+ 'series': 'Cute Programmer',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 2835,
+ },
+ }, {
+ 'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu/p0039b9nvik',
+ 'md5': '3b3c15ca4b9a158d8d28d5aa9d7c0a49',
+ 'info_dict': {
+ 'id': 'p0039b9nvik',
+ 'ext': 'mp4',
+ 'title': 'EP1: You Are My Glory',
+ 'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
+ 'thumbnail': r're:^https?://[^?#]+u37kgfnfzs73kiu',
+ 'series': 'You Are My Glory',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 2454,
+ },
+ }, {
+ 'url': 'https://wetv.vip/en/play/lcxgwod5hapghvw-WeTV-PICK-A-BOO/i0042y00lxp-Zhao-Lusi-Describes-The-First-Experiences-She-Had-In-Who-Rules-The-World-%7C-WeTV-PICK-A-BOO',
+ 'md5': '71133f5c2d5d6cad3427e1b010488280',
+ 'info_dict': {
+ 'id': 'i0042y00lxp',
+ 'ext': 'mp4',
+ 'title': 'md5:f7a0857dbe5fbbe2e7ad630b92b54e6a',
+ 'description': 'md5:76260cb9cdc0ef76826d7ca9d92fadfa',
+ 'thumbnail': r're:^https?://[^?#]+lcxgwod5hapghvw',
+ 'series': 'WeTV PICK-A-BOO',
+ 'episode': 'Episode 0',
+ 'episode_number': 0,
+ 'duration': 442,
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_episode(url)
+
+
+class WeTvSeriesIE(WeTvBaseIE):
+ _VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<id>\w+)(?:-[^/?#]+)?/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://wetv.vip/play/air11ooo2rdsdi3-Cute-Programmer',
+ 'info_dict': {
+ 'id': 'air11ooo2rdsdi3',
+ 'title': 'Cute Programmer',
+ 'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
+ },
+ 'playlist_count': 30,
+ }, {
+ 'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu-You-Are-My-Glory',
+ 'info_dict': {
+ 'id': 'u37kgfnfzs73kiu',
+ 'title': 'You Are My Glory',
+ 'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
+ },
+ 'playlist_count': 32,
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_series(url, WeTvEpisodeIE)
+
+
+class IflixBaseIE(WeTvBaseIE):
+ _VALID_URL_BASE = r'https?://(?:www\.)?iflix\.com/(?:[^?#]+/)?play'
+
+ _API_URL = 'https://vplay.iflix.com/getvinfo'
+ _APP_VERSION = '3.5.57'
+ _PLATFORM = '330201'
+ _HOST = 'www.iflix.com'
+ _REFERER = 'www.iflix.com'
+
+
+class IflixEpisodeIE(IflixBaseIE):
+ IE_NAME = 'iflix:episode'
+ _VALID_URL = IflixBaseIE._VALID_URL_BASE + r'/(?P<series_id>\w+)(?:-[^?#]+)?/(?P<id>\w+)(?:-[^?#]+)?'
+
+ _TESTS = [{
+ 'url': 'https://www.iflix.com/en/play/daijrxu03yypu0s/a0040kvgaza',
+ 'md5': '9740f9338c3a2105290d16b68fb3262f',
+ 'info_dict': {
+ 'id': 'a0040kvgaza',
+ 'ext': 'mp4',
+ 'title': 'EP1: Put Your Head On My Shoulder 2021',
+ 'description': 'md5:c095a742d3b7da6dfedd0c8170727a42',
+ 'thumbnail': r're:^https?://[^?#]+daijrxu03yypu0s',
+ 'series': 'Put Your Head On My Shoulder 2021',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 2639,
+ },
+ }, {
+ 'url': 'https://www.iflix.com/en/play/fvvrcc3ra9lbtt1-Take-My-Brother-Away/i0029sd3gm1-EP1%EF%BC%9ATake-My-Brother-Away',
+ 'md5': '375c9b8478fdedca062274b2c2f53681',
+ 'info_dict': {
+ 'id': 'i0029sd3gm1',
+ 'ext': 'mp4',
+ 'title': 'EP1:Take My Brother Away',
+ 'description': 'md5:f0f7be1606af51cd94d5627de96b0c76',
+ 'thumbnail': r're:^https?://[^?#]+fvvrcc3ra9lbtt1',
+ 'series': 'Take My Brother Away',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'duration': 228,
+ },
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_episode(url)
+
+
+class IflixSeriesIE(IflixBaseIE):
+ _VALID_URL = IflixBaseIE._VALID_URL_BASE + r'/(?P<id>\w+)(?:-[^/?#]+)?/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.iflix.com/en/play/g21a6qk4u1s9x22-You-Are-My-Hero',
+ 'info_dict': {
+ 'id': 'g21a6qk4u1s9x22',
+ 'title': 'You Are My Hero',
+ 'description': 'md5:9c4d844bc0799cd3d2b5aed758a2050a',
+ },
+ 'playlist_count': 40,
+ }, {
+ 'url': 'https://www.iflix.com/play/0s682hc45t0ohll',
+ 'info_dict': {
+ 'id': '0s682hc45t0ohll',
+ 'title': 'Miss Gu Who Is Silent',
+ 'description': 'md5:a9651d0236f25af06435e845fa2f8c78',
+ },
+ 'playlist_count': 20,
+ }]
+
+ def _real_extract(self, url):
+ return self._extract_series(url, IflixEpisodeIE)
diff --git a/hypervideo_dl/extractor/tennistv.py b/hypervideo_dl/extractor/tennistv.py
index 58fdece..bc64226 100644
--- a/hypervideo_dl/extractor/tennistv.py
+++ b/hypervideo_dl/extractor/tennistv.py
@@ -1,19 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
+import urllib.parse
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
+ random_uuidv4,
unified_timestamp,
+ urlencode_postdata,
)
class TennisTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
'info_dict': {
'id': 'indian-wells-2018-verdasco-fritz',
@@ -28,86 +26,130 @@ class TennisTVIE(InfoExtractor):
'skip_download': True,
},
'skip': 'Requires email and password of a subscribed account',
- }
+ }, {
+ 'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5',
+ 'info_dict': {
+ 'id': '2650480',
+ 'ext': 'mp4',
+ 'title': 'Best Matches of 2022 - Part 5',
+ 'description': 'md5:36dec3bfae7ed74bd79e48045b17264c',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'Requires email and password of a subscribed account',
+ }]
_NETRC_MACHINE = 'tennistv'
- _session_token = None
-
- def _perform_login(self, username, password):
-
- login_form = {
- 'Email': username,
- 'Password': password,
- }
- login_json = json.dumps(login_form).encode('utf-8')
- headers = {
- 'content-type': 'application/json',
- 'Referer': 'https://www.tennistv.com/login',
- 'Origin': 'https://www.tennistv.com',
- }
- login_result = self._download_json(
- 'https://www.tennistv.com/api/users/v1/login', None,
- note='Logging in',
- errnote='Login failed (wrong password?)',
- headers=headers,
- data=login_json)
+ access_token, refresh_token = None, None
+ _PARTNER_ID = 3001482
+ _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}'
+ _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect'
+ _HEADERS = {
+ 'origin': 'https://www.tennistv.com',
+ 'referer': 'https://www.tennistv.com/',
+        'content-type': 'application/x-www-form-urlencoded',
+ }
- if login_result['error']['errorCode']:
- raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage']))
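+    # Login is a stock Keycloak OpenID-Connect authorization-code flow:
+    #   1. GET  {_AUTH_BASE_URL}/auth               -> login form
+    #   2. POST the credentials to the form's action URL
+    #   3. GET  {_AUTH_BASE_URL}/auth (prompt=none) -> redirect whose URL
+    #      carries code=...
+    #   4. POST {_AUTH_BASE_URL}/token              -> access/refresh tokens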
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page',
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://tennistv.com',
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid'
+ })
+
+ post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url')
+ temp_page = self._download_webpage(
+ post_url, None, 'Sending login data', 'Unable to send login data',
+ headers=self._HEADERS, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ 'submitAction': 'Log In'
+ }))
+ if 'Your username or password was incorrect' in temp_page:
+ raise ExtractorError('Your username or password was incorrect', expected=True)
+
+ handle = self._request_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS,
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html',
+ 'state': random_uuidv4(),
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid',
+ 'nonce': random_uuidv4(),
+ 'prompt': 'none'
+ })
+
+ self.get_token(None, {
+ 'code': urllib.parse.parse_qs(handle.geturl())['code'][-1],
+ 'grant_type': 'authorization_code',
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html'
+ })
+
+ def get_token(self, video_id, payload):
+ res = self._download_json(
+ f'{self._AUTH_BASE_URL}/token', video_id, 'Fetching tokens',
+ 'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload))
+
+ self.access_token = res.get('access_token') or self.access_token
+ self.refresh_token = res.get('refresh_token') or self.refresh_token
- if login_result['entitlement'] != 'SUBSCRIBED':
- self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))
+ def _real_initialize(self):
+ if self.access_token and self.refresh_token:
+ return
- self._session_token = login_result['sessionToken']
+ cookies = self._get_cookies('https://www.tennistv.com/')
+ if not cookies.get('access_token') or not cookies.get('refresh_token'):
+ self.raise_login_required()
+ self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value
- def _real_initialize(self):
- if not self._session_token:
- raise self.raise_login_required('Login info is needed for this website', method='password')
+    def _download_session_json(self, video_id, entryid):
+ return self._download_json(
+ f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}',
+ video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- internal_id = self._search_regex(r'video=([\w-]+)', webpage, 'internal video id')
+ entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID')
+ session_json = self._download_session_json(video_id, entryid)
- headers = {
- 'Origin': 'https://www.tennistv.com',
- 'authorization': 'ATP %s' % self._session_token,
- 'content-type': 'application/json',
- 'Referer': url,
- }
- check_data = {
- 'videoID': internal_id,
- 'VideoUrlType': 'HLS',
- }
- check_json = json.dumps(check_data).encode('utf-8')
- check_result = self._download_json(
- 'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
- video_id, note='Checking video authorization', headers=headers, data=check_json)
- formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
- self._sort_formats(formats)
-
- vdata = self._download_json(
- 'https://www.tennistv.com/api/en/v2/none/common/video/%s' % video_id,
- video_id, headers=headers)
-
- timestamp = unified_timestamp(vdata['timestamp'])
- thumbnail = vdata['video']['thumbnailUrl']
- description = vdata['displayText']['description']
- title = vdata['video']['title']
-
- series = vdata['tour']
- venue = vdata['displayText']['venue']
- round_str = vdata['seo']['round']
+ k_session = session_json.get('KSession')
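+        # A missing KSession usually means the access token has expired;
+        # refresh once and retry before giving up.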
+ if k_session is None:
+ self.get_token(video_id, {
+ 'grant_type': 'refresh_token',
+ 'refresh_token': self.refresh_token,
+ 'client_id': 'tennis-tv-web'
+ })
+ k_session = self._download_session_json(video_id, entryid).get('KSession')
+ if k_session is None:
+ raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True)
+
+ if session_json.get('ErrorMessage'):
+ self.report_warning(session_json['ErrorMessage'])
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)
return {
'id': video_id,
- 'title': title,
- 'description': description,
+ 'title': self._generic_title('', webpage),
+ 'description': self._html_search_regex(
+ (r'<span itemprop="description" content=["\']([^"\']+)["\']>', *self._og_regexes('description')),
+ webpage, 'description', fatal=False),
+ 'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920',
+ 'timestamp': unified_timestamp(self._html_search_regex(
+ r'<span itemprop="uploadDate" content=["\']([^"\']+)["\']>', webpage, 'upload time', fatal=False)),
+ 'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None,
+ 'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None,
+ 'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None,
'formats': formats,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'series': series,
- 'season': venue,
- 'episode': round_str,
+ 'subtitles': subtitles,
}
diff --git a/hypervideo_dl/extractor/tenplay.py b/hypervideo_dl/extractor/tenplay.py
index 5c7b545..633032e 100644
--- a/hypervideo_dl/extractor/tenplay.py
+++ b/hypervideo_dl/extractor/tenplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from datetime import datetime
import base64
@@ -101,7 +98,6 @@ class TenPlayIE(InfoExtractor):
if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU'])
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
- self._sort_formats(formats)
return {
'formats': formats,
diff --git a/hypervideo_dl/extractor/testurl.py b/hypervideo_dl/extractor/testurl.py
index 8bc512a..dccca10 100644
--- a/hypervideo_dl/extractor/testurl.py
+++ b/hypervideo_dl/extractor/testurl.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -10,55 +8,38 @@ class TestURLIE(InfoExtractor):
""" Allows addressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list
- _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'
+ _VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
def _real_extract(self, url):
- from ..extractor import gen_extractors
+ from . import gen_extractor_classes
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- extractor_id = mobj.group('extractor')
- all_extractors = gen_extractors()
+ extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
+ if not extractor_id:
+ return {'id': ':test', 'title': '', 'url': url}
rex = re.compile(extractor_id, flags=re.IGNORECASE)
- matching_extractors = [
- e for e in all_extractors if rex.search(e.IE_NAME)]
+ matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
if len(matching_extractors) == 0:
- raise ExtractorError(
- 'No extractors matching %r found' % extractor_id,
- expected=True)
+ raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
elif len(matching_extractors) > 1:
- # Is it obvious which one to pick?
- try:
+ try: # Check for exact match
extractor = next(
ie for ie in matching_extractors
if ie.IE_NAME.lower() == extractor_id.lower())
except StopIteration:
raise ExtractorError(
- ('Found multiple matching extractors: %s' %
- ' '.join(ie.IE_NAME for ie in matching_extractors)),
+ 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
expected=True)
else:
extractor = matching_extractors[0]
- num_str = mobj.group('num')
- num = int(num_str) if num_str else 0
-
- testcases = []
- t = getattr(extractor, '_TEST', None)
- if t:
- testcases.append(t)
- testcases.extend(getattr(extractor, '_TESTS', []))
-
+ testcases = tuple(extractor.get_testcases(True))
try:
- tc = testcases[num]
+ tc = testcases[int(num or 0)]
except IndexError:
raise ExtractorError(
- ('Test case %d not found, got only %d tests' %
- (num, len(testcases))),
- expected=True)
-
- self.to_screen('Test URL: %s' % tc['url'])
+ f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)
- return self.url_result(tc['url'], video_id=video_id)
+ self.to_screen(f'Test URL: {tc["url"]}')
+ return self.url_result(tc['url'])
diff --git a/hypervideo_dl/extractor/tf1.py b/hypervideo_dl/extractor/tf1.py
index 44785bc..4cf0322 100644
--- a/hypervideo_dl/extractor/tf1.py
+++ b/hypervideo_dl/extractor/tf1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/tfo.py b/hypervideo_dl/extractor/tfo.py
index 0631cb7..a24789c 100644
--- a/hypervideo_dl/extractor/tfo.py
+++ b/hypervideo_dl/extractor/tfo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/theholetv.py b/hypervideo_dl/extractor/theholetv.py
new file mode 100644
index 0000000..a13f83b
--- /dev/null
+++ b/hypervideo_dl/extractor/theholetv.py
@@ -0,0 +1,35 @@
+from .common import InfoExtractor
+from ..utils import extract_attributes, remove_end
+
+
+class TheHoleTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?the-hole\.tv/episodes/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://the-hole.tv/episodes/gromkii-vopros-sergey-orlov',
+ 'md5': 'fea6682f47786f3ae5a6cbd635ec4bf9',
+ 'info_dict': {
+ 'id': 'gromkii-vopros-sergey-orlov',
+ 'ext': 'mp4',
+ 'title': 'Сергей Орлов — Громкий вопрос',
+ 'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts',
+ 'description': 'md5:45741a9202331f995d9fb76996759379'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ player_attrs = extract_attributes(self._search_regex(
+ r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player'))
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ player_attrs['data-player-source-value'], video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': remove_end(self._html_extract_title(webpage), ' — The Hole'),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': player_attrs.get('data-player-poster-value'),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
diff --git a/hypervideo_dl/extractor/theintercept.py b/hypervideo_dl/extractor/theintercept.py
index f23b587..a991a4d 100644
--- a/hypervideo_dl/extractor/theintercept.py
+++ b/hypervideo_dl/extractor/theintercept.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/theplatform.py b/hypervideo_dl/extractor/theplatform.py
index c2729f1..e659b8e 100644
--- a/hypervideo_dl/extractor/theplatform.py
+++ b/hypervideo_dl/extractor/theplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import time
import hmac
@@ -126,6 +123,13 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
+ _EMBED_REGEX = [
+ r'''(?x)
+ <meta\s+
+ property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
+ content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2''',
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1'
+ ]
_TESTS = [{
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
@@ -195,22 +199,11 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
}]
@classmethod
- def _extract_urls(cls, webpage):
- m = re.search(
- r'''(?x)
- <meta\s+
- property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
- content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
- ''', webpage)
- if m:
- return [m.group('url')]
-
+ def _extract_embed_urls(cls, url, webpage):
# Are whitespaces ignored in URLs?
# https://github.com/ytdl-org/youtube-dl/issues/12044
- matches = re.findall(
- r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
- if matches:
- return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield re.sub(r'\s', '', embed_url)
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -303,7 +296,6 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
- self._sort_formats(formats)
ret = self._extract_theplatform_metadata(path, video_id)
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
@@ -373,8 +365,6 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
- self._sort_formats(formats)
-
thumbnails = [{
'url': thumbnail['plfile$url'],
'width': int_or_none(thumbnail.get('plfile$width')),
diff --git a/hypervideo_dl/extractor/thescene.py b/hypervideo_dl/extractor/thescene.py
deleted file mode 100644
index cd64235..0000000
--- a/hypervideo_dl/extractor/thescene.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-from ..compat import compat_urlparse
-
-
-class TheSceneIE(InfoExtractor):
- _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
-
- _TEST = {
- 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear',
- 'info_dict': {
- 'id': '520e8faac2b4c00e3c6e5f43',
- 'ext': 'mp4',
- 'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
- 'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
- 'duration': 127,
- 'series': 'Style.com Fashion Shows',
- 'season': 'Ready To Wear Spring 2013',
- 'tags': list,
- 'categories': list,
- 'upload_date': '20120913',
- 'timestamp': 1347512400,
- 'uploader': 'vogue',
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- player_url = compat_urlparse.urljoin(
- url,
- self._html_search_regex(
- r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url'))
-
- return {
- '_type': 'url_transparent',
- 'display_id': display_id,
- 'url': player_url,
- 'ie_key': 'CondeNast',
- }
diff --git a/hypervideo_dl/extractor/thestar.py b/hypervideo_dl/extractor/thestar.py
index c3f1188..293c34c 100644
--- a/hypervideo_dl/extractor/thestar.py
+++ b/hypervideo_dl/extractor/thestar.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/thesun.py b/hypervideo_dl/extractor/thesun.py
index 15d4a69..ba58482 100644
--- a/hypervideo_dl/extractor/thesun.py
+++ b/hypervideo_dl/extractor/thesun.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/theta.py b/hypervideo_dl/extractor/theta.py
index 8b6d70a..ecf0ea0 100644
--- a/hypervideo_dl/extractor/theta.py
+++ b/hypervideo_dl/extractor/theta.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import try_get
@@ -44,7 +41,6 @@ class ThetaStreamIE(InfoExtractor):
if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
channel = try_get(info, lambda x: x['user']['username']) # using this field instead of channel_id due to capitalization
@@ -81,7 +77,6 @@ class ThetaVideoIE(InfoExtractor):
m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/theweatherchannel.py b/hypervideo_dl/extractor/theweatherchannel.py
index 9e506c9..682e433 100644
--- a/hypervideo_dl/extractor/theweatherchannel.py
+++ b/hypervideo_dl/extractor/theweatherchannel.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .theplatform import ThePlatformIE
@@ -11,7 +8,7 @@ from ..utils import (
)
-class TheWeatherChannelIE(ThePlatformIE):
+class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
_TESTS = [{
'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
@@ -82,7 +79,6 @@ class TheWeatherChannelIE(ThePlatformIE):
'url': variant_url,
'format_id': variant_id,
})
- self._sort_formats(formats)
cc_url = video_data.get('cc_url')
diff --git a/hypervideo_dl/extractor/thisamericanlife.py b/hypervideo_dl/extractor/thisamericanlife.py
index 91e45f2..9a3d798 100644
--- a/hypervideo_dl/extractor/thisamericanlife.py
+++ b/hypervideo_dl/extractor/thisamericanlife.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/thisav.py b/hypervideo_dl/extractor/thisav.py
index 6bb00b3..b1cd57d 100644
--- a/hypervideo_dl/extractor/thisav.py
+++ b/hypervideo_dl/extractor/thisav.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import remove_end
diff --git a/hypervideo_dl/extractor/thisoldhouse.py b/hypervideo_dl/extractor/thisoldhouse.py
index 8a1d173..55b6413 100644
--- a/hypervideo_dl/extractor/thisoldhouse.py
+++ b/hypervideo_dl/extractor/thisoldhouse.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import HEADRequest
diff --git a/hypervideo_dl/extractor/threeqsdn.py b/hypervideo_dl/extractor/threeqsdn.py
index 00a51dc..b104190 100644
--- a/hypervideo_dl/extractor/threeqsdn.py
+++ b/hypervideo_dl/extractor/threeqsdn.py
@@ -1,7 +1,3 @@
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -18,6 +14,7 @@ class ThreeQSDNIE(InfoExtractor):
IE_NAME = '3qsdn'
IE_DESC = '3Q SDN'
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_REGEX = [r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % _VALID_URL]
_TESTS = [{
# https://player.3qsdn.com/demo.html
'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
@@ -78,12 +75,13 @@ class ThreeQSDNIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
- if mobj:
- return mobj.group('url')
+ def _extract_from_webpage(self, url, webpage):
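+        # Wrap every embed found by the base class as url_transparent and
+        # record the embedding page's host (taken from `url`) as a
+        # best-guess uploader.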
+ for res in super()._extract_from_webpage(url, webpage):
+ yield {
+ **res,
+ '_type': 'url_transparent',
+ 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+ }
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -130,10 +128,6 @@ class ThreeQSDNIE(InfoExtractor):
'vcodec': 'none' if height == 0 else None,
'width': int(height * aspect) if height and aspect else None,
})
- # It seems like this would be correctly handled by default
- # However, unless someone can confirm this, the old
- # behaviour is being kept as-is
- self._sort_formats(formats, ('res', 'source_preference'))
for subtitle in (config.get('subtitles') or []):
src = subtitle.get('src')
@@ -155,4 +149,8 @@ class ThreeQSDNIE(InfoExtractor):
'is_live': live,
'formats': formats,
'subtitles': subtitles,
+ # It seems like this would be correctly handled by default
+ # However, unless someone can confirm this, the old
+ # behaviour is being kept as-is
+ '_format_sort_fields': ('res', 'source_preference')
}
diff --git a/hypervideo_dl/extractor/threespeak.py b/hypervideo_dl/extractor/threespeak.py
index fe6a955..dbd5090 100644
--- a/hypervideo_dl/extractor/threespeak.py
+++ b/hypervideo_dl/extractor/threespeak.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -60,7 +57,6 @@ class ThreeSpeakIE(InfoExtractor):
'quality': 11,
'format_note': 'Original file',
})
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title') or data_json.get('root_title'),
diff --git a/hypervideo_dl/extractor/tiktok.py b/hypervideo_dl/extractor/tiktok.py
index c1d6c54..1bbf884 100644
--- a/hypervideo_dl/extractor/tiktok.py
+++ b/hypervideo_dl/extractor/tiktok.py
@@ -1,35 +1,32 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
+import json
import random
import string
import time
-import json
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse
-)
+from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
HEADRequest,
+ LazyList,
+ UnsupportedError,
+ get_element_by_id,
get_first,
int_or_none,
join_nonempty,
- LazyList,
+ qualities,
+ remove_start,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_get,
url_or_none,
- qualities,
)
class TikTokBaseIE(InfoExtractor):
- _APP_VERSIONS = [('20.9.3', '293'), ('20.4.3', '243'), ('20.2.1', '221'), ('20.1.2', '212'), ('20.0.4', '204')]
+ _APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
_WORKING_APP_VERSION = None
_APP_NAME = 'trill'
_AID = 1180
@@ -38,6 +35,14 @@ class TikTokBaseIE(InfoExtractor):
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
+ @staticmethod
+ def _create_url(user_id, video_id):
+ return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
+
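+    # TikTok pages embed their hydration state roughly as
+    #   <script id="SIGI_STATE" type="application/json">{...}</script>
+    # (or under id="sigi-persisted-data"); escape_value=False below lets the
+    # id argument act as a regex alternation over both variants.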
+ def _get_sigi_state(self, webpage, display_id):
+ return self._parse_json(get_element_by_id(
+ 'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
+
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
@@ -47,7 +52,7 @@ class TikTokBaseIE(InfoExtractor):
return self._download_json(
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
fatal=fatal, note=note, errnote=errnote, headers={
- 'User-Agent': f'com.ss.android.ugc.trill/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
+ 'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
'Accept': 'application/json',
}, query=query)
@@ -122,11 +127,21 @@ class TikTokBaseIE(InfoExtractor):
continue
raise e
+ def _extract_aweme_app(self, aweme_id):
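+        # Only the feed endpoint is queried here; the wanted video is picked
+        # out of the returned list by its id.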
+ feed_list = self._call_api(
+ 'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed',
+ errnote='Unable to download video feed').get('aweme_list') or []
+ aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
+ if not aweme_detail:
+ raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
+ return self._parse_aweme_video_app(aweme_detail)
+
def _get_subtitles(self, aweme_detail, aweme_id):
# TODO: Extract text positioning info
subtitles = {}
+ # aweme/detail endpoint subs
captions_info = traverse_obj(
- aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict, default=[])
+ aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
for caption in captions_info:
caption_url = traverse_obj(caption, ('url', 'url_list', ...), expected_type=url_or_none, get_all=False)
if not caption_url:
@@ -141,6 +156,24 @@ class TikTokBaseIE(InfoExtractor):
f'{i + 1}\n{srt_subtitles_timecode(line["start_time"] / 1000)} --> {srt_subtitles_timecode(line["end_time"] / 1000)}\n{line["text"]}'
for i, line in enumerate(caption_json['utterances']) if line.get('text'))
})
+ # feed endpoint subs
+ if not subtitles:
+ for caption in traverse_obj(aweme_detail, ('video', 'cla_info', 'caption_infos', ...), expected_type=dict):
+ if not caption.get('url'):
+ continue
+ subtitles.setdefault(caption.get('lang') or 'en', []).append({
+ 'ext': remove_start(caption.get('caption_format'), 'web'),
+ 'url': caption['url'],
+ })
+ # webpage subs
+ if not subtitles:
+ for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict):
+ if not caption.get('Url'):
+ continue
+ subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
+ 'ext': remove_start(caption.get('Format'), 'web'),
+ 'url': caption['Url'],
+ })
return subtitles
def _parse_aweme_video_app(self, aweme_detail):
@@ -229,7 +262,6 @@ class TikTokBaseIE(InfoExtractor):
if auth_cookie:
for f in formats:
self._set_cookie(compat_urllib_parse_urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
- self._sort_formats(formats, ('quality', 'codec', 'size', 'br'))
thumbnails = []
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
@@ -263,6 +295,9 @@ class TikTokBaseIE(InfoExtractor):
return {
'id': aweme_id,
+ 'extractor_key': TikTokIE.ie_key(),
+ 'extractor': TikTokIE.IE_NAME,
+ 'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
'title': aweme_detail.get('desc'),
'description': aweme_detail.get('desc'),
'view_count': int_or_none(stats_info.get('play_count')),
@@ -275,7 +310,7 @@ class TikTokBaseIE(InfoExtractor):
'uploader_url': user_url,
'track': music_track,
'album': str_or_none(music_info.get('album')) or None,
- 'artist': music_author,
+ 'artist': music_author or None,
'timestamp': int_or_none(aweme_detail.get('create_time')),
'formats': formats,
'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
@@ -284,7 +319,8 @@ class TikTokBaseIE(InfoExtractor):
'availability': self._availability(
is_private='Private' in labels,
needs_subscription='Friends only' in labels,
- is_unlisted='Followers only' in labels)
+ is_unlisted='Followers only' in labels),
+ '_format_sort_fields': ('quality', 'codec', 'size', 'br'),
}
def _parse_aweme_video_web(self, aweme_detail, webpage_url):
@@ -326,7 +362,6 @@ class TikTokBaseIE(InfoExtractor):
'height': height,
})
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
thumbnails = []
for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
@@ -348,7 +383,7 @@ class TikTokBaseIE(InfoExtractor):
'timestamp': int_or_none(aweme_detail.get('createTime')),
'creator': str_or_none(author_info.get('nickname')),
'uploader': str_or_none(author_info.get('uniqueId') or aweme_detail.get('author')),
- 'uploader_id': str_or_none(author_info.get('id') or aweme_detail.get('authorId')),
+ 'uploader_id': str_or_none(traverse_obj(author_info, 'id', 'uid', 'authorId')),
'uploader_url': user_url,
'track': str_or_none(music_info.get('title')),
'album': str_or_none(music_info.get('album')) or None,
@@ -363,7 +398,8 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
- _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
+ _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
@@ -461,14 +497,14 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'expected_warnings': ['Video not available']
+ 'expected_warnings': ['trying with webpage', 'Unable to find video in feed']
}, {
# Video without title and description
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
'info_dict': {
'id': '7059698374567611694',
'ext': 'mp4',
- 'title': 'tiktok video #7059698374567611694',
+ 'title': 'TikTok video #7059698374567611694',
'description': '',
'uploader': 'pokemonlife22',
'creator': 'Pokemon',
@@ -485,49 +521,50 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'expected_warnings': ['Video not available', 'Creating a generic title']
+ }, {
+ # hydration JSON is sent in a <script> element
+ 'url': 'https://www.tiktok.com/@denidil6/video/7065799023130643713',
+ 'info_dict': {
+ 'id': '7065799023130643713',
+ 'ext': 'mp4',
+ 'title': '#denidil#денидил',
+ 'description': '#denidil#денидил',
+ 'uploader': 'denidil6',
+ 'uploader_id': '7046664115636405250',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAsvMSzFdQ4ikl3uR2TEJwMBbB2yZh2Zxwhx-WCo3rbDpAharE3GQCrFuJArI3C8QJ',
+ 'artist': 'Holocron Music',
+ 'album': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'track': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'timestamp': 1645134536,
+ 'duration': 26,
+ 'upload_date': '20220217',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'This video is unavailable',
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
'only_matching': True
}]
- def _extract_aweme_app(self, aweme_id):
- try:
- aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
- note='Downloading video details', errnote='Unable to download video details').get('aweme_detail')
- if not aweme_detail:
- raise ExtractorError('Video not available', video_id=aweme_id)
- except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with feed workaround')
- feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id,
- note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or []
- aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
- if not aweme_detail:
- raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
- return self._parse_aweme_video_app(aweme_detail)
-
def _real_extract(self, url):
- video_id = self._match_id(url)
-
+ video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with webpage')
+ self.report_warning(f'{e}; trying with webpage')
- # If we only call once, we get a 403 when downlaoding the video.
- self._download_webpage(url, video_id)
- webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
+ url = self._create_url(user_id, video_id)
+ webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
next_data = self._search_nextjs_data(webpage, video_id, default='{}')
-
if next_data:
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict)
else:
- sigi_json = self._search_regex(
- r'>\s*window\[[\'"]SIGI_STATE[\'"]\]\s*=\s*(?P<sigi_state>{.+});',
- webpage, 'sigi data', group='sigi_state')
- sigi_data = self._parse_json(sigi_json, video_id)
+ sigi_data = self._get_sigi_state(webpage, video_id)
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode'), expected_type=int) or 0
video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
@@ -541,6 +578,7 @@ class TikTokIE(TikTokBaseIE):
class TikTokUserIE(TikTokBaseIE):
IE_NAME = 'tiktok:user'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
+ _WORKING = False
_TESTS = [{
'url': 'https://tiktok.com/@corgibobaa?lang=en',
'playlist_mincount': 45,
@@ -599,19 +637,17 @@ class TikTokUserIE(TikTokBaseIE):
'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
}
- max_retries = self.get_param('extractor_retries', 3)
for page in itertools.count(1):
- for retries in itertools.count():
+ for retry in self.RetryManager():
try:
- post_list = self._call_api('aweme/post', query, username,
- note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
- errnote='Unable to download user video list')
+ post_list = self._call_api(
+ 'aweme/post', query, username, note=f'Downloading user video list page {page}',
+ errnote='Unable to download user video list')
except ExtractorError as e:
- if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
- self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
+ retry.error = e
continue
raise
- break
yield from post_list.get('aweme_list', [])
if not post_list.get('has_more'):
break
@@ -639,7 +675,7 @@ class TikTokUserIE(TikTokBaseIE):
return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail)
-class TikTokBaseListIE(TikTokBaseIE):
+class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
def _entries(self, list_id, display_id):
query = {
self._QUERY_NAME: list_id,
@@ -649,19 +685,17 @@ class TikTokBaseListIE(TikTokBaseIE):
'device_id': ''.join(random.choice(string.digits) for i in range(19))
}
- max_retries = self.get_param('extractor_retries', 3)
for page in itertools.count(1):
- for retries in itertools.count():
+ for retry in self.RetryManager():
try:
- post_list = self._call_api(self._API_ENDPOINT, query, display_id,
- note='Downloading video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
- errnote='Unable to download video list')
+ post_list = self._call_api(
+ self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
+ errnote='Unable to download video list')
except ExtractorError as e:
- if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
- self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
+ retry.error = e
continue
raise
- break
for video in post_list.get('aweme_list', []):
yield {
**self._parse_aweme_video_app(video),
@@ -681,6 +715,7 @@ class TikTokBaseListIE(TikTokBaseIE):
class TikTokSoundIE(TikTokBaseListIE):
IE_NAME = 'tiktok:sound'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/music/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
+ _WORKING = False
_QUERY_NAME = 'music_id'
_API_ENDPOINT = 'music/aweme'
_TESTS = [{
@@ -704,6 +739,7 @@ class TikTokSoundIE(TikTokBaseListIE):
class TikTokEffectIE(TikTokBaseListIE):
IE_NAME = 'tiktok:effect'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/sticker/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
+ _WORKING = False
_QUERY_NAME = 'sticker_id'
_API_ENDPOINT = 'sticker/aweme'
_TESTS = [{
@@ -723,6 +759,7 @@ class TikTokEffectIE(TikTokBaseListIE):
class TikTokTagIE(TikTokBaseListIE):
IE_NAME = 'tiktok:tag'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/tag/(?P<id>[^/?#&]+)'
+ _WORKING = False
_QUERY_NAME = 'ch_id'
_API_ENDPOINT = 'challenge/aweme'
_TESTS = [{
@@ -747,56 +784,68 @@ class TikTokTagIE(TikTokBaseListIE):
return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id)
-class DouyinIE(TikTokIE):
+class DouyinIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.douyin.com/video/6961737553342991651',
- 'md5': '10523312c8b8100f353620ac9dc8f067',
+ 'md5': 'a97db7e3e67eb57bf40735c022ffa228',
'info_dict': {
'id': '6961737553342991651',
'ext': 'mp4',
'title': '#杨超越 小小水手带你去远航❤️',
- 'uploader': '杨超越',
- 'upload_date': '20210513',
- 'timestamp': 1620905839,
+ 'description': '#杨超越 小小水手带你去远航❤️',
'uploader_id': '110403406559',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'creator': '杨超越',
+ 'duration': 19782,
+ 'timestamp': 1620905839,
+ 'upload_date': '20210513',
+ 'track': '@杨超越创作的原声',
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
- }
+ },
}, {
'url': 'https://www.douyin.com/video/6982497745948921092',
- 'md5': 'd78408c984b9b5102904cf6b6bc2d712',
+ 'md5': '34a87ebff3833357733da3fe17e37c0e',
'info_dict': {
'id': '6982497745948921092',
'ext': 'mp4',
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
- 'uploader': '杨超越工作室',
- 'upload_date': '20210708',
- 'timestamp': 1625739481,
+ 'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
'uploader_id': '408654318141572',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
+ 'creator': '杨超越工作室',
+ 'duration': 42608,
+ 'timestamp': 1625739481,
+ 'upload_date': '20210708',
+ 'track': '@杨超越工作室创作的原声',
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
- }
+ },
}, {
'url': 'https://www.douyin.com/video/6953975910773099811',
- 'md5': '72e882e24f75064c218b76c8b713c185',
+ 'md5': 'dde3302460f19db59c47060ff013b902',
'info_dict': {
'id': '6953975910773099811',
'ext': 'mp4',
'title': '#一起看海 出现在你的夏日里',
- 'uploader': '杨超越',
- 'upload_date': '20210422',
- 'timestamp': 1619098692,
+ 'description': '#一起看海 出现在你的夏日里',
'uploader_id': '110403406559',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'creator': '杨超越',
+ 'duration': 17228,
+ 'timestamp': 1619098692,
+ 'upload_date': '20210422',
+ 'track': '@杨超越创作的原声',
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
- }
+ },
}, {
'url': 'https://www.douyin.com/video/6950251282489675042',
'md5': 'b4db86aec367ef810ddd38b1737d2fed',
@@ -812,25 +861,30 @@ class DouyinIE(TikTokIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
- }
+ },
+ 'skip': 'No longer available',
}, {
'url': 'https://www.douyin.com/video/6963263655114722595',
- 'md5': '1abe1c477d05ee62efb40bf2329957cf',
+ 'md5': 'cf9f11f0ec45d131445ec2f06766e122',
'info_dict': {
'id': '6963263655114722595',
'ext': 'mp4',
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
- 'uploader': '杨超越',
- 'upload_date': '20210517',
- 'timestamp': 1621261163,
+ 'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
'uploader_id': '110403406559',
+ 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
+ 'creator': '杨超越',
+ 'duration': 15115,
+ 'timestamp': 1621261163,
+ 'upload_date': '20210517',
+ 'track': '@杨超越创作的原声',
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
- }
+ },
}]
- _APP_VERSIONS = [('9.6.0', '960')]
+ _APP_VERSIONS = [('23.3.0', '230300')]
_APP_NAME = 'aweme'
_AID = 1128
_API_HOSTNAME = 'aweme.snssdk.com'
@@ -843,7 +897,8 @@ class DouyinIE(TikTokIE):
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with webpage')
+ e.expected = True
+ self.to_screen(f'{e}; trying with webpage')
webpage = self._download_webpage(url, video_id)
render_data_json = self._search_regex(
@@ -851,7 +906,10 @@ class DouyinIE(TikTokIE):
webpage, 'render data', default=None)
if not render_data_json:
# TODO: Run verification challenge code to generate signature cookies
- raise ExtractorError('Fresh cookies (not necessarily logged in) are needed')
+ cookies = self._get_cookies(self._WEBPAGE_HOST)
+ expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
+ raise ExtractorError(
+ 'Fresh cookies (not necessarily logged in) are needed', expected=expected)
render_data = self._parse_json(
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
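
Note on the cookie check above: the webpage fallback only works once the jar already carries Douyin's anti-bot tokens. A minimal standalone sketch of the same freshness test, using only the standard library rather than the extractor's _get_cookies helper (the cookie names s_v_web_id and ttwid come from the hunk above; the cookies.txt path is illustrative):

import http.cookiejar

REQUIRED_COOKIES = ('s_v_web_id', 'ttwid')  # names taken from the hunk above

def have_fresh_douyin_cookies(jar):
    """True when every anti-bot cookie the extractor checks for is present."""
    names = {cookie.name for cookie in jar}
    return all(name in names for name in REQUIRED_COOKIES)

jar = http.cookiejar.MozillaCookieJar('cookies.txt')  # illustrative path
jar.load(ignore_discard=True)
if not have_fresh_douyin_cookies(jar):
    raise SystemExit('Fresh cookies (not necessarily logged in) are needed')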
@@ -859,36 +917,43 @@ class DouyinIE(TikTokIE):
class TikTokVMIE(InfoExtractor):
- _VALID_URL = r'https?://(?:vm|vt)\.tiktok\.com/(?P<id>\w+)'
+ _VALID_URL = r'https?://(?:(?:vm|vt)\.tiktok\.com|(?:www\.)?tiktok\.com/t)/(?P<id>\w+)'
IE_NAME = 'vm.tiktok'
_TESTS = [{
- 'url': 'https://vm.tiktok.com/ZSe4FqkKd',
+ 'url': 'https://www.tiktok.com/t/ZTRC5xgJp',
'info_dict': {
- 'id': '7023491746608712966',
+ 'id': '7170520270497680683',
'ext': 'mp4',
- 'title': 'md5:5607564db90271abbbf8294cca77eddd',
- 'description': 'md5:5607564db90271abbbf8294cca77eddd',
- 'duration': 11,
- 'upload_date': '20211026',
- 'uploader_id': '7007385080558846981',
- 'creator': 'Memes',
- 'artist': 'Memes',
- 'track': 'original sound',
- 'uploader': 'susmandem',
- 'timestamp': 1635284105,
- 'thumbnail': r're:https://.+\.webp.*',
- 'like_count': int,
+ 'title': 'md5:c64f6152330c2efe98093ccc8597871c',
+ 'uploader_id': '6687535061741700102',
+ 'upload_date': '20221127',
'view_count': int,
+ 'like_count': int,
'comment_count': int,
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAObqu3WCTXxmw2xwZ3iLEHnEecEIw7ks6rxWqOqOhaPja9BI7gqUQnjw8_5FSoDXX',
+ 'album': 'Wave of Mutilation: Best of Pixies',
+ 'thumbnail': r're:https://.+\.webp.*',
+ 'duration': 5,
+ 'timestamp': 1669516858,
'repost_count': int,
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAXcNoOEOxVyBzuII_E--T0MeCrLP0ay1Sm6x_n3dluiWEoWZD0VlQOytwad4W0i0n',
- }
+ 'artist': 'Pixies',
+ 'track': 'Where Is My Mind?',
+ 'description': 'md5:c64f6152330c2efe98093ccc8597871c',
+ 'uploader': 'sigmachaddeus',
+ 'creator': 'SigmaChad',
+ },
+ }, {
+ 'url': 'https://vm.tiktok.com/ZSe4FqkKd',
+ 'only_matching': True,
}, {
'url': 'https://vt.tiktok.com/ZSe4FqkKd',
'only_matching': True,
}]
def _real_extract(self, url):
- return self.url_result(self._request_webpage(
- HEADRequest(url), self._match_id(url), headers={'User-Agent': 'facebookexternalhit/1.1'}).geturl(), TikTokIE)
+ new_url = self._request_webpage(
+ HEADRequest(url), self._match_id(url), headers={'User-Agent': 'facebookexternalhit/1.1'}).geturl()
+ if self.suitable(new_url): # Prevent infinite loop in case redirect fails
+ raise UnsupportedError(new_url)
+ return self.url_result(new_url)
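
The new _real_extract above resolves vm/vt shortlinks with a HEAD request and refuses to recurse if the redirect lands back on a shortlink. A self-contained sketch of the same idea using the standard library; RuntimeError stands in for the extractor's UnsupportedError:

import re
import urllib.request

SHORTLINK_RE = re.compile(r'https?://(?:(?:vm|vt)\.tiktok\.com|(?:www\.)?tiktok\.com/t)/\w+')

def resolve_tiktok_shortlink(url):
    """HEAD-request the shortlink and return the URL it redirects to."""
    req = urllib.request.Request(
        url, method='HEAD',
        headers={'User-Agent': 'facebookexternalhit/1.1'})  # UA from the hunk above
    with urllib.request.urlopen(req) as resp:
        final_url = resp.geturl()
    if SHORTLINK_RE.match(final_url):  # redirect failed; bail out instead of looping
        raise RuntimeError(f'Unsupported URL: {final_url}')
    return final_url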
diff --git a/hypervideo_dl/extractor/tinypic.py b/hypervideo_dl/extractor/tinypic.py
index 39056e5..216208c 100644
--- a/hypervideo_dl/extractor/tinypic.py
+++ b/hypervideo_dl/extractor/tinypic.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/tmz.py b/hypervideo_dl/extractor/tmz.py
index aee2273..ffb30c6 100644
--- a/hypervideo_dl/extractor/tmz.py
+++ b/hypervideo_dl/extractor/tmz.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -21,8 +18,10 @@ class TMZIE(InfoExtractor):
"title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
"description": "Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.",
"timestamp": 1467831837,
- "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "uploader": "TMZ Staff",
"upload_date": "20160706",
+ "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg",
+ "duration": 772.0,
},
},
{
@@ -33,8 +32,10 @@ class TMZIE(InfoExtractor):
"title": "Angry Bagel Shop Guy Says He Doesn't Trust Women",
"description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.",
"timestamp": 1562889485,
- "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "uploader": "TMZ Staff",
"upload_date": "20190711",
+ "thumbnail": "https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg",
+ "duration": 123.0,
},
},
{
@@ -46,8 +47,10 @@ class TMZIE(InfoExtractor):
"title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake",
"description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
"timestamp": 1429467813,
- "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "uploader": "TMZ Staff",
"upload_date": "20150419",
+ "duration": 29.0,
+ "thumbnail": "https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg",
},
},
{
@@ -59,8 +62,10 @@ class TMZIE(InfoExtractor):
"description": "Patti LaBelle made it known loud and clear last night ... NO "
"ONE gets on her stage and strips down.",
"timestamp": 1442683746,
- "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "uploader": "TMZ Staff",
"upload_date": "20150919",
+ "duration": 104.0,
+ "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg",
},
},
{
@@ -71,8 +76,10 @@ class TMZIE(InfoExtractor):
"title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This",
"description": "Two pretty parts of this video with NBA Commish Adam Silver.",
"timestamp": 1454010989,
- "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "uploader": "TMZ Staff",
"upload_date": "20160128",
+ "duration": 59.0,
+ "thumbnail": "https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg",
},
},
{
@@ -83,8 +90,10 @@ class TMZIE(InfoExtractor):
"title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!",
"description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.",
"timestamp": 1477500095,
- "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "uploader": "TMZ Staff",
"upload_date": "20161026",
+ "thumbnail": "https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg",
+ "duration": 128.0,
},
},
{
@@ -99,8 +108,10 @@ class TMZIE(InfoExtractor):
"swinging their billy clubs at both Anti-Fascist and Pro-Trump "
"demonstrators.",
"timestamp": 1604182772,
- "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "uploader": "TMZ Staff",
"upload_date": "20201031",
+ "duration": 96.0,
+ "thumbnail": "https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg",
},
},
{
@@ -111,8 +122,23 @@ class TMZIE(InfoExtractor):
"title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing",
"uploader": "ESNEWS",
"description": "md5:49675bc58883ccf80474b8aa701e1064",
- "upload_date": "20201101",
+ "upload_date": "20201102",
"uploader_id": "ESNEWS",
+ "uploader_url": "http://www.youtube.com/user/ESNEWS",
+ "like_count": int,
+ "channel_id": "UCI-Oq7oFGakzSzHFlTtsUsQ",
+ "channel": "ESNEWS",
+ "view_count": int,
+ "duration": 225,
+ "live_status": "not_live",
+ "thumbnail": "https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp",
+ "channel_url": "https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ",
+ "channel_follower_count": int,
+ "playable_in_embed": True,
+ "categories": ["Sports"],
+ "age_limit": 0,
+ "tags": "count:10",
+ "availability": "public",
},
},
{
@@ -120,12 +146,20 @@ class TMZIE(InfoExtractor):
"info_dict": {
"id": "1329450007125225473",
"ext": "mp4",
- "title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.",
- "uploader": "TheMacLife",
+ "title": "The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.",
+ "uploader": "The Mac Life",
"description": "md5:56e6009bbc3d12498e10d08a8e1f1c69",
"upload_date": "20201119",
- "uploader_id": "Maclifeofficial",
+ "uploader_id": "TheMacLife",
"timestamp": 1605800556,
+ "thumbnail": "https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small",
+ "like_count": int,
+ "duration": 11.812,
+ "uploader_url": "https://twitter.com/TheMacLife",
+ "age_limit": 0,
+ "repost_count": int,
+ "tags": [],
+ "comment_count": int,
},
},
]
diff --git a/hypervideo_dl/extractor/tnaflix.py b/hypervideo_dl/extractor/tnaflix.py
index d7617f7..4482c84 100644
--- a/hypervideo_dl/extractor/tnaflix.py
+++ b/hypervideo_dl/extractor/tnaflix.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -11,6 +9,7 @@ from ..utils import (
parse_duration,
str_to_int,
unescapeHTML,
+ url_basename,
xpath_text,
)
@@ -22,8 +21,6 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
]
- _HOST = 'tna'
- _VKEY_SUFFIX = ''
_TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
_DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
_UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
@@ -74,7 +71,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
def _real_extract(self, url):
mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
+ video_id, host = mobj.group('id', 'host')
for display_id_key in ('display_id', 'display_id_2'):
if display_id_key in mobj.groupdict():
display_id = mobj.group(display_id_key)
@@ -85,98 +82,109 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
+ # check for MovieFap-style config
cfg_url = self._proto_relative_url(self._html_search_regex(
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
group='url'), 'http:')
+ query = {}
+ # check for TNAFlix-style config
if not cfg_url:
inputs = self._hidden_inputs(webpage)
- cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
- % (self._HOST, self._HOST, inputs['vkey'], self._VKEY_SUFFIX, inputs['nkey'], video_id))
-
- cfg_xml = self._download_xml(
- cfg_url, display_id, 'Downloading metadata',
- transform_source=fix_xml_ampersands, headers={'Referer': url})
-
- formats = []
-
- def extract_video_url(vl):
- # Any URL modification now results in HTTP Error 403: Forbidden
- return unescapeHTML(vl.text)
-
- video_link = cfg_xml.find('./videoLink')
- if video_link is not None:
- formats.append({
- 'url': extract_video_url(video_link),
- 'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
- })
-
- for item in cfg_xml.findall('./quality/item'):
- video_link = item.find('./videoLink')
- if video_link is None:
- continue
- res = item.find('res')
- format_id = None if res is None else res.text
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]', format_id, 'height', default=None))
- formats.append({
- 'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
- 'format_id': format_id,
- 'height': height,
+ if inputs.get('vkey') and inputs.get('nkey'):
+ cfg_url = f'https://www.{host}.com/cdn/cdn.php'
+ query.update({
+ 'file': inputs['vkey'],
+ 'key': inputs['nkey'],
+ 'VID': video_id,
+ 'premium': '1',
+ 'vip': '1',
+ 'alpha': '',
+ })
+
+ formats, json_ld = [], {}
+
+ # TNAFlix and MovieFap extraction
+ if cfg_url:
+ cfg_xml = self._download_xml(
+ cfg_url, display_id, 'Downloading metadata',
+ transform_source=fix_xml_ampersands, headers={'Referer': url}, query=query)
+
+ def extract_video_url(vl):
+ # Any URL modification now results in HTTP Error 403: Forbidden
+ return unescapeHTML(vl.text)
+
+ video_link = cfg_xml.find('./videoLink')
+ if video_link is not None:
+ formats.append({
+ 'url': extract_video_url(video_link),
+ 'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
+ })
+
+ for item in cfg_xml.findall('./quality/item'):
+ video_link = item.find('./videoLink')
+ if video_link is None:
+ continue
+ res = item.find('res')
+ format_id = None if res is None else res.text
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ thumbnails = self._extract_thumbnails(cfg_xml) or []
+ thumbnails.append({
+ 'url': self._proto_relative_url(xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
})
- self._sort_formats(formats)
-
- thumbnail = self._proto_relative_url(
- xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
- thumbnails = self._extract_thumbnails(cfg_xml)
-
- title = None
- if self._TITLE_REGEX:
- title = self._html_search_regex(
- self._TITLE_REGEX, webpage, 'title', default=None)
- if not title:
- title = self._og_search_title(webpage)
-
- age_limit = self._rta_search(webpage) or 18
-
- duration = parse_duration(self._html_search_meta(
- 'duration', webpage, 'duration', default=None))
+ # check for EMPFlix-style JSON and extract
+ else:
+ player = self._download_json(
+ f'http://www.{host}.com/ajax/video-player/{video_id}', video_id,
+ headers={'Referer': url}).get('html', '')
+ for mobj in re.finditer(r'<source src="(?P<src>[^"]+)"', player):
+ video_url = mobj.group('src')
+ height = self._search_regex(r'-(\d+)p\.', url_basename(video_url), 'height', default=None)
+ formats.append({
+ 'url': self._proto_relative_url(video_url, 'http:'),
+ 'ext': url_basename(video_url).split('.')[-1],
+ 'height': int_or_none(height),
+ 'format_id': f'{height}p' if height else url_basename(video_url).split('.')[0],
+ })
+ thumbnail = self._proto_relative_url(self._search_regex(
+ r'data-poster="([^"]+)"', player, 'thumbnail', default=None), 'http:')
+ thumbnails = [{'url': thumbnail}] if thumbnail else None
+ json_ld = self._search_json_ld(webpage, display_id, default={})
def extract_field(pattern, name):
return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
- description = extract_field(self._DESCRIPTION_REGEX, 'description')
- uploader = extract_field(self._UPLOADER_REGEX, 'uploader')
- view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count'))
- comment_count = str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count'))
- average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
-
- categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
- categories = [c.strip() for c in categories_str.split(',')] if categories_str is not None else []
-
return {
'id': video_id,
'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
+ 'title': (extract_field(self._TITLE_REGEX, 'title')
+ or self._og_search_title(webpage, default=None)
+ or json_ld.get('title')),
+ 'description': extract_field(self._DESCRIPTION_REGEX, 'description') or json_ld.get('description'),
'thumbnails': thumbnails,
- 'duration': duration,
- 'age_limit': age_limit,
- 'uploader': uploader,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'average_rating': average_rating,
- 'categories': categories,
+ 'duration': parse_duration(
+ self._html_search_meta('duration', webpage, 'duration', default=None)) or json_ld.get('duration'),
+ 'age_limit': self._rta_search(webpage) or 18,
+ 'uploader': extract_field(self._UPLOADER_REGEX, 'uploader') or json_ld.get('uploader'),
+ 'view_count': str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count')),
+ 'comment_count': str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count')),
+ 'average_rating': float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating')),
+ 'categories': [c.strip() for c in (extract_field(self._CATEGORIES_REGEX, 'categories') or '').split(',') if c.strip()],
'formats': formats,
}
class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
- _VALID_URL = r'https?://player\.(?:tna|emp)flix\.com/video/(?P<id>\d+)'
-
- _TITLE_REGEX = r'<title>([^<]+)</title>'
+ _VALID_URL = r'https?://player\.(?P<host>tnaflix|empflix)\.com/video/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1']
_TESTS = [{
'url': 'https://player.tnaflix.com/video/6538',
@@ -184,23 +192,26 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
'id': '6538',
'display_id': '6538',
'ext': 'mp4',
- 'title': 'Educational xxx video',
+ 'title': 'Educational xxx video (G Spot)',
+ 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
+ 'duration': 164,
+ 'uploader': 'bobwhite39',
+ 'categories': list,
},
'params': {
'skip_download': True,
},
}, {
- 'url': 'https://player.empflix.com/video/33051',
+ 'url': 'http://player.empflix.com/video/33051',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1',
- webpage)]
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id, host = mobj.group('id', 'host')
+ return self.url_result(f'http://www.{host}.com/category/{video_id}/video{video_id}')
class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
@@ -210,7 +221,7 @@ class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
class TNAFlixIE(TNAEMPFlixBaseIE):
- _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>tnaflix)\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
_TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
@@ -226,17 +237,17 @@ class TNAFlixIE(TNAEMPFlixBaseIE):
'thumbnail': r're:https?://.*\.jpg$',
'duration': 91,
'age_limit': 18,
- 'categories': ['Porn Stars'],
+ 'categories': list,
}
}, {
# non-anonymous uploader, categories
'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
- 'md5': '0f5d4d490dbfd117b8607054248a07c0',
+ 'md5': 'add5a9fa7f4da53d3e9d0845ac58f20c',
'info_dict': {
'id': '6538',
'display_id': 'Educational-xxx-video',
'ext': 'mp4',
- 'title': 'Educational xxx video',
+ 'title': 'Educational xxx video (G Spot)',
'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
'thumbnail': r're:https?://.*\.jpg$',
'duration': 164,
@@ -251,14 +262,11 @@ class TNAFlixIE(TNAEMPFlixBaseIE):
class EMPFlixIE(TNAEMPFlixBaseIE):
- _VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P<display_id>.+?)-|[^/]+/(?P<display_id_2>[^/]+)/video)(?P<id>[0-9]+)'
-
- _HOST = 'emp'
- _VKEY_SUFFIX = '-1'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>empflix)\.com/(?:videos/(?P<display_id>.+?)-|[^/]+/(?P<display_id_2>[^/]+)/video)(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
- 'md5': 'bc30d48b91a7179448a0bda465114676',
+ 'url': 'http://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051',
+ 'md5': 'd761c7b26601bd14476cd9512f2654fc',
'info_dict': {
'id': '33051',
'display_id': 'Amateur-Finger-Fuck',
@@ -268,20 +276,20 @@ class EMPFlixIE(TNAEMPFlixBaseIE):
'thumbnail': r're:https?://.*\.jpg$',
'duration': 83,
'age_limit': 18,
- 'uploader': 'cwbike',
- 'categories': ['Amateur', 'Anal', 'Fisting', 'Home made', 'Solo'],
+ 'uploader': None,
+ 'categories': list,
}
}, {
'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
'only_matching': True,
}, {
- 'url': 'https://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051',
+ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
'only_matching': True,
}]
class MovieFapIE(TNAFlixNetworkBaseIE):
- _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>moviefap)\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'
_VIEW_COUNT_REGEX = r'<br>Views\s*<strong>([\d,.]+)</strong>'
_COMMENT_COUNT_REGEX = r'<span[^>]+id="comCount"[^>]*>([\d,.]+)</span>'
@@ -323,5 +331,6 @@ class MovieFapIE(TNAFlixNetworkBaseIE):
'comment_count': int,
'average_rating': float,
'categories': ['Amateur', 'Teen'],
- }
+ },
+ 'skip': 'This video does not exist',
}]
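
The reworked TNAFlix base extractor above tries three strategies in order: a flashvars config URL (MovieFap), a cdn.php XML config built from the page's hidden vkey/nkey inputs (TNAFlix), and finally the ajax video-player JSON (EMPFlix). A small sketch of how the second branch assembles its request; the input values are illustrative:

from urllib.parse import urlencode

def build_cdn_config_url(host, inputs, video_id):
    """Rebuild the metadata URL the TNAFlix branch above requests."""
    query = {
        'file': inputs['vkey'],
        'key': inputs['nkey'],
        'VID': video_id,
        'premium': '1',
        'vip': '1',
        'alpha': '',
    }
    return f'https://www.{host}.com/cdn/cdn.php?{urlencode(query)}'

# build_cdn_config_url('tnaflix', {'vkey': 'abc', 'nkey': 'def'}, '6538')
# -> 'https://www.tnaflix.com/cdn/cdn.php?file=abc&key=def&VID=6538&premium=1&vip=1&alpha='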
diff --git a/hypervideo_dl/extractor/toggle.py b/hypervideo_dl/extractor/toggle.py
index eb87349..7073733 100644
--- a/hypervideo_dl/extractor/toggle.py
+++ b/hypervideo_dl/extractor/toggle.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -157,7 +154,6 @@ class ToggleIE(InfoExtractor):
and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'):
self.report_drm(video_id)
# Most likely because geo-blocked if no formats and no DRM
- self._sort_formats(formats)
thumbnails = []
for picture in info.get('Pictures', []):
diff --git a/hypervideo_dl/extractor/toggo.py b/hypervideo_dl/extractor/toggo.py
index da5f0c4..1ddec49 100644
--- a/hypervideo_dl/extractor/toggo.py
+++ b/hypervideo_dl/extractor/toggo.py
@@ -4,7 +4,7 @@ from ..utils import int_or_none, parse_qs
class ToggoIE(InfoExtractor):
IE_NAME = 'toggo'
- _VALID_URL = r'https?://(?:www\.)?toggo\.de/[\w-]+/folge/(?P<id>[\w-]+)'
+ _VALID_URL = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/(?:folge|video)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.toggo.de/weihnachtsmann--co-kg/folge/ein-geschenk-fuer-zwei',
'info_dict': {
@@ -27,6 +27,15 @@ class ToggoIE(InfoExtractor):
'upload_date': '20200217',
},
'params': {'skip_download': True},
+ }, {
+ 'url': 'https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock\'n\'lemming',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.toggo.de/toggolino/paw-patrol/video/paw-patrol-rettung-im-anflug',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/hypervideo_dl/extractor/tokentube.py b/hypervideo_dl/extractor/tokentube.py
index 579623f..d022e27 100644
--- a/hypervideo_dl/extractor/tokentube.py
+++ b/hypervideo_dl/extractor/tokentube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import re
@@ -98,8 +95,6 @@ class TokentubeIE(InfoExtractor):
description = remove_end(description, 'Category')
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/tonline.py b/hypervideo_dl/extractor/tonline.py
index 9b6a40d..7202826 100644
--- a/hypervideo_dl/extractor/tonline.py
+++ b/hypervideo_dl/extractor/tonline.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import int_or_none, join_nonempty
diff --git a/hypervideo_dl/extractor/toongoggles.py b/hypervideo_dl/extractor/toongoggles.py
index df13d64..1b8fc3a 100644
--- a/hypervideo_dl/extractor/toongoggles.py
+++ b/hypervideo_dl/extractor/toongoggles.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/toutv.py b/hypervideo_dl/extractor/toutv.py
index 1d5da10..f60c199 100644
--- a/hypervideo_dl/extractor/toutv.py
+++ b/hypervideo_dl/extractor/toutv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .radiocanada import RadioCanadaIE
@@ -12,7 +9,7 @@ from ..utils import (
)
-class TouTvIE(RadioCanadaIE):
+class TouTvIE(RadioCanadaIE): # XXX: Do not subclass from concrete IE
_NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
diff --git a/hypervideo_dl/extractor/toypics.py b/hypervideo_dl/extractor/toypics.py
index f705a06..bc73361 100644
--- a/hypervideo_dl/extractor/toypics.py
+++ b/hypervideo_dl/extractor/toypics.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
import re
diff --git a/hypervideo_dl/extractor/traileraddict.py b/hypervideo_dl/extractor/traileraddict.py
index 514f479..5c4a138 100644
--- a/hypervideo_dl/extractor/traileraddict.py
+++ b/hypervideo_dl/extractor/traileraddict.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/triller.py b/hypervideo_dl/extractor/triller.py
new file mode 100644
index 0000000..acd9e68
--- /dev/null
+++ b/hypervideo_dl/extractor/triller.py
@@ -0,0 +1,294 @@
+import itertools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ unified_strdate,
+ unified_timestamp,
+ url_basename,
+)
+
+
+class TrillerBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'triller'
+ _API_BASE_URL = 'https://social.triller.co/v1.5'
+ _API_HEADERS = {'Origin': 'https://triller.co'}
+
+ def _perform_login(self, username, password):
+ if self._API_HEADERS.get('Authorization'):
+ return
+
+ user_check = self._download_json(
+ f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
+ fatal=False, expected_status=400, headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://triller.co',
+ }, data=json.dumps({'username': username}, separators=(',', ':')).encode('utf-8'))
+ if user_check.get('status'): # endpoint returns "status":false if username exists
+ raise ExtractorError('Unable to login: Invalid username', expected=True)
+
+ credentials = {
+ 'username': username,
+ 'password': password,
+ }
+ login = self._download_json(
+ f'{self._API_BASE_URL}/user/auth', None, note='Logging in',
+ fatal=False, expected_status=400, headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://triller.co',
+ }, data=json.dumps(credentials, separators=(',', ':')).encode('utf-8'))
+ if not login.get('auth_token'):
+ if login.get('error') == 1008:
+ raise ExtractorError('Unable to login: Incorrect password', expected=True)
+ raise ExtractorError('Unable to login')
+
+ self._API_HEADERS['Authorization'] = f'Bearer {login["auth_token"]}'
+
+ def _get_comments(self, video_id, limit=15):
+ comment_info = self._download_json(
+ f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2',
+ video_id, fatal=False, note='Downloading comments API JSON',
+ headers=self._API_HEADERS, query={'limit': limit}) or {}
+ if not comment_info.get('comments'):
+ return
+ for comment_dict in comment_info['comments']:
+ yield {
+ 'author': traverse_obj(comment_dict, ('author', 'username')),
+ 'author_id': traverse_obj(comment_dict, ('author', 'user_id')),
+ 'id': comment_dict.get('id'),
+ 'text': comment_dict.get('body'),
+ 'timestamp': unified_timestamp(comment_dict.get('timestamp')),
+ }
+
+ def _check_user_info(self, user_info):
+ if not user_info:
+ self.report_warning('Unable to extract user info')
+ elif user_info.get('private') and not user_info.get('followed_by_me'):
+ raise ExtractorError('This video is private', expected=True)
+ elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
+ raise ExtractorError('The author of the video is blocked', expected=True)
+ return user_info
+
+ def _parse_video_info(self, video_info, username, user_info=None):
+ video_uuid = video_info.get('video_uuid')
+ video_id = video_info.get('id')
+
+ formats = []
+ video_url = traverse_obj(video_info, 'video_url', 'stream_url')
+ if video_url:
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'vcodec': 'h264',
+ 'width': video_info.get('width'),
+ 'height': video_info.get('height'),
+ 'format_id': url_basename(video_url).split('.')[0],
+ 'filesize': video_info.get('filesize'),
+ })
+ video_set = video_info.get('video_set') or []
+ for video in video_set:
+ resolution = video.get('resolution') or ''
+ formats.append({
+ 'url': video['url'],
+ 'ext': 'mp4',
+ 'vcodec': video.get('codec'),
+ 'vbr': int_or_none(video.get('bitrate'), 1000),
+ 'width': int_or_none(resolution.split('x')[0]),
+ 'height': int_or_none(resolution.split('x')[1]),
+ 'format_id': url_basename(video['url']).split('.')[0],
+ })
+ audio_url = video_info.get('audio_url')
+ if audio_url:
+ formats.append({
+ 'url': audio_url,
+ 'ext': 'm4a',
+ 'format_id': url_basename(audio_url).split('.')[0],
+ })
+
+ manifest_url = video_info.get('transcoded_url')
+ if manifest_url:
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ comment_count = int_or_none(video_info.get('comment_count'))
+
+ user_info = user_info or traverse_obj(video_info, 'user', default={})
+
+ return {
+ 'id': str_or_none(video_id) or video_uuid,
+ 'title': video_info.get('description') or f'Video by {username}',
+ 'thumbnail': video_info.get('thumbnail_url'),
+ 'description': video_info.get('description'),
+ 'uploader': str_or_none(username),
+ 'uploader_id': str_or_none(user_info.get('user_id')),
+ 'creator': str_or_none(user_info.get('name')),
+ 'timestamp': unified_timestamp(video_info.get('timestamp')),
+ 'upload_date': unified_strdate(video_info.get('timestamp')),
+ 'duration': int_or_none(video_info.get('duration')),
+ 'view_count': int_or_none(video_info.get('play_count')),
+ 'like_count': int_or_none(video_info.get('likes_count')),
+ 'artist': str_or_none(video_info.get('song_artist')),
+ 'track': str_or_none(video_info.get('song_title')),
+ 'webpage_url': f'https://triller.co/@{username}/video/{video_uuid}',
+ 'uploader_url': f'https://triller.co/@{username}',
+ 'extractor_key': TrillerIE.ie_key(),
+ 'extractor': TrillerIE.IE_NAME,
+ 'formats': formats,
+ 'comment_count': comment_count,
+ '__post_extractor': self.extract_comments(video_id, comment_count),
+ }
+
+
+class TrillerIE(TrillerBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?triller\.co/
+ @(?P<username>[\w\._]+)/video/
+ (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
+ '''
+ _TESTS = [{
+ 'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
+ 'md5': '228662d783923b60d78395fedddc0a20',
+ 'info_dict': {
+ 'id': '71595734',
+ 'ext': 'mp4',
+ 'title': 'md5:9a2bf9435c5c4292678996a464669416',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ 'description': 'md5:9a2bf9435c5c4292678996a464669416',
+ 'uploader': 'theestallion',
+ 'uploader_id': '18992236',
+ 'creator': 'Megan Thee Stallion',
+ 'timestamp': 1660598222,
+ 'upload_date': '20220815',
+ 'duration': 47,
+ 'height': 3840,
+ 'width': 2160,
+ 'view_count': int,
+ 'like_count': int,
+ 'artist': 'Megan Thee Stallion',
+ 'track': 'Her',
+ 'webpage_url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
+ 'uploader_url': 'https://triller.co/@theestallion',
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
+ 'md5': '874055f462af5b0699b9dbb527a505a0',
+ 'info_dict': {
+ 'id': '71621339',
+ 'ext': 'mp4',
+ 'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ 'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
+ 'uploader': 'charlidamelio',
+ 'uploader_id': '1875551',
+ 'creator': 'charli damelio',
+ 'timestamp': 1660773354,
+ 'upload_date': '20220817',
+ 'duration': 16,
+ 'height': 1920,
+ 'width': 1080,
+ 'view_count': int,
+ 'like_count': int,
+ 'artist': 'Dixie',
+ 'track': 'Someone to Blame',
+ 'webpage_url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
+ 'uploader_url': 'https://triller.co/@charlidamelio',
+ 'comment_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ username, video_uuid = self._match_valid_url(url).group('username', 'id')
+
+ video_info = traverse_obj(self._download_json(
+ f'{self._API_BASE_URL}/api/videos/{video_uuid}',
+ video_uuid, note='Downloading video info API JSON',
+ errnote='Unable to download video info API JSON',
+ headers=self._API_HEADERS), ('videos', 0))
+ if not video_info:
+ raise ExtractorError('No video info found in API response')
+
+ user_info = self._check_user_info(video_info.get('user') or {})
+ return self._parse_video_info(video_info, username, user_info)
+
+
+class TrillerUserIE(TrillerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w\._]+)/?(?:$|[#?])'
+ _TESTS = [{
+ # first videos request only returns 2 videos
+ 'url': 'https://triller.co/@theestallion',
+ 'playlist_mincount': 9,
+ 'info_dict': {
+ 'id': '18992236',
+ 'title': 'theestallion',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ }
+ }, {
+ 'url': 'https://triller.co/@charlidamelio',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': '1875551',
+ 'title': 'charlidamelio',
+ 'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
+ }
+ }]
+
+ def _real_initialize(self):
+ if not self._API_HEADERS.get('Authorization'):
+ guest = self._download_json(
+ f'{self._API_BASE_URL}/user/create_guest',
+ None, note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
+ 'platform': 'Web',
+ 'app_version': '',
+ })
+ if not guest.get('auth_token'):
+ raise ExtractorError('Unable to fetch required auth token for user extraction')
+
+ self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'
+
+ def _extract_video_list(self, username, user_id, limit=6):
+ query = {
+ 'limit': limit,
+ }
+ for page in itertools.count(1):
+ for retry in self.RetryManager():
+ try:
+ video_list = self._download_json(
+ f'{self._API_BASE_URL}/api/users/{user_id}/videos',
+ username, note=f'Downloading user video list page {page}',
+ errnote='Unable to download user video list', headers=self._API_HEADERS,
+ query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
+ retry.error = e
+ continue
+ raise
+ if not video_list.get('videos'):
+ break
+ yield from video_list['videos']
+ query['before_time'] = traverse_obj(video_list, ('videos', -1, 'timestamp'))
+ if not query['before_time']:
+ break
+
+ def _entries(self, videos, username, user_info):
+ for video in videos:
+ yield self._parse_video_info(video, username, user_info)
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ user_info = self._check_user_info(self._download_json(
+ f'{self._API_BASE_URL}/api/users/by_username/{username}',
+ username, note='Downloading user info',
+ errnote='Failed to download user info', headers=self._API_HEADERS).get('user', {}))
+
+ user_id = str_or_none(user_info.get('user_id'))
+ videos = self._extract_video_list(username, user_id)
+ thumbnail = user_info.get('avatar_url')
+
+ return self.playlist_result(
+ self._entries(videos, username, user_info), user_id, username, thumbnail=thumbnail)
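
TrillerUserIE above pages through a user's videos by passing the timestamp of the last item already seen as a before_time cursor. A minimal generic sketch of that loop; fetch_page is a hypothetical callable mapping (page, query) to the decoded API JSON:

import itertools

def paginate_by_timestamp(fetch_page, limit=6):
    """Yield items across pages, using the last item's timestamp as the cursor."""
    query = {'limit': limit}
    for page in itertools.count(1):
        videos = fetch_page(page, query).get('videos') or []
        if not videos:
            break
        yield from videos
        query['before_time'] = videos[-1].get('timestamp')
        if not query['before_time']:
            break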
diff --git a/hypervideo_dl/extractor/trilulilu.py b/hypervideo_dl/extractor/trilulilu.py
index a800449..fb97be7 100644
--- a/hypervideo_dl/extractor/trilulilu.py
+++ b/hypervideo_dl/extractor/trilulilu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/trovo.py b/hypervideo_dl/extractor/trovo.py
index 65ea13d..545a672 100644
--- a/hypervideo_dl/extractor/trovo.py
+++ b/hypervideo_dl/extractor/trovo.py
@@ -1,8 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import json
+import random
+import string
from .common import InfoExtractor
from ..utils import (
@@ -10,6 +9,7 @@ from ..utils import (
format_field,
int_or_none,
str_or_none,
+ traverse_obj,
try_get,
)
@@ -18,10 +18,20 @@ class TrovoBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
_HEADERS = {'Origin': 'https://trovo.live'}
- def _call_api(self, video_id, query=None, data=None):
- return self._download_json(
- 'https://gql.trovo.live/', video_id, query=query, data=data,
- headers={'Accept': 'application/json'})
+ def _call_api(self, video_id, data):
+ if 'persistedQuery' in data.get('extensions', {}):
+ url = 'https://gql.trovo.live'
+ else:
+ url = 'https://api-web.trovo.live/graphql'
+
+ resp = self._download_json(
+ url, video_id, data=json.dumps([data]).encode(), headers={'Accept': 'application/json'},
+ query={
+ 'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)),
+ })[0]
+ if 'errors' in resp:
+ raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}')
+ return resp['data'][data['operationName']]
def _extract_streamer_info(self, data):
streamer_info = data.get('streamerInfo') or {}
@@ -29,36 +39,43 @@ class TrovoBaseIE(InfoExtractor):
return {
'uploader': streamer_info.get('nickName'),
'uploader_id': str_or_none(streamer_info.get('uid')),
- 'uploader_url': format_field(username, template='https://trovo.live/%s'),
+ 'uploader_url': format_field(username, None, 'https://trovo.live/%s'),
}
class TrovoIE(TrovoBaseIE):
- _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:s/)?(?!(?:clip|video)/)(?P<id>(?!s/)[^/?&#]+(?![^#]+[?&]vid=))'
+ _TESTS = [{
+ 'url': 'https://trovo.live/Exsl',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://trovo.live/s/SkenonSLive/549759191497',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://trovo.live/s/zijo987/208251706',
+ 'info_dict': {
+ 'id': '104125853_104125853_1656439572',
+ 'ext': 'flv',
+ 'uploader_url': 'https://trovo.live/zijo987',
+ 'uploader_id': '104125853',
+ 'thumbnail': 'https://livecover.trovo.live/screenshot/73846_104125853_104125853-2022-06-29-04-00-22-852x480.jpg',
+ 'uploader': 'zijo987',
+ 'title': '💥IGRAMO IGRICE UPADAJTE💥2500/5000 2022-06-28 22:01',
+ 'live_status': 'is_live',
+ },
+ 'skip': 'May not be live'
+ }]
def _real_extract(self, url):
username = self._match_id(url)
- live_info = self._call_api(username, query={
- 'query': '''{
- getLiveInfo(params: {userName: "%s"}) {
- isLive
- programInfo {
- coverUrl
- id
- streamInfo {
- desc
- playUrl
- }
- title
- }
- streamerInfo {
- nickName
- uid
- userName
- }
- }
-}''' % username,
- })['data']['getLiveInfo']
+ live_info = self._call_api(username, data={
+ 'operationName': 'live_LiveReaderService_GetLiveInfo',
+ 'variables': {
+ 'params': {
+ 'userName': username,
+ },
+ },
+ })
if live_info.get('isLive') == 0:
raise ExtractorError('%s is offline' % username, expected=True)
program_info = live_info['programInfo']
@@ -75,9 +92,9 @@ class TrovoIE(TrovoBaseIE):
'format_id': format_id,
'height': int_or_none(format_id[:-1]) if format_id else None,
'url': play_url,
+ 'tbr': stream_info.get('bitrate'),
'http_headers': self._HEADERS,
})
- self._sort_formats(formats)
info = {
'id': program_id,
@@ -91,57 +108,100 @@ class TrovoIE(TrovoBaseIE):
class TrovoVodIE(TrovoBaseIE):
- _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
+ _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video|s)/(?:[^/]+/\d+[^#]*[?&]vid=)?(?P<id>(?<!/s/)[^/?&#]+)'
_TESTS = [{
- 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+ 'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos',
+ 'params': {'getcomments': True},
'info_dict': {
- 'id': 'ltv-100095501_100095501_1609596043',
+ 'id': 'lc-5285890818705062210',
'ext': 'mp4',
- 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
- 'uploader': 'Exsl',
- 'timestamp': 1609640305,
- 'upload_date': '20210103',
- 'uploader_id': '100095501',
- 'duration': 43977,
+ 'title': 'fatal moaning for a super good🤣🤣',
+ 'uploader': 'OneTappedYou',
+ 'timestamp': 1621628019,
+ 'upload_date': '20210521',
+ 'uploader_id': '100719456',
+ 'duration': 31,
'view_count': int,
'like_count': int,
'comment_count': int,
- 'comments': 'mincount:8',
- 'categories': ['Grand Theft Auto V'],
+ 'comments': 'mincount:1',
+ 'categories': ['Call of Duty: Mobile'],
+ 'uploader_url': 'https://trovo.live/OneTappedYou',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
- 'skip': '404'
}, {
- 'url': 'https://trovo.live/clip/lc-5285890810184026005',
+ 'url': 'https://trovo.live/s/SkenonSLive/549759191497?vid=ltv-100829718_100829718_387702301737980280',
+ 'info_dict': {
+ 'id': 'ltv-100829718_100829718_387702301737980280',
+ 'ext': 'mp4',
+ 'timestamp': 1654909624,
+ 'thumbnail': 'http://vod.trovo.live/1f09baf0vodtransger1301120758/ef9ea3f0387702301737980280/coverBySnapshot/coverBySnapshot_10_0.jpg',
+ 'uploader_id': '100829718',
+ 'uploader': 'SkenonSLive',
+ 'title': 'Trovo u secanju, uz par modova i muzike :)',
+ 'uploader_url': 'https://trovo.live/SkenonSLive',
+ 'duration': 10830,
+ 'view_count': int,
+ 'like_count': int,
+ 'upload_date': '20220611',
+ 'comment_count': int,
+ 'categories': ['Minecraft'],
+ },
+ 'skip': 'Not available',
+ }, {
+ 'url': 'https://trovo.live/s/Trovo/549756886599?vid=ltv-100264059_100264059_387702304241698583',
+ 'info_dict': {
+ 'id': 'ltv-100264059_100264059_387702304241698583',
+ 'ext': 'mp4',
+ 'timestamp': 1661479563,
+ 'thumbnail': 'http://vod.trovo.live/be5ae591vodtransusw1301120758/cccb9915387702304241698583/coverBySnapshot/coverBySnapshot_10_0.jpg',
+ 'uploader_id': '100264059',
+ 'uploader': 'Trovo',
+ 'title': 'Dev Corner 8/25',
+ 'uploader_url': 'https://trovo.live/Trovo',
+ 'duration': 3753,
+ 'view_count': int,
+ 'like_count': int,
+ 'upload_date': '20220826',
+ 'comment_count': int,
+ 'categories': ['Talk Shows'],
+ },
+ }, {
+ 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://trovo.live/s/SkenonSLive/549759191497?foo=bar&vid=ltv-100829718_100829718_387702301737980280',
'only_matching': True,
}]
def _real_extract(self, url):
vid = self._match_id(url)
- resp = self._call_api(vid, data=json.dumps([{
- 'query': '''{
- batchGetVodDetailInfo(params: {vids: ["%s"]}) {
- VodDetailInfos
- }
-}''' % vid,
- }, {
- 'query': '''{
- getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
- commentList {
- author {
- nickName
- uid
- }
- commentID
- content
- createdAt
- parentID
- }
- }
-}''' % vid,
- }]).encode())
- vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
- vod_info = vod_detail_info['vodInfo']
- title = vod_info['title']
+
+ # NOTE: It is also possible to extract this info from the Nuxt data on the website,
+ # however that seems unreliable - sometimes it randomly doesn't return the data,
+ # at least when using a non-residential IP.
+ resp = self._call_api(vid, data={
+ 'operationName': 'vod_VodReaderService_BatchGetVodDetailInfo',
+ 'variables': {
+ 'params': {
+ 'vids': [vid],
+ },
+ },
+ 'extensions': {},
+ })
+
+ vod_detail_info = traverse_obj(resp, ('VodDetailInfos', vid), expected_type=dict)
+ if not vod_detail_info:
+ raise ExtractorError('This video was not found or is no longer available', expected=True)
+ vod_info = vod_detail_info.get('vodInfo')
+ title = vod_info.get('title')
+
+ if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'):
+ playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting']
+ if playback_rights_setting == 'SubscriberOnly':
+ raise ExtractorError('This video is only available for subscribers', expected=True)
+ else:
+ raise ExtractorError(f'This video is not available ({playback_rights_setting})', expected=True)
language = vod_info.get('languageName')
formats = []
@@ -161,28 +221,10 @@ class TrovoVodIE(TrovoBaseIE):
'url': play_url,
'http_headers': self._HEADERS,
})
- self._sort_formats(formats)
category = vod_info.get('categoryName')
get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
- comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
- comments = []
- for comment in comment_list:
- content = comment.get('content')
- if not content:
- continue
- author = comment.get('author') or {}
- parent = comment.get('parentID')
- comments.append({
- 'author': author.get('nickName'),
- 'author_id': str_or_none(author.get('uid')),
- 'id': str_or_none(comment.get('commentID')),
- 'text': content,
- 'timestamp': int_or_none(comment.get('createdAt')),
- 'parent': 'root' if parent == 0 else str_or_none(parent),
- })
-
info = {
'id': vid,
'title': title,
@@ -193,35 +235,81 @@ class TrovoVodIE(TrovoBaseIE):
'view_count': get_count('watch'),
'like_count': get_count('like'),
'comment_count': get_count('comment'),
- 'comments': comments,
'categories': [category] if category else None,
+ '__post_extractor': self.extract_comments(vid),
}
info.update(self._extract_streamer_info(vod_detail_info))
return info
+ def _get_comments(self, vid):
+ for page in itertools.count(1):
+ comments_json = self._call_api(vid, data={
+ 'operationName': 'public_CommentProxyService_GetCommentList',
+ 'variables': {
+ 'params': {
+ 'appInfo': {
+ 'postID': vid,
+ },
+ 'preview': {},
+ 'pageSize': 99,
+ 'page': page,
+ },
+ },
+ 'extensions': {
+ 'singleReq': 'true',
+ },
+ })
+ for comment in comments_json['commentList']:
+ content = comment.get('content')
+ if not content:
+ continue
+ author = comment.get('author') or {}
+ parent = comment.get('parentID')
+ yield {
+ 'author': author.get('nickName'),
+ 'author_id': str_or_none(author.get('uid')),
+ 'id': str_or_none(comment.get('commentID')),
+ 'text': content,
+ 'timestamp': int_or_none(comment.get('createdAt')),
+ 'parent': 'root' if parent == 0 else str_or_none(parent),
+ }
+
+ if comments_json['lastPage']:
+ break
-class TrovoChannelBaseIE(TrovoBaseIE):
- def _get_vod_json(self, page, uid):
- raise NotImplementedError('This method must be implemented by subclasses')
- def _entries(self, uid):
+class TrovoChannelBaseIE(TrovoBaseIE):
+ def _entries(self, spacename):
for page in itertools.count(1):
- vod_json = self._get_vod_json(page, uid)
+ vod_json = self._call_api(spacename, data={
+ 'operationName': self._OPERATION,
+ 'variables': {
+ 'params': {
+ 'terminalSpaceID': {
+ 'spaceName': spacename,
+ },
+ 'currPage': page,
+ 'pageSize': 99,
+ },
+ },
+ 'extensions': {
+ 'singleReq': 'true',
+ },
+ })
vods = vod_json.get('vodInfos', [])
for vod in vods:
+ vid = vod.get('vid')
+ room = traverse_obj(vod, ('spaceInfo', 'roomID'))
yield self.url_result(
- 'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')),
+ f'https://trovo.live/s/{spacename}/{room}?vid={vid}',
ie=TrovoVodIE.ie_key())
- has_more = vod_json['hasMore']
+ has_more = vod_json.get('hasMore')
if not has_more:
break
def _real_extract(self, url):
- id = self._match_id(url)
- uid = str(self._call_api(id, query={
- 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id
- })['data']['getLiveInfo']['streamerInfo']['uid'])
- return self.playlist_result(self._entries(uid), playlist_id=uid)
+ spacename = self._match_id(url)
+ return self.playlist_result(self._entries(spacename), playlist_id=spacename)
class TrovoChannelVodIE(TrovoChannelBaseIE):
@@ -232,17 +320,11 @@ class TrovoChannelVodIE(TrovoChannelBaseIE):
'url': 'trovovod:OneTappedYou',
'playlist_mincount': 24,
'info_dict': {
- 'id': '100719456',
+ 'id': 'OneTappedYou',
},
}]
- _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}'
- _TYPE = 'video'
-
- def _get_vod_json(self, page, uid):
- return self._call_api(uid, query={
- 'query': self._QUERY % (page, uid)
- })['data']['getChannelLtvVideoInfos']
+ _OPERATION = 'vod_VodReaderService_GetChannelLtvVideoInfos'
class TrovoChannelClipIE(TrovoChannelBaseIE):
@@ -253,14 +335,8 @@ class TrovoChannelClipIE(TrovoChannelBaseIE):
'url': 'trovoclip:OneTappedYou',
'playlist_mincount': 29,
'info_dict': {
- 'id': '100719456',
+ 'id': 'OneTappedYou',
},
}]
- _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}'
- _TYPE = 'clip'
-
- def _get_vod_json(self, page, uid):
- return self._call_api(uid, query={
- 'query': self._QUERY % (page, uid)
- })['data']['getChannelClipVideoInfos']
+ _OPERATION = 'vod_VodReaderService_GetChannelClipVideoInfos'
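
The Trovo rewrite above replaces raw GraphQL query strings with named operations, picks the endpoint based on whether the request carries a persistedQuery extension, and tags every call with a random qid. A sketch of how such a request is assembled; the persisted flag stands in for the extensions check:

import json
import random
import string

def build_trovo_request(operation_name, params, persisted=False):
    """Return (endpoint, query dict, POST body) in the shape _call_api above uses."""
    endpoint = 'https://gql.trovo.live' if persisted else 'https://api-web.trovo.live/graphql'
    qid = ''.join(random.choices(string.ascii_uppercase + string.digits, k=16))
    body = json.dumps([{
        'operationName': operation_name,
        'variables': {'params': params},
        'extensions': {'singleReq': 'true'},
    }]).encode()
    return endpoint, {'qid': qid}, body

# build_trovo_request('vod_VodReaderService_BatchGetVodDetailInfo',
#                     {'vids': ['lc-5285890818705062210']})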
diff --git a/hypervideo_dl/extractor/trueid.py b/hypervideo_dl/extractor/trueid.py
index fc98303..6963436 100644
--- a/hypervideo_dl/extractor/trueid.py
+++ b/hypervideo_dl/extractor/trueid.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
diff --git a/hypervideo_dl/extractor/trunews.py b/hypervideo_dl/extractor/trunews.py
index cca5b5c..d5ce86e 100644
--- a/hypervideo_dl/extractor/trunews.py
+++ b/hypervideo_dl/extractor/trunews.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/truth.py b/hypervideo_dl/extractor/truth.py
new file mode 100644
index 0000000..1c6409c
--- /dev/null
+++ b/hypervideo_dl/extractor/truth.py
@@ -0,0 +1,69 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ format_field,
+ int_or_none,
+ strip_or_none,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class TruthIE(InfoExtractor):
+ _VALID_URL = r'https?://truthsocial\.com/@[^/]+/posts/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://truthsocial.com/@realDonaldTrump/posts/108779000807761862',
+ 'md5': '4a5fb1470c192e493d9efd6f19e514d3',
+ 'info_dict': {
+ 'id': '108779000807761862',
+ 'ext': 'qt',
+ 'title': 'Truth video #108779000807761862',
+ 'description': None,
+ 'timestamp': 1659835827,
+ 'upload_date': '20220807',
+ 'uploader': 'Donald J. Trump',
+ 'uploader_id': 'realDonaldTrump',
+ 'uploader_url': 'https://truthsocial.com/@realDonaldTrump',
+ 'repost_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ },
+ {
+ 'url': 'https://truthsocial.com/@ProjectVeritasAction/posts/108618228543962049',
+ 'md5': 'fd47ba68933f9dce27accc52275be9c3',
+ 'info_dict': {
+ 'id': '108618228543962049',
+ 'ext': 'mp4',
+ 'title': 'md5:debde7186cf83f60ff7b44dbb9444e35',
+ 'description': 'md5:de2fc49045bf92bb8dc97e56503b150f',
+ 'timestamp': 1657382637,
+ 'upload_date': '20220709',
+ 'uploader': 'Project Veritas Action',
+ 'uploader_id': 'ProjectVeritasAction',
+ 'uploader_url': 'https://truthsocial.com/@ProjectVeritasAction',
+ 'repost_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ status = self._download_json(f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id)
+ uploader_id = strip_or_none(traverse_obj(status, ('account', 'username')))
+ return {
+ 'id': video_id,
+ 'url': status['media_attachments'][0]['url'],
+ 'title': '',
+ 'description': strip_or_none(clean_html(status.get('content'))) or None,
+ 'timestamp': unified_timestamp(status.get('created_at')),
+ 'uploader': strip_or_none(traverse_obj(status, ('account', 'display_name'))),
+ 'uploader_id': uploader_id,
+ 'uploader_url': format_field(uploader_id, None, 'https://truthsocial.com/@%s'),
+ 'repost_count': int_or_none(status.get('reblogs_count')),
+ 'like_count': int_or_none(status.get('favourites_count')),
+ 'comment_count': int_or_none(status.get('replies_count')),
+ }
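
The new Truth extractor above is a thin mapping over a single status endpoint. A sketch of the underlying fetch, assuming the site serves the JSON without additional anti-bot checks:

import json
import urllib.request

def fetch_truth_status(video_id):
    """Fetch the status object the extractor above maps into an info dict."""
    with urllib.request.urlopen(
            f'https://truthsocial.com/api/v1/statuses/{video_id}') as resp:
        return json.load(resp)

# status = fetch_truth_status('108779000807761862')
# status['media_attachments'][0]['url'] is the direct media URL used above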
diff --git a/hypervideo_dl/extractor/trutv.py b/hypervideo_dl/extractor/trutv.py
index c09ff89..ea0f2f4 100644
--- a/hypervideo_dl/extractor/trutv.py
+++ b/hypervideo_dl/extractor/trutv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .turner import TurnerBaseIE
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/tube8.py b/hypervideo_dl/extractor/tube8.py
index db93b01..77ed05f 100644
--- a/hypervideo_dl/extractor/tube8.py
+++ b/hypervideo_dl/extractor/tube8.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from ..utils import (
@@ -9,8 +7,9 @@ from ..utils import (
from .keezmovies import KeezMoviesIE
-class Tube8IE(KeezMoviesIE):
+class Tube8IE(KeezMoviesIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)']
_TESTS = [{
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
'md5': '65e20c48e6abff62ed0c3965fff13a39',
@@ -31,12 +30,6 @@ class Tube8IE(KeezMoviesIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
- webpage)
-
def _real_extract(self, url):
webpage, info = self._extract_info(url)
diff --git a/hypervideo_dl/extractor/tubetugraz.py b/hypervideo_dl/extractor/tubetugraz.py
new file mode 100644
index 0000000..ebabedc
--- /dev/null
+++ b/hypervideo_dl/extractor/tubetugraz.py
@@ -0,0 +1,233 @@
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ parse_resolution,
+ traverse_obj,
+ urlencode_postdata,
+ variadic,
+)
+
+
+class TubeTuGrazBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'tubetugraz'
+
+ _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
+ _FORMAT_TYPES = ('presentation', 'presenter')
+
+ def _perform_login(self, username, password):
+ urlh = self._request_webpage(
+ 'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
+ None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
+ if not urlh:
+ return
+
+ urlh = self._request_webpage(
+ urlh.geturl(), None, fatal=False, headers={'referer': urlh.geturl()},
+ note='logging in', errnote='unable to log in', data=urlencode_postdata({
+ 'lang': 'de',
+ '_eventId_proceed': '',
+ 'j_username': username,
+ 'j_password': password
+ }))
+
+ if urlh and urlh.geturl() != 'https://tube.tugraz.at/paella/ui/index.html':
+ self.report_warning('unable to login: incorrect password')
+
+ def _extract_episode(self, episode_info):
+ id = episode_info.get('id')
+ formats = list(self._extract_formats(
+ traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id))
+
+ title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
+ series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
+ creator = ', '.join(variadic(traverse_obj(
+ episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
+ return {
+ 'id': id,
+ 'title': title,
+ 'creator': creator or None,
+ 'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
+ 'series': series_title,
+ 'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
+ 'episode': series_title and title,
+ 'formats': formats
+ }
+
+ def _set_format_type(self, formats, type):
+ for f in formats:
+ f['format_note'] = type
+ if not type.startswith(self._FORMAT_TYPES[0]):
+ f['preference'] = -2
+ return formats
+
+ def _extract_formats(self, format_list, id):
+ has_hls, has_dash = False, False
+
+ for format_info in format_list or []:
+ url = traverse_obj(format_info, ('tags', 'url'), 'url')
+ if url is None:
+ continue
+
+ type = format_info.get('type') or 'unknown'
+ transport = (format_info.get('transport') or 'https').lower()
+
+ if transport == 'https':
+ formats = [{
+ 'url': url,
+ 'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
+ 'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
+ 'fps': traverse_obj(format_info, ('video', 'framerate')),
+ **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
+ }]
+ elif transport == 'hls':
+ has_hls, formats = True, self._extract_m3u8_formats(
+ url, id, 'mp4', fatal=False, note=f'downloading {type} HLS manifest')
+ elif transport == 'dash':
+ has_dash, formats = True, self._extract_mpd_formats(
+ url, id, fatal=False, note=f'downloading {type} DASH manifest')
+ else:
+ # RTMP, HDS, SMOOTH, and unknown formats
+ # - RTMP URLs have failed on every entry tested so far
+ # - HDS URLs have returned 404 on every entry tested so far
+ # - SMOOTH URLs have returned 404 on every entry tested so far
+ continue
+
+ yield from self._set_format_type(formats, type)
+
+ # TODO: Add test for these
+ for type in self._FORMAT_TYPES:
+ if not has_hls:
+ hls_formats = self._extract_m3u8_formats(
+ f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/playlist.m3u8',
+ id, 'mp4', fatal=False, note=f'Downloading {type} HLS manifest', errnote=False) or []
+ yield from self._set_format_type(hls_formats, type)
+
+ if not has_dash:
+ dash_formats = self._extract_mpd_formats(
+ f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/manifest_mpm4sav_mvlist.mpd',
+ id, fatal=False, note=f'Downloading {type} DASH manifest', errnote=False)
+ yield from self._set_format_type(dash_formats, type)
+
+
+class TubeTuGrazIE(TubeTuGrazBaseIE):
+ IE_DESC = 'tube.tugraz.at'
+
+ _VALID_URL = r'''(?x)
+ https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
+ (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
+ '''
+ _TESTS = [
+ {
+ 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
+ 'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
+ 'info_dict': {
+ 'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
+ 'ext': 'mp4',
+ 'title': '#6 (23.11.2017)',
+ 'episode': '#6 (23.11.2017)',
+ 'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
+ 'creator': 'Safran C',
+ 'duration': 3295818,
+ 'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
+ }
+ }, {
+ 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
+ 'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
+ 'info_dict': {
+ 'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
+ 'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
+ 'ext': 'mp4',
+ },
+ 'expected_warnings': ['Extractor failed to obtain "title"'],
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ episode_data = self._download_json(
+ self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')
+
+ episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
+ return self._extract_episode(episode_info)
+
+
+class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
+ (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
+ '''
+ _TESTS = [{
+ 'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'info_dict': {
+ 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'title': '[209351] Strassenwesen',
+ },
+ 'playlist': [
+ {
+ 'info_dict': {
+ 'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#4 Detailprojekt',
+ 'episode': '#4 Detailprojekt',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 6127024,
+ }
+ },
+ {
+ 'info_dict': {
+ 'id': '87350498-799a-44d3-863f-d1518a98b114',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#3 Generelles Projekt',
+ 'episode': '#3 Generelles Projekt',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 5374422,
+ }
+ },
+ {
+ 'info_dict': {
+ 'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#2 Vorprojekt',
+ 'episode': '#2 Vorprojekt',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 5566404,
+ }
+ },
+ {
+ 'info_dict': {
+ 'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
+ 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
+ 'ext': 'mp4',
+ 'title': '#1 Variantenstudium',
+ 'episode': '#1 Variantenstudium',
+ 'series': '[209351] Strassenwesen',
+ 'creator': 'Neuhold R',
+ 'duration': 5420200,
+ }
+ }
+ ],
+ 'min_playlist_count': 4
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ episodes_data = self._download_json(self._API_EPISODE, id, query={'sid': id}, note='Downloading episode list')
+ series_data = self._download_json(
+ 'https://tube.tugraz.at/series/series.json', id, fatal=False,
+ note='downloading series metadata', errnote='failed to download series metadata',
+ query={
+ 'seriesId': id,
+ 'count': 1,
+ 'sort': 'TITLE'
+ })
+
+ return self.playlist_result(
+ map(self._extract_episode, episodes_data['search-results']['result']), id,
+ traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))
diff --git a/hypervideo_dl/extractor/tubitv.py b/hypervideo_dl/extractor/tubitv.py
index 31feb9a..de8b5da 100644
--- a/hypervideo_dl/extractor/tubitv.py
+++ b/hypervideo_dl/extractor/tubitv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -10,6 +7,7 @@ from ..utils import (
js_to_json,
sanitized_Request,
urlencode_postdata,
+ traverse_obj,
)
@@ -24,6 +22,19 @@ class TubiTvIE(InfoExtractor):
_NETRC_MACHINE = 'tubitv'
_GEO_COUNTRIES = ['US']
_TESTS = [{
+ 'url': 'https://tubitv.com/movies/383676/tracker',
+ 'md5': '566fa0f76870302d11af0de89511d3f0',
+ 'info_dict': {
+ 'id': '383676',
+ 'ext': 'mp4',
+ 'title': 'Tracker',
+ 'description': 'md5:ff320baf43d0ad2655e538c1d5cd9706',
+ 'uploader_id': 'f866e2677ea2f0dff719788e4f7f9195',
+ 'release_year': 2010,
+ 'thumbnail': r're:^https?://.+\.(jpe?g|png)$',
+ 'duration': 6122,
+ },
+ }, {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': {
@@ -33,13 +44,11 @@ class TubiTvIE(InfoExtractor):
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
},
+ 'skip': 'Content Unavailable'
}, {
'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
'only_matching': True,
}, {
- 'url': 'http://tubitv.com/movies/383676/tracker',
- 'only_matching': True,
- }, {
'url': 'https://tubitv.com/movies/560057/penitentiary?start=true',
'info_dict': {
'id': '560057',
@@ -49,11 +58,13 @@ class TubiTvIE(InfoExtractor):
'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2',
'release_year': 1979,
},
- 'params': {
- 'skip_download': True,
- },
+ 'skip': 'Content Unavailable'
}]
+ # DRM resource types are requested only so that an appropriate error can be raised
+ _UNPLAYABLE_FORMATS = ('hlsv6_widevine', 'hlsv6_widevine_nonclearlead', 'hlsv6_playready_psshv0',
+ 'hlsv6_fairplay', 'dash_widevine', 'dash_widevine_nonclearlead')
+
def _perform_login(self, username, password):
self.report_login()
form_data = {
@@ -71,18 +82,26 @@ class TubiTvIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- video_data = self._download_json(
- 'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
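+ # request every resource type, DRM ones included; the list is urlencoded as repeated
+ # parameters, e.g. ?video_resources=dash&video_resources=hlsv3&video_resources=hlsv6&...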
+ video_data = self._download_json(f'https://tubitv.com/oz/videos/{video_id}/content', video_id, query={
+ 'video_resources': ['dash', 'hlsv3', 'hlsv6', *self._UNPLAYABLE_FORMATS],
+ })
title = video_data['title']
formats = []
- url = video_data['url']
- # URL can be sometimes empty. Does this only happen when there is DRM?
- if url:
- formats = self._extract_m3u8_formats(
- self._proto_relative_url(url),
- video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
+ drm_formats = False
+
+ for resource in video_data['video_resources']:
+ if resource['type'] in ('dash', ):
+ formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False)
+ elif resource['type'] in ('hlsv3', 'hlsv6'):
+ formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False)
+ elif resource['type'] in self._UNPLAYABLE_FORMATS:
+ drm_formats = True
+
+ if not formats and drm_formats:
+ self.report_drm(video_id)
+ elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed
+ raise ExtractorError('This content is currently unavailable', expected=True)
thumbnails = []
for thumbnail_url in video_data.get('thumbnails', []):
@@ -138,6 +157,8 @@ class TubiTvShowIE(InfoExtractor):
show_webpage, 'data'), show_name, transform_source=js_to_json)['video']
for episode_id in show_json['fullContentById'].keys():
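+ # skip entries whose type is 's' - these do not appear to be playable episodes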
+ if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's':
+ continue
yield self.url_result(
'tubitv:%s' % episode_id,
ie=TubiTvIE.ie_key(), video_id=episode_id)
diff --git a/hypervideo_dl/extractor/tudou.py b/hypervideo_dl/extractor/tudou.py
deleted file mode 100644
index 7421378..0000000
--- a/hypervideo_dl/extractor/tudou.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class TudouPlaylistIE(InfoExtractor):
- IE_NAME = 'tudou:playlist'
- _VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html'
- _TESTS = [{
- 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html',
- 'info_dict': {
- 'id': 'zzdE77v6Mmo',
- },
- 'playlist_mincount': 209,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- playlist_data = self._download_json(
- 'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id, playlist_id)
- entries = [self.url_result(
- 'http://www.tudou.com/programs/view/%s' % item['icode'],
- 'Tudou', item['icode'],
- item['kw']) for item in playlist_data['items']]
- return self.playlist_result(entries, playlist_id)
-
-
-class TudouAlbumIE(InfoExtractor):
- IE_NAME = 'tudou:album'
- _VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})'
- _TESTS = [{
- 'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html',
- 'info_dict': {
- 'id': 'v5qckFJvNJg',
- },
- 'playlist_mincount': 45,
- }]
-
- def _real_extract(self, url):
- album_id = self._match_id(url)
- album_data = self._download_json(
- 'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id, album_id)
- entries = [self.url_result(
- 'http://www.tudou.com/programs/view/%s' % item['icode'],
- 'Tudou', item['icode'],
- item['kw']) for item in album_data['items']]
- return self.playlist_result(entries, album_id)
diff --git a/hypervideo_dl/extractor/tumblr.py b/hypervideo_dl/extractor/tumblr.py
index 8086f61..88d4ae3 100644
--- a/hypervideo_dl/extractor/tumblr.py
+++ b/hypervideo_dl/extractor/tumblr.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -362,7 +358,6 @@ class TumblrIE(InfoExtractor):
'height': int_or_none(
media_json.get('height') or self._og_search_property('video:height', webpage, default=None)),
}]
- self._sort_formats(formats)
# the url we're extracting from might be an original post or it might be a reblog.
# if it's a reblog, og:description will be the reblogger's comment, not the uploader's.
diff --git a/hypervideo_dl/extractor/tunein.py b/hypervideo_dl/extractor/tunein.py
index 7e51de8..43b4f67 100644
--- a/hypervideo_dl/extractor/tunein.py
+++ b/hypervideo_dl/extractor/tunein.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -11,12 +8,6 @@ from ..compat import compat_urlparse
class TuneInBaseIE(InfoExtractor):
_API_BASE_URL = 'http://tunein.com/tuner/tune/'
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)',
- webpage)
-
def _real_extract(self, url):
content_id = self._match_id(url)
@@ -58,7 +49,6 @@ class TuneInBaseIE(InfoExtractor):
'source_preference': reliability,
'format_note': format_note,
})
- self._sort_formats(formats)
return {
'id': content_id,
@@ -89,6 +79,7 @@ class TuneInClipIE(TuneInBaseIE):
class TuneInStationIE(TuneInBaseIE):
IE_NAME = 'tunein:station'
_VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId=|embed/player/s)(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)']
_API_URL_QUERY = '?tuneType=Station&stationId=%s'
@classmethod
diff --git a/hypervideo_dl/extractor/tunepk.py b/hypervideo_dl/extractor/tunepk.py
index 9d42651..e4e507b 100644
--- a/hypervideo_dl/extractor/tunepk.py
+++ b/hypervideo_dl/extractor/tunepk.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -59,7 +57,6 @@ class TunePkIE(InfoExtractor):
formats = self._parse_jwplayer_formats(
details['player']['sources'], video_id)
- self._sort_formats(formats)
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
diff --git a/hypervideo_dl/extractor/turbo.py b/hypervideo_dl/extractor/turbo.py
index f6bbf25..cdb7dcf 100644
--- a/hypervideo_dl/extractor/turbo.py
+++ b/hypervideo_dl/extractor/turbo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -56,7 +53,6 @@ class TurboIE(InfoExtractor):
'url': child.text,
'quality': get_quality(quality),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/turner.py b/hypervideo_dl/extractor/turner.py
index 519dc32..630d84b 100644
--- a/hypervideo_dl/extractor/turner.py
+++ b/hypervideo_dl/extractor/turner.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .adobepass import AdobePassIE
@@ -144,7 +141,7 @@ class TurnerBaseIE(AdobePassIE):
m3u8_id=format_id or 'hls', fatal=False)
if '/secure/' in video_url and '?hdnea=' in video_url:
for f in m3u8_formats:
- f['_ffmpeg_args'] = ['-seekable', '0']
+ f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0']}
formats.extend(m3u8_formats)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
@@ -177,7 +174,6 @@ class TurnerBaseIE(AdobePassIE):
else:
f['tbr'] = int(mobj.group(1))
formats.append(f)
- self._sort_formats(formats)
for source in video_data.findall('closedCaptions/source'):
for track in source.findall('track'):
@@ -252,7 +248,6 @@ class TurnerBaseIE(AdobePassIE):
'start_time': start_time,
'end_time': start_time + chapter_duration,
})
- self._sort_formats(formats)
return {
'formats': formats,
diff --git a/hypervideo_dl/extractor/tv2.py b/hypervideo_dl/extractor/tv2.py
index 977da30..c51e633 100644
--- a/hypervideo_dl/extractor/tv2.py
+++ b/hypervideo_dl/extractor/tv2.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -19,23 +16,27 @@ from ..utils import (
class TV2IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tv2\.no/v\d*/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/v(?:ideo)?\d*/(?:[^?#]+/)*(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.tv2.no/v/916509/',
+ 'url': 'http://www.tv2.no/v/1791207/',
'info_dict': {
- 'id': '916509',
+ 'id': '1791207',
'ext': 'mp4',
- 'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
- 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
- 'timestamp': 1431715610,
- 'upload_date': '20150515',
- 'duration': 157,
+ 'title': 'Her kolliderer romsonden med asteroiden ',
+ 'description': 'En romsonde har krasjet inn i en asteroide i verdensrommet. Kollisjonen skjedde klokken 01:14 natt til tirsdag 27. september norsk tid. \n\nNasa kaller det sitt første forsøk på planetforsvar.',
+ 'timestamp': 1664238190,
+ 'upload_date': '20220927',
+ 'duration': 146,
+ 'thumbnail': r're:^https://.*$',
'view_count': int,
'categories': list,
},
}, {
'url': 'http://www.tv2.no/v2/916509',
'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2.no/video/nyhetene/her-kolliderer-romsonden-med-asteroiden/1791207/',
+ 'only_matching': True,
}]
_PROTOCOLS = ('HLS', 'DASH')
_GEO_COUNTRIES = ['NO']
@@ -94,7 +95,6 @@ class TV2IE(InfoExtractor):
})
if not formats and data.get('drmProtected'):
self.report_drm(video_id)
- self._sort_formats(formats)
thumbnails = [{
'id': type,
@@ -117,13 +117,13 @@ class TV2IE(InfoExtractor):
class TV2ArticleIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?!v(?:ideo)?\d*/)[^?#]+/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+ 'url': 'https://www.tv2.no/underholdning/forraeder/katarina-flatland-angrer-etter-forraeder-exit/15095188/',
'info_dict': {
- 'id': '6930542',
- 'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret',
- 'description': 'De fire siktede nekter fortsatt for å ha stjålet pingvinbabyene, men innrømmer å ha åpnet luken til de små kyllingene.',
+ 'id': '15095188',
+ 'title': 'Katarina Flatland angrer etter Forræder-exit',
+ 'description': 'SANDEFJORD (TV 2): Katarina Flatland (33) måtte følge i sine fars fotspor, da hun ble forvist fra Forræder.',
},
'playlist_count': 2,
}, {
@@ -141,7 +141,7 @@ class TV2ArticleIE(InfoExtractor):
if not assets:
# New embed pattern
- for v in re.findall(r'(?s)TV2ContentboxVideo\(({.+?})\)', webpage):
+ for v in re.findall(r'(?s)(?:TV2ContentboxVideo|TV2\.TV2Video)\(({.+?})\)', webpage):
video = self._parse_json(
v, playlist_id, transform_source=js_to_json, fatal=False)
if not video:
@@ -257,7 +257,6 @@ class KatsomoIE(InfoExtractor):
})
if not formats and data.get('drmProtected'):
self.report_drm(video_id)
- self._sort_formats(formats)
thumbnails = [{
'id': thumbnail.get('@type'),
diff --git a/hypervideo_dl/extractor/tv24ua.py b/hypervideo_dl/extractor/tv24ua.py
new file mode 100644
index 0000000..89905ac
--- /dev/null
+++ b/hypervideo_dl/extractor/tv24ua.py
@@ -0,0 +1,78 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext, js_to_json, mimetype2ext, traverse_obj
+
+
+class TV24UAVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://24tv\.ua/news/showPlayer\.do.*?(?:\?|&)objectId=(?P<id>\d+)'
+ _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
+ IE_NAME = '24tv.ua'
+ _TESTS = [{
+ 'url': 'https://24tv.ua/news/showPlayer.do?objectId=2074790&videoUrl=2022/07/2074790&w=640&h=360',
+ 'info_dict': {
+ 'id': '2074790',
+ 'ext': 'mp4',
+ 'title': 'У Харкові ворожа ракета прилетіла в будинок, де слухали пісні про "офіцерів-росіян"',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ }
+ }, {
+ 'url': 'https://24tv.ua/news/showPlayer.do?videoUrl=2022/07/2074790&objectId=2074790&w=640&h=360',
+ 'only_matching': True,
+ }]
+
+ _WEBPAGE_TESTS = [
+ {
+ # iframe embed created from share menu.
+ 'url': 'data:text/html,%3Ciframe%20src=%22https://24tv.ua/news/showPlayer.do?objectId=1886193&videoUrl'
+ '=2022/03/1886193&w=640&h=360%22%20width=%22640%22%20height=%22360%22%20frameborder=%220%22'
+ '%20scrolling=%22no%22%3E%3C/iframe%3E',
+ 'info_dict': {
+ 'id': '1886193',
+ 'ext': 'mp4',
+ 'title': 'Росіяни руйнують Бородянку на Київщині та стріляють з літаків по мешканцях: шокуючі фото',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ }
+ },
+ {
+ 'url': 'https://24tv.ua/vipalyuyut-nashi-mista-sela-dsns-pokazali-motoroshni-naslidki_n1883966',
+ 'info_dict': {
+ 'id': '1883966',
+ 'ext': 'mp4',
+ 'title': 'Випалюють наші міста та села, – моторошні наслідки обстрілів на Чернігівщині',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ },
+ 'params': {'allowed_extractors': ['Generic', '24tv.ua']},
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+ subtitles = {}
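+ # the page inlines one or more "vPlayConfig.sources = [{...}]" JS arrays; parse each leniently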
+ for j in re.findall(r'vPlayConfig\.sources\s*=\s*(?P<json>\[{\s*(?s:.+?)\s*}])', webpage):
+ sources = self._parse_json(j, video_id, fatal=False, ignore_extra=True, transform_source=js_to_json, errnote='') or []
+ for source in sources:
+ if mimetype2ext(traverse_obj(source, 'type')) == 'm3u8':
+ f, s = self._extract_m3u8_formats_and_subtitles(source['src'], video_id)
+ formats.extend(f)
+ self._merge_subtitles(subtitles, s)
+ else:
+ formats.append({
+ 'url': source['src'],
+ 'ext': determine_ext(source['src']),
+ })
+ thumbnail = traverse_obj(
+ self._search_json(
+ r'var\s*vPlayConfig\s*=\s*', webpage, 'thumbnail',
+ video_id, default=None, transform_source=js_to_json), 'poster')
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': thumbnail or self._og_search_thumbnail(webpage),
+ 'title': self._generic_title('', webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ }
diff --git a/hypervideo_dl/extractor/tv2dk.py b/hypervideo_dl/extractor/tv2dk.py
index ec5cbdf..35e92f1 100644
--- a/hypervideo_dl/extractor/tv2dk.py
+++ b/hypervideo_dl/extractor/tv2dk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
import re
@@ -167,7 +164,6 @@ class TV2DKBornholmPlayIE(InfoExtractor):
formats.append({
'url': src,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/tv2hu.py b/hypervideo_dl/extractor/tv2hu.py
index f210435..d4c21c0 100644
--- a/hypervideo_dl/extractor/tv2hu.py
+++ b/hypervideo_dl/extractor/tv2hu.py
@@ -1,6 +1,4 @@
# encoding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
traverse_obj,
@@ -68,7 +66,6 @@ class TV2HuIE(InfoExtractor):
video_json = self._download_json(video_json_url, video_id)
m3u8_url = self._proto_relative_url(traverse_obj(video_json, ('bitrates', 'hls')))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/tv4.py b/hypervideo_dl/extractor/tv4.py
index 4043e63..1378a6f 100644
--- a/hypervideo_dl/extractor/tv4.py
+++ b/hypervideo_dl/extractor/tv4.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -122,8 +119,6 @@ class TV4IE(InfoExtractor):
if not formats and info.get('is_geo_restricted'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/tv5mondeplus.py b/hypervideo_dl/extractor/tv5mondeplus.py
index a0832d2..bd0be78 100644
--- a/hypervideo_dl/extractor/tv5mondeplus.py
+++ b/hypervideo_dl/extractor/tv5mondeplus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -80,7 +77,6 @@ class TV5MondePlusIE(InfoExtractor):
'url': v_url,
'format_id': video_format,
})
- self._sort_formats(formats)
metadata = self._parse_json(
vpl_data['data-metadata'], display_id)
diff --git a/hypervideo_dl/extractor/tv5unis.py b/hypervideo_dl/extractor/tv5unis.py
index 398b85d..978255b 100644
--- a/hypervideo_dl/extractor/tv5unis.py
+++ b/hypervideo_dl/extractor/tv5unis.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/tva.py b/hypervideo_dl/extractor/tva.py
index 52a4ddf..9afe233 100644
--- a/hypervideo_dl/extractor/tva.py
+++ b/hypervideo_dl/extractor/tva.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
diff --git a/hypervideo_dl/extractor/tvanouvelles.py b/hypervideo_dl/extractor/tvanouvelles.py
index 1086176..b9f5e11 100644
--- a/hypervideo_dl/extractor/tvanouvelles.py
+++ b/hypervideo_dl/extractor/tvanouvelles.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/tvc.py b/hypervideo_dl/extractor/tvc.py
index 008f64c..caa76ab 100644
--- a/hypervideo_dl/extractor/tvc.py
+++ b/hypervideo_dl/extractor/tvc.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -12,6 +7,7 @@ from ..utils import (
class TVCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1']
_TEST = {
'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
@@ -24,13 +20,6 @@ class TVCIE(InfoExtractor):
},
}
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1', webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -52,7 +41,6 @@ class TVCIE(InfoExtractor):
'height': int_or_none(info.get('height')),
'tbr': int_or_none(info.get('bitrate')),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/tver.py b/hypervideo_dl/extractor/tver.py
index 9ff3136..cebd027 100644
--- a/hypervideo_dl/extractor/tver.py
+++ b/hypervideo_dl/extractor/tver.py
@@ -1,77 +1,105 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
- int_or_none,
- remove_start,
+ join_nonempty,
smuggle_url,
+ str_or_none,
+ strip_or_none,
traverse_obj,
)
class TVerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>corner|episode|feature|lp|tokyo2020/video)/(?P<id>[fc]?\d+)'
- # videos are only available for 7 days
+ _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
- 'url': 'https://tver.jp/corner/f0062178',
- 'only_matching': True,
- }, {
- 'url': 'https://tver.jp/feature/f0062413',
- 'only_matching': True,
- }, {
- 'url': 'https://tver.jp/episode/79622438',
- 'only_matching': True,
+ 'skip': 'videos are only available for 7 days',
+ 'url': 'https://tver.jp/episodes/ep83nf3w4p',
+ 'info_dict': {
+ 'title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
+ 'description': 'md5:dc2c06b6acc23f1e7c730c513737719b',
+ 'series': '家事ヤロウ!!!',
+ 'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
+ 'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
+ 'channel': 'テレビ朝日',
+ 'onair_label': '5月3日(火)放送分',
+ 'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! テレビ朝日 5月3日(火)放送分',
+ },
+ 'add_ie': ['BrightcoveNew'],
}, {
- # subtitle = ' '
- 'url': 'https://tver.jp/corner/f0068870',
+ 'url': 'https://tver.jp/corner/f0103888',
'only_matching': True,
}, {
- 'url': 'https://tver.jp/lp/f0009694',
- 'only_matching': True,
- }, {
- 'url': 'https://tver.jp/lp/c0000239',
- 'only_matching': True,
- }, {
- 'url': 'https://tver.jp/tokyo2020/video/6264525510001',
+ 'url': 'https://tver.jp/lp/f0033031',
'only_matching': True,
}]
- _TOKEN = None
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+ _PLATFORM_UID = None
+ _PLATFORM_TOKEN = None
def _real_initialize(self):
- self._TOKEN = self._download_json(
- 'https://tver.jp/api/access_token.php', None)['token']
+ create_response = self._download_json(
+ 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None,
+ note='Creating session', data=b'device_type=pc', headers={
+ 'Origin': 'https://s.tver.jp',
+ 'Referer': 'https://s.tver.jp/',
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid'))
+ self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))
def _real_extract(self, url):
- path, video_id = self._match_valid_url(url).groups()
- if path == 'lp':
- webpage = self._download_webpage(url, video_id)
- redirect_path = self._search_regex(r'to_href="([^"]+)', webpage, 'redirect path')
- path, video_id = self._match_valid_url(f'https://tver.jp{redirect_path}').groups()
- api_response = self._download_json(f'https://api.tver.jp/v4/{path}/{video_id}', video_id, query={'token': self._TOKEN})
- p_id = traverse_obj(api_response, ('main', 'publisher_id'))
- if not p_id:
- error_msg, expected = traverse_obj(api_response, ('episode', 0, 'textbar', 0, ('text', 'longer')), get_all=False), True
- if not error_msg:
- error_msg, expected = 'Failed to extract publisher ID', False
- raise ExtractorError(error_msg, expected=expected)
- service = remove_start(traverse_obj(api_response, ('main', 'service')), 'ts_')
+ video_id, video_type = self._match_valid_url(url).group('id', 'type')
+ if video_type not in {'series', 'episodes'}:
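+ # legacy corner/lp/feature pages embed the new /episodes/ URL as a canonical
+ # link or share link; resolve it before calling the episode API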
+ webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
+ video_id = self._match_id(self._search_regex(
+ (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
+ webpage, 'url regex'))
+
+ episode_info = self._download_json(
+ f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
+ video_id, fatal=False,
+ query={
+ 'platform_uid': self._PLATFORM_UID,
+ 'platform_token': self._PLATFORM_TOKEN,
+ }, headers={
+ 'x-tver-platform-type': 'web'
+ })
+ episode_content = traverse_obj(
+ episode_info, ('result', 'episode', 'content')) or {}
+
+ video_info = self._download_json(
+ f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
+ query={
+ 'v': str_or_none(episode_content.get('version')) or '5',
+ }, headers={
+ 'Origin': 'https://tver.jp',
+ 'Referer': 'https://tver.jp/',
+ })
+ p_id = video_info['video']['accountID']
+ r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
+ if not r_id:
+ raise ExtractorError('Failed to extract reference ID for Brightcove')
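+ # Brightcove expects non-numeric reference IDs to be prefixed with "ref:"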
+ if not r_id.isdigit():
+ r_id = f'ref:{r_id}'
- r_id = traverse_obj(api_response, ('main', 'reference_id'))
- if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
- r_id = 'ref:' + r_id
- bc_url = smuggle_url(
- self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
- {'geo_countries': ['JP']})
+ episode = strip_or_none(episode_content.get('title'))
+ series = str_or_none(episode_content.get('seriesTitle'))
+ title = (
+ join_nonempty(series, episode, delim=' ')
+ or str_or_none(video_info.get('title')))
+ provider = str_or_none(episode_content.get('productionProviderName'))
+ onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
return {
'_type': 'url_transparent',
- 'description': traverse_obj(api_response, ('main', 'note', 0, 'text'), expected_type=compat_str),
- 'episode_number': int_or_none(traverse_obj(api_response, ('main', 'ext', 'episode_number'), expected_type=compat_str)),
- 'url': bc_url,
+ 'title': title,
+ 'series': series,
+ 'episode': episode,
+ # another title, which some viewers consider the "full title"
+ 'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
+ 'channel': provider,
+ 'description': str_or_none(video_info.get('description')),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
'ie_key': 'BrightcoveNew',
}
diff --git a/hypervideo_dl/extractor/tvigle.py b/hypervideo_dl/extractor/tvigle.py
index aa25ba0..6c98219 100644
--- a/hypervideo_dl/extractor/tvigle.py
+++ b/hypervideo_dl/extractor/tvigle.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -17,6 +13,7 @@ class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
_VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1']
_GEO_BYPASS = False
_GEO_COUNTRIES = ['RU']
@@ -123,7 +120,6 @@ class TvigleIE(InfoExtractor):
'height': int_or_none(height),
'filesize': filesize,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/tviplayer.py b/hypervideo_dl/extractor/tviplayer.py
new file mode 100644
index 0000000..7e9b04d
--- /dev/null
+++ b/hypervideo_dl/extractor/tviplayer.py
@@ -0,0 +1,78 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class TVIPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://tviplayer\.iol\.pt(/programa/[\w-]+/[a-f0-9]+)?/\w+/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://tviplayer.iol.pt/programa/jornal-das-8/53c6b3903004dc006243d0cf/video/61c8e8b90cf2c7ea0f0f71a9',
+ 'info_dict': {
+ 'id': '61c8e8b90cf2c7ea0f0f71a9',
+ 'ext': 'mp4',
+ 'duration': 4167,
+ 'title': 'Jornal das 8 - 26 de dezembro de 2021',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/61c8ee630cf2cc58e7d98d9f/',
+ 'season_number': 8,
+ 'season': 'Season 8',
+ }
+ }, {
+ 'url': 'https://tviplayer.iol.pt/programa/isabel/62b471090cf26256cd2a8594/video/62be445f0cf2ea4f0a5218e5',
+ 'info_dict': {
+ 'id': '62be445f0cf2ea4f0a5218e5',
+ 'ext': 'mp4',
+ 'duration': 3255,
+ 'season': 'Season 1',
+ 'title': 'Isabel - Episódio 1',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62beac200cf2f9a86eab856b/',
+ 'season_number': 1,
+ }
+ }, {
+ # no /programa/
+ 'url': 'https://tviplayer.iol.pt/video/62c4131c0cf2f9a86eac06bb',
+ 'info_dict': {
+ 'id': '62c4131c0cf2f9a86eac06bb',
+ 'ext': 'mp4',
+ 'title': 'David e Mickael Carreira respondem: «Qual é o próximo a ser pai?»',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62c416490cf2ea367d4433fd/',
+ 'season': 'Season 2',
+ 'duration': 148,
+ 'season_number': 2,
+ }
+ }, {
+ # episodio url
+ 'url': 'https://tviplayer.iol.pt/programa/para-sempre/61716c360cf2365a5ed894c4/episodio/t1e187',
+ 'info_dict': {
+ 'id': 't1e187',
+ 'ext': 'mp4',
+ 'season': 'Season 1',
+ 'title': 'Quem denunciou Pedro?',
+ 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62eda30b0cf2ea367d48973b/',
+ 'duration': 1250,
+ 'season_number': 1,
+ }
+ }]
+
+ def _real_initialize(self):
+ self.wms_auth_sign_token = self._download_webpage(
+ 'https://services.iol.pt/matrix?userId=', 'wmsAuthSign',
+ note='Trying to get wmsAuthSign token')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ json_data = self._search_json(
+ r'<script>\s*jsonData\s*=', webpage, 'json_data', video_id)
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'{json_data["videoUrl"]}?wmsAuthSign={self.wms_auth_sign_token}',
+ video_id, ext='mp4')
+ return {
+ 'id': video_id,
+ 'title': json_data.get('title') or self._og_search_title(webpage),
+ 'thumbnail': json_data.get('cover') or self._og_search_thumbnail(webpage),
+ 'duration': json_data.get('duration'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'season_number': traverse_obj(json_data, ('program', 'seasonNum')),
+ }
diff --git a/hypervideo_dl/extractor/tvland.py b/hypervideo_dl/extractor/tvland.py
index 9ebf57f..481d5eb 100644
--- a/hypervideo_dl/extractor/tvland.py
+++ b/hypervideo_dl/extractor/tvland.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .mtv import MTVServicesInfoExtractor
# TODO: Remove - no longer used - service moved to YouTube
diff --git a/hypervideo_dl/extractor/tvn24.py b/hypervideo_dl/extractor/tvn24.py
index de0fb50..9c777c1 100644
--- a/hypervideo_dl/extractor/tvn24.py
+++ b/hypervideo_dl/extractor/tvn24.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -73,7 +70,6 @@ class TVN24IE(InfoExtractor):
'format_id': format_id,
'height': int_or_none(format_id.rstrip('p')),
})
- self._sort_formats(formats)
description = self._og_search_description(webpage, default=None)
thumbnail = self._og_search_thumbnail(
diff --git a/hypervideo_dl/extractor/tvnet.py b/hypervideo_dl/extractor/tvnet.py
index aa1e9d9..77426f7 100644
--- a/hypervideo_dl/extractor/tvnet.py
+++ b/hypervideo_dl/extractor/tvnet.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -112,7 +109,6 @@ class TVNetIE(InfoExtractor):
stream_urls.add(stream_url)
formats.extend(self._extract_m3u8_formats(
stream_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
# better support for radio streams
if title.startswith('VOV'):
diff --git a/hypervideo_dl/extractor/tvnoe.py b/hypervideo_dl/extractor/tvnoe.py
index 26a5aea..712fbb2 100644
--- a/hypervideo_dl/extractor/tvnoe.py
+++ b/hypervideo_dl/extractor/tvnoe.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/tvnow.py b/hypervideo_dl/extractor/tvnow.py
index b318184..0acc306 100644
--- a/hypervideo_dl/extractor/tvnow.py
+++ b/hypervideo_dl/extractor/tvnow.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -77,7 +74,6 @@ class TVNowBaseIE(InfoExtractor):
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
- self._sort_formats(formats)
description = info.get('articleLong') or info.get('articleShort')
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
@@ -395,7 +391,6 @@ class TVNowIE(TVNowNewBaseIE):
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
- self._sort_formats(formats)
description = source.get('description')
thumbnail = url_or_none(source.get('poster'))
@@ -429,7 +424,7 @@ class TVNowIE(TVNowNewBaseIE):
return self._extract_video(info, video_id, display_id)
-class TVNowFilmIE(TVNowIE):
+class TVNowFilmIE(TVNowIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'''(?x)
(?P<base_url>https?://
(?:www\.)?tvnow\.(?:de|at|ch)/
diff --git a/hypervideo_dl/extractor/tvopengr.py b/hypervideo_dl/extractor/tvopengr.py
index a11cdc6..e208e57 100644
--- a/hypervideo_dl/extractor/tvopengr.py
+++ b/hypervideo_dl/extractor/tvopengr.py
@@ -1,14 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
get_elements_text_and_html_by_attribute,
scale_thumbnails_to_max_format_width,
- unescapeHTML,
)
@@ -75,7 +69,6 @@ class TVOpenGrWatchIE(TVOpenGrBaseIE):
continue
formats.extend(formats_)
self._merge_subtitles(subs_, target=subs)
- self._sort_formats(formats)
return formats, subs
def _real_extract(self, url):
@@ -101,7 +94,7 @@ class TVOpenGrEmbedIE(TVOpenGrBaseIE):
IE_NAME = 'tvopengr:embed'
IE_DESC = 'tvopen.gr embedded videos'
_VALID_URL = r'(?:https?:)?//(?:www\.|cdn\.|)(?:tvopen|ethnos).gr/embed/(?P<id>\d+)'
- _EMBED_RE = re.compile(rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''')
+ _EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''']
_TESTS = [{
'url': 'https://cdn.ethnos.gr/embed/100963',
@@ -118,11 +111,6 @@ class TVOpenGrEmbedIE(TVOpenGrBaseIE):
},
}]
- @classmethod
- def _extract_urls(cls, webpage):
- for mobj in cls._EMBED_RE.finditer(webpage):
- yield unescapeHTML(mobj.group('url'))
-
def _real_extract(self, url):
video_id = self._match_id(url)
return self._return_canonical_url(url, video_id)
diff --git a/hypervideo_dl/extractor/tvp.py b/hypervideo_dl/extractor/tvp.py
index 48e2c6e..8483564 100644
--- a/hypervideo_dl/extractor/tvp.py
+++ b/hypervideo_dl/extractor/tvp.py
@@ -1,46 +1,54 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import random
import re
from .common import InfoExtractor
from ..utils import (
+ clean_html,
determine_ext,
dict_get,
ExtractorError,
int_or_none,
js_to_json,
- orderedSet,
str_or_none,
+ strip_or_none,
+ traverse_obj,
try_get,
+ url_or_none,
)
class TVPIE(InfoExtractor):
IE_NAME = 'tvp'
IE_DESC = 'Telewizja Polska'
- _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|polandin\.com)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
_TESTS = [{
# TVPlayer 2 in js wrapper
- 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
+ 'url': 'https://swipeto.pl/64095316/uliczny-foxtrot-wypozyczalnia-kaset-kto-pamieta-dvdvideo',
'info_dict': {
- 'id': '194536',
+ 'id': '64095316',
'ext': 'mp4',
- 'title': 'Czas honoru, odc. 13 – Władek',
- 'description': 'md5:437f48b93558370b031740546b696e24',
- 'age_limit': 12,
+ 'title': 'Uliczny Foxtrot — Wypożyczalnia kaset. Kto pamięta DVD-Video?',
+ 'age_limit': 0,
+ 'duration': 374,
+ 'thumbnail': r're:https://.+',
},
+ 'expected_warnings': [
+ 'Failed to download ISM manifest: HTTP Error 404: Not Found',
+ 'Failed to download m3u8 information: HTTP Error 404: Not Found',
+ ],
}, {
# TVPlayer legacy
- 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
+ 'url': 'https://www.tvp.pl/polska-press-video-uploader/wideo/62042351',
'info_dict': {
- 'id': '17916176',
+ 'id': '62042351',
'ext': 'mp4',
- 'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
- 'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
+ 'title': 'Wideo',
+ 'description': 'Wideo Kamera',
+ 'duration': 24,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://.+',
},
}, {
# TVPlayer 2 in iframe
@@ -51,6 +59,8 @@ class TVPIE(InfoExtractor):
'title': 'Dzieci na sprzedaż dla homoseksualistów',
'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590',
'age_limit': 12,
+ 'duration': 259,
+ 'thumbnail': r're:https://.+',
},
}, {
# TVPlayer 2 in client-side rendered website (regional; window.__newsData)
@@ -61,7 +71,11 @@ class TVPIE(InfoExtractor):
'title': 'Studio Yayo',
'upload_date': '20160616',
'timestamp': 1466075700,
- }
+ 'age_limit': 0,
+ 'duration': 20,
+ 'thumbnail': r're:https://.+',
+ },
+ 'skip': 'Geo-blocked outside PL',
}, {
# TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
'url': 'https://www.tvp.info/52880236/09042021-0800',
@@ -69,7 +83,10 @@ class TVPIE(InfoExtractor):
'id': '52880236',
'ext': 'mp4',
'title': '09.04.2021, 08:00',
+ 'age_limit': 0,
+ 'thumbnail': r're:https://.+',
},
+ 'skip': 'Geo-blocked outside PL',
}, {
# client-side rendered (regional) program (playlist) page
'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
@@ -125,7 +142,7 @@ class TVPIE(InfoExtractor):
'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277',
'only_matching': True,
}, {
- 'url': 'https://polandin.com/47942651/pln-10-billion-in-subsidies-transferred-to-companies-pm',
+ 'url': 'https://tvpworld.com/48583640/tescos-polish-business-bought-by-danish-chain-netto',
'only_matching': True,
}]
@@ -154,16 +171,13 @@ class TVPIE(InfoExtractor):
is_website = video_data.get('type') == 'website'
if is_website:
url = video_data['url']
- fucked_up_url_parts = re.match(r'https?://vod\.tvp\.pl/(\d+)/([^/?#]+)', url)
- if fucked_up_url_parts:
- url = f'https://vod.tvp.pl/website/{fucked_up_url_parts.group(2)},{fucked_up_url_parts.group(1)}'
else:
url = 'tvp:' + str_or_none(video_data.get('_id') or page_id)
return {
'_type': 'url_transparent',
'id': str_or_none(video_data.get('_id') or page_id),
'url': url,
- 'ie_key': 'TVPEmbed' if not is_website else 'TVPWebsite',
+ 'ie_key': (TVPIE if is_website else TVPEmbedIE).ie_key(),
'title': str_or_none(video_data.get('title')),
'description': str_or_none(video_data.get('lead')),
'timestamp': int_or_none(video_data.get('release_date_long')),
@@ -220,8 +234,9 @@ class TVPIE(InfoExtractor):
# The URL may redirect to a VOD
# example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
- if TVPWebsiteIE.suitable(urlh.url):
- return self.url_result(urlh.url, ie=TVPWebsiteIE.ie_key(), video_id=page_id)
+ for ie_cls in (TVPVODSeriesIE, TVPVODVideoIE):
+ if ie_cls.suitable(urlh.url):
+ return self.url_result(urlh.url, ie=ie_cls.ie_key(), video_id=page_id)
if re.search(
r'window\.__(?:video|news|website|directory)Data\s*=',
@@ -300,12 +315,13 @@ class TVPStreamIE(InfoExtractor):
class TVPEmbedIE(InfoExtractor):
IE_NAME = 'tvp:embed'
IE_DESC = 'Telewizja Polska'
+ _GEO_BYPASS = False
_VALID_URL = r'''(?x)
(?:
tvp:
|https?://
(?:[^/]+\.)?
- (?:tvp(?:parlament)?\.pl|tvp\.info|polandin\.com)/
+ (?:tvp(?:parlament)?\.pl|tvp\.info|tvpworld\.com|swipeto\.pl)/
(?:sess/
(?:tvplayer\.php\?.*?object_id
|TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd])
@@ -313,6 +329,7 @@ class TVPEmbedIE(InfoExtractor):
=)
(?P<id>\d+)
'''
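+ # the [4:] slice strips the leading (?x) flag from _VALID_URL so it can be inlined after the iframe prefix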
+ _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL[4:]})']
_TESTS = [{
'url': 'tvp:194536',
@@ -322,6 +339,12 @@ class TVPEmbedIE(InfoExtractor):
'title': 'Czas honoru, odc. 13 – Władek',
'description': 'md5:76649d2014f65c99477be17f23a4dead',
'age_limit': 12,
+ 'duration': 2652,
+ 'series': 'Czas honoru',
+ 'episode': 'Episode 13',
+ 'episode_number': 13,
+ 'season': 'sezon 1',
+ 'thumbnail': r're:https://.+',
},
}, {
'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&amp;autoplay=false',
@@ -329,6 +352,9 @@ class TVPEmbedIE(InfoExtractor):
'id': '51247504',
'ext': 'mp4',
'title': 'Razmova 091220',
+ 'duration': 876,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://.+',
},
}, {
# TVPlayer2 embed URL
@@ -343,12 +369,6 @@ class TVPEmbedIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage, **kw):
- return [m.group('embed') for m in re.finditer(
- r'(?x)<iframe[^>]+?src=(["\'])(?P<embed>%s)' % TVPEmbedIE._VALID_URL[4:],
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -369,44 +389,50 @@ class TVPEmbedIE(InfoExtractor):
# stripping JSONP padding
datastr = webpage[15 + len(callback):-3]
if datastr.startswith('null,'):
- error = self._parse_json(datastr[5:], video_id)
- raise ExtractorError(error[0]['desc'])
+ error = self._parse_json(datastr[5:], video_id, fatal=False)
+ error_desc = traverse_obj(error, (0, 'desc'))
+
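+ # "Obiekt wymaga płatności" is Polish for "the object requires payment"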
+ if error_desc == 'Obiekt wymaga płatności':
+ raise ExtractorError('Video requires payment and log-in, but log-in is not implemented')
+
+ raise ExtractorError(error_desc or 'unexpected JSON error')
content = self._parse_json(datastr, video_id)['content']
info = content['info']
is_live = try_get(info, lambda x: x['isLive'], bool)
+ if info.get('isGeoBlocked'):
+ # the actual country list is not provided; assume the video is always available in PL
+ self.raise_geo_restricted(countries=['PL'])
+
formats = []
for file in content['files']:
- video_url = file.get('url')
+ video_url = url_or_none(file.get('url'))
if not video_url:
continue
- if video_url.endswith('.m3u8'):
+ ext = determine_ext(video_url, None)
+ if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False, live=is_live))
- elif video_url.endswith('.mpd'):
+ elif ext == 'mpd':
if is_live:
# doesn't work with either ffmpeg or native downloader
continue
formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
- elif video_url.endswith('.f4m'):
+ elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
elif video_url.endswith('.ism/manifest'):
formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss', fatal=False))
else:
- # mp4, wmv or something
- quality = file.get('quality', {})
formats.append({
'format_id': 'direct',
'url': video_url,
- 'ext': determine_ext(video_url, file['type']),
- 'fps': int_or_none(quality.get('fps')),
- 'tbr': int_or_none(quality.get('bitrate')),
- 'width': int_or_none(quality.get('width')),
- 'height': int_or_none(quality.get('height')),
+ 'ext': ext or file.get('type'),
+ 'fps': int_or_none(traverse_obj(file, ('quality', 'fps'))),
+ 'tbr': int_or_none(traverse_obj(file, ('quality', 'bitrate')), scale=1000),
+ 'width': int_or_none(traverse_obj(file, ('quality', 'width'))),
+ 'height': int_or_none(traverse_obj(file, ('quality', 'height'))),
})
- self._sort_formats(formats)
-
title = dict_get(info, ('subtitle', 'title', 'seoTitle'))
description = dict_get(info, ('description', 'seoDescription'))
thumbnails = []
@@ -457,57 +483,105 @@ class TVPEmbedIE(InfoExtractor):
return info_dict
-class TVPWebsiteIE(InfoExtractor):
- IE_NAME = 'tvp:series'
- _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)'
+class TVPVODBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://vod.tvp.pl/api/products'
+
+ def _call_api(self, resource, video_id, **kwargs):
+ return self._download_json(
+ f'{self._API_BASE_URL}/{resource}', video_id,
+ query={'lang': 'pl', 'platform': 'BROWSER'}, **kwargs)
+
+ def _parse_video(self, video):
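+ # map an API product object to a url result that delegates extraction to TVPEmbedIE (tvp:<uid> pseudo-URL)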
+ return {
+ '_type': 'url',
+ 'url': 'tvp:' + video['externalUid'],
+ 'ie_key': TVPEmbedIE.ie_key(),
+ 'title': video.get('title'),
+ 'description': traverse_obj(video, ('lead', 'description')),
+ 'age_limit': int_or_none(video.get('rating')),
+ 'duration': int_or_none(video.get('duration')),
+ }
+
+
+class TVPVODVideoIE(TVPVODBaseIE):
+ IE_NAME = 'tvp:vod'
+ _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'
_TESTS = [{
- # series
- 'url': 'https://vod.tvp.pl/website/wspaniale-stulecie,17069012/video',
+ 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
'info_dict': {
- 'id': '17069012',
+ 'id': '60468609',
+ 'ext': 'mp4',
+ 'title': 'Laboratorium alchemika, Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
+ 'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
+ 'duration': 300,
+ 'episode_number': 24,
+ 'episode': 'Episode 24',
+ 'age_limit': 0,
+ 'series': 'Laboratorium alchemika',
+ 'thumbnail': 're:https://.+',
},
- 'playlist_count': 312,
}, {
- # film
- 'url': 'https://vod.tvp.pl/website/krzysztof-krawczyk-cale-moje-zycie,51374466',
+ 'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
'info_dict': {
- 'id': '51374509',
+ 'id': '51640077',
'ext': 'mp4',
- 'title': 'Krzysztof Krawczyk – całe moje życie, Krzysztof Krawczyk – całe moje życie',
- 'description': 'md5:2e80823f00f5fc263555482f76f8fa42',
+ 'title': 'Ukraiński sługa narodu, Ukraiński sługa narodu',
+ 'series': 'Ukraiński sługa narodu',
+ 'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
'age_limit': 12,
+ 'episode': 'Episode 0',
+ 'episode_number': 0,
+ 'duration': 3051,
+ 'thumbnail': 're:https://.+',
},
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['TVPEmbed'],
- }, {
- 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
- 'only_matching': True,
}]
- def _entries(self, display_id, playlist_id):
- url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
- for page_num in itertools.count(1):
- page = self._download_webpage(
- url, display_id, 'Downloading page %d' % page_num,
- query={'page': page_num})
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return self._parse_video(self._call_api(f'vods/{video_id}', video_id))
+
- video_ids = orderedSet(re.findall(
- r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id,
- page))
+class TVPVODSeriesIE(TVPVODBaseIE):
+ IE_NAME = 'tvp:vod:series'
+ _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+-odcinki,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'
- if not video_ids:
- break
+ _TESTS = [{
+ 'url': 'https://vod.tvp.pl/seriale,18/ranczo-odcinki,316445',
+ 'info_dict': {
+ 'id': '316445',
+ 'title': 'Ranczo',
+ 'age_limit': 12,
+ 'categories': ['seriale'],
+ },
+ 'playlist_count': 129,
+ }, {
+ 'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338',
+ 'only_matching': True,
+ }]
- for video_id in video_ids:
- yield self.url_result(
- 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
- video_id=video_id)
+ def _entries(self, seasons, playlist_id):
+ for season in seasons:
+ episodes = self._call_api(
+ f'vods/serials/{playlist_id}/seasons/{season["id"]}/episodes', playlist_id,
+ note=f'Downloading episode list for {season["title"]}')
+ yield from map(self._parse_video, episodes)
def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- display_id, playlist_id = mobj.group('display_id', 'id')
+ playlist_id = self._match_id(url)
+ metadata = self._call_api(
+ f'vods/serials/{playlist_id}', playlist_id,
+ note='Downloading serial metadata')
+ seasons = self._call_api(
+ f'vods/serials/{playlist_id}/seasons', playlist_id,
+ note='Downloading season list')
return self.playlist_result(
- self._entries(display_id, playlist_id), playlist_id)
+ self._entries(seasons, playlist_id), playlist_id, strip_or_none(metadata.get('title')),
+ clean_html(traverse_obj(metadata, ('description', 'lead'), expected_type=strip_or_none)),
+ categories=[traverse_obj(metadata, ('mainCategory', 'name'))],
+ age_limit=int_or_none(metadata.get('rating')),
+ )
diff --git a/hypervideo_dl/extractor/tvplay.py b/hypervideo_dl/extractor/tvplay.py
index b5dbc55..9ef4f96 100644
--- a/hypervideo_dl/extractor/tvplay.py
+++ b/hypervideo_dl/extractor/tvplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -297,8 +294,6 @@ class TVPlayIE(InfoExtractor):
'This content might not be available in your country due to copyright reasons',
metadata_available=True)
- self._sort_formats(formats)
-
# TODO: webvtt in m3u8
subtitles = {}
sami_path = video.get('sami_path')
@@ -413,7 +408,6 @@ class ViafreeIE(InfoExtractor):
raise
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_href, guid, 'mp4')
- self._sort_formats(formats)
episode = program.get('episode') or {}
return {
'id': guid,
@@ -498,7 +492,6 @@ class TVPlayHomeIE(InfoExtractor):
urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
thumbnails = set(traverse_obj(
data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none))
diff --git a/hypervideo_dl/extractor/tvplayer.py b/hypervideo_dl/extractor/tvplayer.py
index 5970596..b05355f 100644
--- a/hypervideo_dl/extractor/tvplayer.py
+++ b/hypervideo_dl/extractor/tvplayer.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
@@ -75,7 +72,6 @@ class TVPlayerIE(InfoExtractor):
raise
formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
- self._sort_formats(formats)
return {
'id': resource_id,
diff --git a/hypervideo_dl/extractor/tweakers.py b/hypervideo_dl/extractor/tweakers.py
index 2b10d9b..e8e1fc6 100644
--- a/hypervideo_dl/extractor/tweakers.py
+++ b/hypervideo_dl/extractor/tweakers.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -49,7 +47,6 @@ class TweakersIE(InfoExtractor):
'height': height,
'ext': ext,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/twentyfourvideo.py b/hypervideo_dl/extractor/twentyfourvideo.py
index ae19e11..baeb85d 100644
--- a/hypervideo_dl/extractor/twentyfourvideo.py
+++ b/hypervideo_dl/extractor/twentyfourvideo.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
diff --git a/hypervideo_dl/extractor/twentymin.py b/hypervideo_dl/extractor/twentymin.py
index a42977f..74f90b0 100644
--- a/hypervideo_dl/extractor/twentymin.py
+++ b/hypervideo_dl/extractor/twentymin.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -21,6 +16,7 @@ class TwentyMinutenIE(InfoExtractor):
)
(?P<id>\d+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1']
_TESTS = [{
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
'md5': 'e7264320db31eed8c38364150c12496e',
@@ -47,12 +43,6 @@ class TwentyMinutenIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -67,7 +57,6 @@ class TwentyMinutenIE(InfoExtractor):
'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
'quality': quality,
} for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
- self._sort_formats(formats)
description = video.get('lead')
thumbnail = video.get('thumbnail')
diff --git a/hypervideo_dl/extractor/twentythreevideo.py b/hypervideo_dl/extractor/twentythreevideo.py
index e8cf5a1..290c376 100644
--- a/hypervideo_dl/extractor/twentythreevideo.py
+++ b/hypervideo_dl/extractor/twentythreevideo.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import int_or_none
diff --git a/hypervideo_dl/extractor/twitcasting.py b/hypervideo_dl/extractor/twitcasting.py
index 5c4d26c..30bc987 100644
--- a/hypervideo_dl/extractor/twitcasting.py
+++ b/hypervideo_dl/extractor/twitcasting.py
@@ -1,11 +1,9 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import base64
import itertools
import re
from .common import InfoExtractor
-from ..downloader.websocket import has_websockets
+from ..dependencies import websockets
from ..utils import (
clean_html,
ExtractorError,
@@ -77,6 +75,16 @@ class TwitCastingIE(InfoExtractor):
'playlist_mincount': 2,
}]
+ def _parse_data_movie_playlist(self, dmp, video_id):
+ # attempt 1: parse as JSON directly
+ try:
+ return self._parse_json(dmp, video_id)
+ except ExtractorError:
+ pass
+ # attempt 2: decode reversed base64
+ decoded = base64.b64decode(dmp[::-1])
+ return self._parse_json(decoded, video_id)
+
def _real_extract(self, url):
uploader_id, video_id = self._match_valid_url(url).groups()
@@ -103,7 +111,7 @@ class TwitCastingIE(InfoExtractor):
video_js_data = try_get(
webpage,
- lambda x: self._parse_json(self._search_regex(
+ lambda x: self._parse_data_movie_playlist(self._search_regex(
r'data-movie-playlist=\'([^\']+?)\'',
x, 'movie playlist', default=None), video_id)['2'], list)
@@ -164,7 +172,7 @@ class TwitCastingIE(InfoExtractor):
note='Downloading source quality m3u8',
headers=self._M3U8_HEADERS, fatal=False))
- if has_websockets:
+ if websockets:
qq = qualities(['base', 'mobilesource', 'main'])
streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {}
for mode, ws_url in streams.items():
@@ -178,10 +186,17 @@ class TwitCastingIE(InfoExtractor):
'protocol': 'websocket_frag',
})
- self._sort_formats(formats, ('source',))
-
infodict = {
- 'formats': formats
+ 'formats': formats,
+ '_format_sort_fields': ('source', ),
+ }
+ elif len(m3u8_urls) == 1:
+ formats = self._extract_m3u8_formats(
+ m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)
+ infodict = {
+ # No problem here since there's only one manifest
+ 'formats': formats,
+ 'http_headers': self._M3U8_HEADERS,
}
else:
infodict = {
diff --git a/hypervideo_dl/extractor/twitch.py b/hypervideo_dl/extractor/twitch.py
index 10de74c..c59d1cf 100644
--- a/hypervideo_dl/extractor/twitch.py
+++ b/hypervideo_dl/extractor/twitch.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import collections
import itertools
import json
@@ -15,11 +12,14 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ base_url,
clean_html,
dict_get,
- ExtractorError,
float_or_none,
int_or_none,
+ make_archive_id,
parse_duration,
parse_iso8601,
parse_qs,
@@ -55,6 +55,7 @@ class TwitchBaseIE(InfoExtractor):
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
+ 'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
}
def _perform_login(self, username, password):
@@ -132,7 +133,6 @@ class TwitchBaseIE(InfoExtractor):
'quality': 10,
'format_note': 'Source',
})
- self._sort_formats(formats)
def _download_base_gql(self, video_id, ops, note, fatal=True):
headers = {
@@ -205,6 +205,14 @@ class TwitchVodIE(TwitchBaseIE):
'uploader_id': 'riotgames',
'view_count': int,
'start_time': 310,
+ 'chapters': [
+ {
+ 'start_time': 0,
+ 'end_time': 17208,
+ 'title': 'League of Legends'
+ }
+ ],
+ 'live_status': 'was_live',
},
'params': {
# m3u8 download
@@ -273,10 +281,80 @@ class TwitchVodIE(TwitchBaseIE):
'title': 'Art'
}
],
+ 'live_status': 'was_live',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
},
'params': {
'skip_download': True
+ },
+ }, {
+ 'note': 'Storyboards',
+ 'url': 'https://www.twitch.tv/videos/635475444',
+ 'info_dict': {
+ 'id': 'v635475444',
+ 'format_id': 'sb0',
+ 'ext': 'mhtml',
+ 'title': 'Riot Games',
+ 'duration': 11643,
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'timestamp': 1590770569,
+ 'upload_date': '20200529',
+ 'chapters': [
+ {
+ 'start_time': 0,
+ 'end_time': 573,
+ 'title': 'League of Legends'
+ },
+ {
+ 'start_time': 573,
+ 'end_time': 3922,
+ 'title': 'Legends of Runeterra'
+ },
+ {
+ 'start_time': 3922,
+ 'end_time': 11643,
+ 'title': 'Art'
+ }
+ ],
+ 'live_status': 'was_live',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'columns': int,
+ 'rows': int,
+ },
+ 'params': {
+ 'format': 'mhtml',
+ 'skip_download': True
}
+ }, {
+ 'note': 'VOD with single chapter',
+ 'url': 'https://www.twitch.tv/videos/1536751224',
+ 'info_dict': {
+ 'id': 'v1536751224',
+ 'ext': 'mp4',
+ 'title': 'Porter Robinson Star Guardian Stream Tour with LilyPichu',
+ 'duration': 8353,
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'timestamp': 1658267731,
+ 'upload_date': '20220719',
+ 'chapters': [
+ {
+ 'start_time': 0,
+ 'end_time': 8353,
+ 'title': 'League of Legends'
+ }
+ ],
+ 'live_status': 'was_live',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ 'expected_warnings': ['Unable to download JSON metadata: HTTP Error 403: Forbidden']
}]
def _download_info(self, item_id):
@@ -293,16 +371,23 @@ class TwitchVodIE(TwitchBaseIE):
'includePrivate': False,
'videoID': item_id,
},
+ }, {
+ 'operationName': 'VideoPlayer_VODSeekbarPreviewVideo',
+ 'variables': {
+ 'includePrivate': False,
+ 'videoID': item_id,
+ },
}],
'Downloading stream metadata GraphQL')
video = traverse_obj(data, (0, 'data', 'video'))
video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node'))
+ video['storyboard'] = traverse_obj(data, (2, 'data', 'video', 'seekPreviewsURL'), expected_type=url_or_none)
if video is None:
raise ExtractorError(
'Video %s does not exist' % item_id, expected=True)
- return self._extract_info_gql(video, item_id)
+ return video
def _extract_info(self, info):
status = info.get('status')
@@ -341,8 +426,14 @@ class TwitchVodIE(TwitchBaseIE):
'was_live': True,
}
- def _extract_moments(self, info, item_id):
- for moment in info.get('moments') or []:
+ def _extract_chapters(self, info, item_id):
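+        # Without moment data, fall back to a single untimed chapter named
+        # after the game so the stream category is still recorded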
+ if not info.get('moments'):
+ game = traverse_obj(info, ('game', 'displayName'))
+ if game:
+ yield {'title': game}
+ return
+
+ for moment in info['moments']:
start_time = int_or_none(moment.get('positionMilliseconds'), 1000)
duration = int_or_none(moment.get('durationMilliseconds'), 1000)
name = str_or_none(moment.get('description'))
@@ -381,15 +472,49 @@ class TwitchVodIE(TwitchBaseIE):
'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
'timestamp': unified_timestamp(info.get('publishedAt')),
'view_count': int_or_none(info.get('viewCount')),
- 'chapters': list(self._extract_moments(info, item_id)),
+ 'chapters': list(self._extract_chapters(info, item_id)),
'is_live': is_live,
'was_live': True,
}
+ def _extract_storyboard(self, item_id, storyboard_json_url, duration):
+ if not duration or not storyboard_json_url:
+ return
+ spec = self._download_json(storyboard_json_url, item_id, 'Downloading storyboard metadata JSON', fatal=False) or []
+ # sort from highest quality to lowest
+        # This makes sb0 the highest-quality format and sb1 the next lower, etc., consistent with YouTube's storyboard ordering
+ spec.sort(key=lambda x: int_or_none(x.get('width')) or 0, reverse=True)
+ base = base_url(storyboard_json_url)
+ for i, s in enumerate(spec):
+ count = int_or_none(s.get('count'))
+ images = s.get('images')
+ if not (images and count):
+ continue
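+            # Each image is a tile sheet covering an equal share of the VOD,
+            # so every fragment spans duration / len(images) seconds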
+ fragment_duration = duration / len(images)
+ yield {
+ 'format_id': f'sb{i}',
+ 'format_note': 'storyboard',
+ 'ext': 'mhtml',
+ 'protocol': 'mhtml',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'url': urljoin(base, images[0]),
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'fps': count / duration,
+ 'rows': int_or_none(s.get('rows')),
+ 'columns': int_or_none(s.get('cols')),
+ 'fragments': [{
+ 'url': urljoin(base, path),
+ 'duration': fragment_duration,
+ } for path in images],
+ }
+
def _real_extract(self, url):
vod_id = self._match_id(url)
- info = self._download_info(vod_id)
+ video = self._download_info(vod_id)
+ info = self._extract_info_gql(video, vod_id)
access_token = self._download_access_token(vod_id, 'video', 'id')
formats = self._extract_m3u8_formats(
@@ -406,6 +531,8 @@ class TwitchVodIE(TwitchBaseIE):
})),
vod_id, 'mp4', entry_protocol='m3u8_native')
+ formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))
+
self._prefer_source(formats)
info['formats'] = formats
@@ -853,7 +980,7 @@ class TwitchStreamIE(TwitchBaseIE):
stream = user['stream']
if not stream:
- raise ExtractorError('%s is offline' % channel_name, expected=True)
+ raise UserNotLive(video_id=channel_name)
access_token = self._download_access_token(
channel_name, 'stream', 'channelName')
@@ -1016,7 +1143,6 @@ class TwitchClipsIE(TwitchBaseIE):
'height': int_or_none(option.get('quality')),
'fps': int_or_none(option.get('frameRate')),
})
- self._sort_formats(formats)
thumbnails = []
for thumbnail_id in ('tiny', 'small', 'medium'):
@@ -1035,10 +1161,13 @@ class TwitchClipsIE(TwitchBaseIE):
})
thumbnails.append(thumb)
+ old_id = self._search_regex(r'%7C(\d+)(?:-\d+)?.mp4', formats[-1]['url'], 'old id', default=None)
+
return {
'id': clip.get('id') or video_id,
+ '_old_archive_ids': [make_archive_id(self, old_id)] if old_id else None,
'display_id': video_id,
- 'title': clip.get('title') or video_id,
+ 'title': clip.get('title'),
'formats': formats,
'duration': int_or_none(clip.get('durationSeconds')),
'view_count': int_or_none(clip.get('viewCount')),
diff --git a/hypervideo_dl/extractor/twitter.py b/hypervideo_dl/extractor/twitter.py
index 8ccc38e..18ebb36 100644
--- a/hypervideo_dl/extractor/twitter.py
+++ b/hypervideo_dl/extractor/twitter.py
@@ -1,40 +1,42 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import json
import re
+import urllib.error
from .common import InfoExtractor
+from .periscope import PeriscopeBaseIE, PeriscopeIE
+from ..compat import functools # isort: split
from ..compat import (
- compat_HTTPError,
compat_parse_qs,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import (
- dict_get,
ExtractorError,
- format_field,
+ dict_get,
float_or_none,
+ format_field,
int_or_none,
+ make_archive_id,
+ str_or_none,
+ strip_or_none,
traverse_obj,
+ try_call,
try_get,
- strip_or_none,
unified_timestamp,
update_url_query,
url_or_none,
xpath_text,
)
-from .periscope import (
- PeriscopeBaseIE,
- PeriscopeIE,
-)
-
class TwitterBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitter.com/1.1/'
- _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/'
- _GUEST_TOKEN = None
+ _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
+ _TOKENS = {
+ 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
+ 'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
+ }
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
@@ -86,28 +88,81 @@ class TwitterBaseIE(InfoExtractor):
'height': int(m.group('height')),
})
- def _call_api(self, path, video_id, query={}):
- headers = {
- 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
- }
- token = self._get_cookies(self._API_BASE).get('ct0')
- if token:
- headers['x-csrf-token'] = token.value
- if not self._GUEST_TOKEN:
- self._GUEST_TOKEN = self._download_json(
- self._API_BASE + 'guest/activate.json', video_id,
- 'Downloading guest token', data=b'',
- headers=headers)['guest_token']
- headers['x-guest-token'] = self._GUEST_TOKEN
- try:
- return self._download_json(
- self._API_BASE + path, video_id, headers=headers, query=query)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- raise ExtractorError(self._parse_json(
- e.cause.read().decode(),
- video_id)['errors'][0]['message'], expected=True)
- raise
+ @functools.cached_property
+ def is_logged_in(self):
+ return bool(self._get_cookies(self._API_BASE).get('auth_token'))
+
+ def _call_api(self, path, video_id, query={}, graphql=False):
+ cookies = self._get_cookies(self._API_BASE)
+ headers = {}
+
+ csrf_cookie = cookies.get('ct0')
+ if csrf_cookie:
+ headers['x-csrf-token'] = csrf_cookie.value
+
+ if self.is_logged_in:
+ headers.update({
+ 'x-twitter-auth-type': 'OAuth2Session',
+ 'x-twitter-client-language': 'en',
+ 'x-twitter-active-user': 'yes',
+ })
+
+ last_error = None
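+        # Try each bearer token in turn; the inner two-pass loop permits one
+        # guest-token refresh per token, while a 404 falls through to the
+        # next (deprecated) bearer token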
+ for bearer_token in self._TOKENS:
+ for first_attempt in (True, False):
+ headers['Authorization'] = f'Bearer {bearer_token}'
+
+ if not self.is_logged_in:
+ if not self._TOKENS[bearer_token]:
+ headers.pop('x-guest-token', None)
+ guest_token_response = self._download_json(
+ self._API_BASE + 'guest/activate.json', video_id,
+ 'Downloading guest token', data=b'', headers=headers)
+
+ self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
+ if not self._TOKENS[bearer_token]:
+ raise ExtractorError('Could not retrieve guest token')
+
+ headers['x-guest-token'] = self._TOKENS[bearer_token]
+
+ try:
+ allowed_status = {400, 403, 404} if graphql else {403}
+ result = self._download_json(
+ (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
+ video_id, headers=headers, query=query, expected_status=allowed_status)
+
+ except ExtractorError as e:
+ if last_error:
+ raise last_error
+
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
+ raise
+
+ last_error = e
+ self.report_warning(
+                        'Twitter API gave a 404 response, retrying with the deprecated auth token. '
+ 'Only one media item can be extracted')
+ break # continue outer loop with next bearer_token
+
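+            # A successful (expected-status) response may still carry an error
+            # list; a stale guest token is refreshed once, anything else is fatal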
+ if result.get('errors'):
+ errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
+ if first_attempt and any('bad guest token' in error.lower() for error in errors):
+ self.to_screen('Guest token has expired. Refreshing guest token')
+ self._TOKENS[bearer_token] = None
+ continue
+
+ error_message = ', '.join(set(errors)) or 'Unknown error'
+ raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
+
+ return result
+
+ def _build_graphql_query(self, media_id):
+ raise NotImplementedError('Method must be implemented to support GraphQL')
+
+ def _call_graphql_api(self, endpoint, media_id):
+ data = self._build_graphql_query(media_id)
+ query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
+ return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
class TwitterCardIE(InfoExtractor):
@@ -118,7 +173,7 @@ class TwitterCardIE(InfoExtractor):
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
# MD5 checksums are different in different places
'info_dict': {
- 'id': '560070183650213889',
+ 'id': '560070131976392705',
'ext': 'mp4',
'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
@@ -128,6 +183,13 @@ class TwitterCardIE(InfoExtractor):
'duration': 30.033,
'timestamp': 1422366112,
'upload_date': '20150127',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'tags': [],
+ 'repost_count': int,
+ 'like_count': int,
+ 'display_id': '560070183650213889',
+ 'uploader_url': 'https://twitter.com/Twitter',
},
},
{
@@ -142,7 +204,14 @@ class TwitterCardIE(InfoExtractor):
'uploader_id': 'NASA',
'timestamp': 1437408129,
'upload_date': '20150720',
+ 'uploader_url': 'https://twitter.com/NASA',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'tags': ['PlutoFlyby'],
},
+ 'params': {'format': '[protocol=https]'}
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
@@ -155,12 +224,27 @@ class TwitterCardIE(InfoExtractor):
'upload_date': '20111013',
'uploader': 'OMG! UBUNTU!',
'uploader_id': 'omgubuntu',
+ 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
+ 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
+ 'channel_follower_count': int,
+ 'chapters': 'count:8',
+ 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
+ 'duration': 138,
+ 'categories': ['Film & Animation'],
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'availability': 'public',
+ 'like_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
+ 'view_count': int,
+ 'tags': 'count:12',
+ 'channel': 'OMG! UBUNTU!',
+ 'playable_in_embed': True,
},
'add_ie': ['Youtube'],
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
- 'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
'info_dict': {
'id': 'iBb2x00UVlv',
'ext': 'mp4',
@@ -169,9 +253,17 @@ class TwitterCardIE(InfoExtractor):
'uploader': 'ArsenalTerje',
'title': 'Vine by ArsenalTerje',
'timestamp': 1447451307,
+ 'alt_title': 'Vine by ArsenalTerje',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://[^?#]+\.jpg',
+ 'view_count': int,
+ 'repost_count': int,
},
'add_ie': ['Vine'],
- }, {
+ 'params': {'skip_download': 'm3u8'},
+ },
+ {
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
'info_dict': {
@@ -185,7 +277,8 @@ class TwitterCardIE(InfoExtractor):
'upload_date': '20160303',
},
'skip': 'This content is no longer available.',
- }, {
+ },
+ {
'url': 'https://twitter.com/i/videos/752274308186120192',
'only_matching': True,
},
@@ -205,7 +298,8 @@ class TwitterIE(TwitterBaseIE):
_TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
'info_dict': {
- 'id': '643211948184596480',
+ 'id': '643211870443208704',
+ 'display_id': '643211948184596480',
'ext': 'mp4',
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': r're:^https?://.*\.jpg',
@@ -215,6 +309,11 @@ class TwitterIE(TwitterBaseIE):
'duration': 12.922,
'timestamp': 1442188653,
'upload_date': '20150913',
+ 'uploader_url': 'https://twitter.com/freethenipple',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
'age_limit': 18,
},
}, {
@@ -235,13 +334,20 @@ class TwitterIE(TwitterBaseIE):
'url': 'https://twitter.com/starwars/status/665052190608723968',
'info_dict': {
'id': '665052190608723968',
+ 'display_id': '665052190608723968',
'ext': 'mp4',
- 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
+ 'title': 'md5:55fef1d5b811944f1550e91b44abb82e',
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'uploader_id': 'starwars',
- 'uploader': 'Star Wars',
+ 'uploader': r're:Star Wars.*',
'timestamp': 1447395772,
'upload_date': '20151113',
+ 'uploader_url': 'https://twitter.com/starwars',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
+ 'age_limit': 0,
},
}, {
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
@@ -254,25 +360,39 @@ class TwitterIE(TwitterBaseIE):
'uploader': 'Brent Yarina',
'timestamp': 1456976204,
'upload_date': '20160303',
+ 'uploader_url': 'https://twitter.com/BTNBrentYarina',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
},
'params': {
# The same video as https://twitter.com/i/videos/tweet/705235433198714880
# Test case of TwitterCardIE
'skip_download': True,
},
+ 'skip': 'Dead external link',
}, {
'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
'info_dict': {
- 'id': '700207533655363584',
+ 'id': '700207414000242688',
+ 'display_id': '700207533655363584',
'ext': 'mp4',
- 'title': 'simon vertugo - BEAT PROD: @suhmeduh #Damndaniel',
+ 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg',
- 'uploader': 'simon vertugo',
- 'uploader_id': 'simonvertugo',
+ 'uploader': 'jaydin donte geer',
+ 'uploader_id': 'jaydingeer',
'duration': 30.0,
'timestamp': 1455777459,
'upload_date': '20160218',
+ 'uploader_url': 'https://twitter.com/jaydingeer',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['Damndaniel'],
+ 'age_limit': 0,
},
}, {
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
@@ -285,12 +405,19 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': '1004126642786242560',
'timestamp': 1402826626,
'upload_date': '20140615',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'alt_title': 'Vine by TAKUMA',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'view_count': int,
},
'add_ie': ['Vine'],
}, {
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
'info_dict': {
- 'id': '719944021058060289',
+ 'id': '717462543795523584',
+ 'display_id': '719944021058060289',
'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
@@ -299,6 +426,13 @@ class TwitterIE(TwitterBaseIE):
'duration': 3.17,
'timestamp': 1460483005,
'upload_date': '20160412',
+ 'uploader_url': 'https://twitter.com/CaptainAmerica',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
},
}, {
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
@@ -310,6 +444,7 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': '1PmKqpJdOJQoY',
'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
'timestamp': 1474613214,
+ 'thumbnail': r're:^https?://.*\.jpg',
},
'add_ie': ['Periscope'],
}, {
@@ -330,7 +465,8 @@ class TwitterIE(TwitterBaseIE):
}, {
'url': 'https://twitter.com/i/web/status/910031516746514432',
'info_dict': {
- 'id': '910031516746514432',
+ 'id': '910030238373089285',
+ 'display_id': '910031516746514432',
'ext': 'mp4',
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
'thumbnail': r're:^https?://.*\.jpg',
@@ -340,6 +476,12 @@ class TwitterIE(TwitterBaseIE):
'duration': 47.48,
'timestamp': 1505803395,
'upload_date': '20170919',
+ 'uploader_url': 'https://twitter.com/Prefet971',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['Maria'],
+ 'age_limit': 0,
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -348,7 +490,8 @@ class TwitterIE(TwitterBaseIE):
# card via api.twitter.com/1.1/videos/tweet/config
'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
'info_dict': {
- 'id': '1001551623938805763',
+ 'id': '1001551417340022785',
+ 'display_id': '1001551623938805763',
'ext': 'mp4',
'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg',
@@ -358,6 +501,12 @@ class TwitterIE(TwitterBaseIE):
'duration': 111.278,
'timestamp': 1527623489,
'upload_date': '20180529',
+ 'uploader_url': 'https://twitter.com/LisPower1',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -365,7 +514,8 @@ class TwitterIE(TwitterBaseIE):
}, {
'url': 'https://twitter.com/foobar/status/1087791357756956680',
'info_dict': {
- 'id': '1087791357756956680',
+ 'id': '1087791272830607360',
+ 'display_id': '1087791357756956680',
'ext': 'mp4',
'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
'thumbnail': r're:^https?://.*\.jpg',
@@ -375,6 +525,12 @@ class TwitterIE(TwitterBaseIE):
'duration': 61.567,
'timestamp': 1548184644,
'upload_date': '20190122',
+ 'uploader_url': 'https://twitter.com/Twitter',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
},
}, {
# not available in Periscope
@@ -385,13 +541,17 @@ class TwitterIE(TwitterBaseIE):
'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
'uploader': 'Vivi',
'uploader_id': '1eVjYOLGkGrQL',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'tags': ['EduTECH2019'],
+ 'view_count': int,
},
'add_ie': ['TwitterBroadcast'],
}, {
# unified card
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
'info_dict': {
- 'id': '1349794411333394432',
+ 'id': '1349774757969989634',
+ 'display_id': '1349794411333394432',
'ext': 'mp4',
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
'thumbnail': r're:^https?://.*\.jpg',
@@ -401,11 +561,177 @@ class TwitterIE(TwitterBaseIE):
'duration': 324.484,
'timestamp': 1610651040,
'upload_date': '20210114',
+ 'uploader_url': 'https://twitter.com/BrooklynNets',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
},
'params': {
'skip_download': True,
},
}, {
+ 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
+ 'info_dict': {
+ 'id': '1577855447914409984',
+ 'display_id': '1577855540407197696',
+ 'ext': 'mp4',
+ 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
+ 'description': 'md5:b9c3699335447391d11753ab21c70a74',
+ 'upload_date': '20221006',
+ 'uploader': 'oshtru',
+ 'uploader_id': 'oshtru',
+ 'uploader_url': 'https://twitter.com/oshtru',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 30.03,
+ 'timestamp': 1665025050,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
+ 'info_dict': {
+ 'id': '1577719286659006464',
+ 'title': 'Ultima | #\u0432\u029f\u043c - Test',
+ 'description': 'Test https://t.co/Y3KEZD7Dad',
+ 'uploader': 'Ultima | #\u0432\u029f\u043c',
+ 'uploader_id': 'UltimaShadowX',
+ 'uploader_url': 'https://twitter.com/UltimaShadowX',
+ 'upload_date': '20221005',
+ 'timestamp': 1664992565,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': [],
+ 'age_limit': 0,
+ },
+ 'playlist_count': 4,
+ 'params': {'skip_download': True},
+ }, {
+ 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
+ 'info_dict': {
+ 'id': '1575559336759263233',
+ 'display_id': '1575560063510810624',
+ 'ext': 'mp4',
+ 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:95aea692fda36a12081b9629b02daa92',
+ 'uploader': 'Max Olson',
+ 'uploader_id': 'MesoMax919',
+ 'uploader_url': 'https://twitter.com/MesoMax919',
+ 'duration': 21.321,
+ 'timestamp': 1664477766,
+ 'upload_date': '20220929',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['HurricaneIan'],
+ 'age_limit': 0,
+ },
+ }, {
+ # Adult content, uses old token
+ # Fails if not logged in (GraphQL)
+ 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
+ 'info_dict': {
+ 'id': '1575199163847000068',
+ 'display_id': '1575199173472927762',
+ 'ext': 'mp4',
+ 'title': str,
+ 'description': str,
+ 'uploader': str,
+ 'uploader_id': 'Rizdraws',
+ 'uploader_url': 'https://twitter.com/Rizdraws',
+ 'upload_date': '20220928',
+ 'timestamp': 1664391723,
+ 'thumbnail': 're:^https?://.*\\.jpg',
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': []
+ },
+ 'expected_warnings': ['404'],
+ }, {
+ # Description is missing one https://t.co url (GraphQL)
+ 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '1395079556562706435',
+ 'title': str,
+ 'tags': [],
+ 'uploader': str,
+ 'like_count': int,
+ 'upload_date': '20210519',
+ 'age_limit': 0,
+ 'repost_count': int,
+ 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
+ 'uploader_id': 'Srirachachau',
+ 'comment_count': int,
+ 'uploader_url': 'https://twitter.com/Srirachachau',
+ 'timestamp': 1621447860,
+ },
+ }, {
+ # Description is missing one https://t.co url (GraphQL)
+ 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '1578353380363501568',
+ 'title': str,
+ 'uploader_id': 'DavidToons_',
+ 'repost_count': int,
+ 'like_count': int,
+ 'uploader': str,
+ 'timestamp': 1665143744,
+ 'uploader_url': 'https://twitter.com/DavidToons_',
+ 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
+ 'tags': [],
+ 'comment_count': int,
+ 'upload_date': '20221007',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': '1578401165338976258',
+ 'title': str,
+ 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
+ 'uploader': str,
+ 'uploader_id': 'primevideouk',
+ 'timestamp': 1665155137,
+ 'upload_date': '20221007',
+ 'age_limit': 0,
+ 'uploader_url': 'https://twitter.com/primevideouk',
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'tags': ['TheRingsOfPower'],
+ },
+ }, {
+ # Twitter Spaces
+ 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
+ 'info_dict': {
+ 'id': '1lPJqmBeeNAJb',
+ 'ext': 'm4a',
+ 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
+ 'uploader': r're:Monique Camarra.+?',
+ 'uploader_id': 'MoniqueCamarra',
+ 'live_status': 'was_live',
+ 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
+ 'timestamp': 1658407771464,
+ },
+ 'add_ie': ['TwitterSpaces'],
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # onion route
+ 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
+ 'only_matching': True,
+ }, {
# Twitch Clip Embed
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
'only_matching': True,
@@ -439,10 +765,77 @@ class TwitterIE(TwitterBaseIE):
'only_matching': True,
}]
+ def _graphql_to_legacy(self, data, twid):
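+        # Locate this tweet's entry in the GraphQL threaded-conversation
+        # response and flatten it into the shape of the legacy REST status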
+ result = traverse_obj(data, (
+ 'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
+ lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
+ 'tweet_results', 'result'
+ ), expected_type=dict, default={}, get_all=False)
+
+ if 'tombstone' in result:
+ cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
+ raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
+
+ status = result.get('legacy', {})
+ status.update(traverse_obj(result, {
+ 'user': ('core', 'user_results', 'result', 'legacy'),
+ 'card': ('card', 'legacy'),
+ 'quoted_status': ('quoted_status_result', 'result', 'legacy'),
+ }, expected_type=dict, default={}))
+
+        # Extra transformation is needed since the GraphQL result does not match the legacy format
+ binding_values = {
+ binding_value.get('key'): binding_value.get('value')
+ for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
+ }
+ if binding_values:
+ status['card']['binding_values'] = binding_values
+
+ return status
+
+ def _build_graphql_query(self, media_id):
+ return {
+ 'variables': {
+ 'focalTweetId': media_id,
+ 'includePromotedContent': True,
+ 'with_rux_injections': False,
+ 'withBirdwatchNotes': True,
+ 'withCommunity': True,
+ 'withDownvotePerspective': False,
+ 'withQuickPromoteEligibilityTweetFields': True,
+ 'withReactionsMetadata': False,
+ 'withReactionsPerspective': False,
+ 'withSuperFollowsTweetFields': True,
+ 'withSuperFollowsUserFields': True,
+ 'withV2Timeline': True,
+ 'withVoice': True,
+ },
+ 'features': {
+ 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
+ 'interactive_text_enabled': True,
+ 'responsive_web_edit_tweet_api_enabled': True,
+ 'responsive_web_enhance_cards_enabled': True,
+ 'responsive_web_graphql_timeline_navigation_enabled': False,
+ 'responsive_web_text_conversations_enabled': False,
+ 'responsive_web_uc_gql_enabled': True,
+ 'standardized_nudges_misinfo': True,
+ 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
+ 'tweetypie_unmention_optimization_enabled': True,
+ 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
+ 'verified_phone_label_enabled': False,
+ 'vibe_api_enabled': True,
+ },
+ }
+
def _real_extract(self, url):
twid = self._match_id(url)
- status = self._call_api(
- 'statuses/show/%s.json' % twid, twid, {
+ if self.is_logged_in or self._configuration_arg('force_graphql'):
+ self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
+ result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
+ status = self._graphql_to_legacy(result, twid)
+
+ else:
+ status = self._call_api(f'statuses/show/{twid}.json', twid, {
'cards_platform': 'Web-12',
'include_cards': 1,
'include_reply_count': 1,
@@ -456,7 +849,7 @@ class TwitterIE(TwitterBaseIE):
user = status.get('user') or {}
uploader = user.get('name')
if uploader:
- title = '%s - %s' % (uploader, title)
+ title = f'{uploader} - {title}'
uploader_id = user.get('screen_name')
tags = []
@@ -473,7 +866,7 @@ class TwitterIE(TwitterBaseIE):
'uploader': uploader,
'timestamp': unified_timestamp(status.get('created_at')),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://twitter.com/%s'),
+ 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
'like_count': int_or_none(status.get('favorite_count')),
'repost_count': int_or_none(status.get('retweet_count')),
'comment_count': int_or_none(status.get('reply_count')),
@@ -482,6 +875,8 @@ class TwitterIE(TwitterBaseIE):
}
def extract_from_video_info(media):
+ media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
+ self.write_debug(f'Extracting from video info: {media_id}')
video_info = media.get('video_info') or {}
formats = []
@@ -490,7 +885,6 @@ class TwitterIE(TwitterBaseIE):
fmts, subs = self._extract_variant_formats(variant, twid)
subtitles = self._merge_subtitles(subtitles, subs)
formats.extend(fmts)
- self._sort_formats(formats, ('res', 'br', 'size', 'proto')) # The codec of http formats are unknown
thumbnails = []
media_url = media.get('media_url_https') or media.get('media_url')
@@ -506,90 +900,111 @@ class TwitterIE(TwitterBaseIE):
add_thumbnail(name, size)
add_thumbnail('orig', media.get('original_info') or {})
- info.update({
+ return {
+ 'id': media_id,
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'duration': float_or_none(video_info.get('duration_millis'), 1000),
- })
+ # The codec of http formats are unknown
+ '_format_sort_fields': ('res', 'br', 'size', 'proto'),
+ }
- media = traverse_obj(status, ((None, 'quoted_status'), 'extended_entities', 'media', 0), get_all=False)
- if media and media.get('type') != 'photo':
- extract_from_video_info(media)
- else:
- card = status.get('card')
- if card:
- binding_values = card['binding_values']
-
- def get_binding_value(k):
- o = binding_values.get(k) or {}
- return try_get(o, lambda x: x[x['type'].lower() + '_value'])
-
- card_name = card['name'].split(':')[-1]
- if card_name == 'player':
- info.update({
- '_type': 'url',
- 'url': get_binding_value('player_url'),
- })
- elif card_name == 'periscope_broadcast':
- info.update({
- '_type': 'url',
- 'url': get_binding_value('url') or get_binding_value('player_url'),
- 'ie_key': PeriscopeIE.ie_key(),
- })
- elif card_name == 'broadcast':
- info.update({
- '_type': 'url',
- 'url': get_binding_value('broadcast_url'),
- 'ie_key': TwitterBroadcastIE.ie_key(),
- })
- elif card_name == 'summary':
- info.update({
- '_type': 'url',
- 'url': get_binding_value('card_url'),
- })
- elif card_name == 'unified_card':
- media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
- extract_from_video_info(next(iter(media_entities.values())))
- # amplify, promo_video_website, promo_video_convo, appplayer,
- # video_direct_message, poll2choice_video, poll3choice_video,
- # poll4choice_video, ...
- else:
- is_amplify = card_name == 'amplify'
- vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
- content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
- formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
- self._sort_formats(formats)
-
- thumbnails = []
- for suffix in ('_small', '', '_large', '_x_large', '_original'):
- image = get_binding_value('player_image' + suffix) or {}
- image_url = image.get('url')
- if not image_url or '/player-placeholder' in image_url:
- continue
- thumbnails.append({
- 'id': suffix[1:] if suffix else 'medium',
- 'url': image_url,
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- })
-
- info.update({
- 'formats': formats,
- 'subtitles': subtitles,
- 'thumbnails': thumbnails,
- 'duration': int_or_none(get_binding_value(
- 'content_duration_seconds')),
- })
- else:
- expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
- if not expanded_url:
- raise ExtractorError("There's no video in this tweet.")
- info.update({
+ def extract_from_card_info(card):
+ if not card:
+ return
+
+ self.write_debug(f'Extracting from card info: {card.get("url")}')
+ binding_values = card['binding_values']
+
+ def get_binding_value(k):
+ o = binding_values.get(k) or {}
+ return try_get(o, lambda x: x[x['type'].lower() + '_value'])
+
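+        # Card names may carry a namespace prefix; only the part after the
+        # last ':' selects the handler below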
+ card_name = card['name'].split(':')[-1]
+ if card_name == 'player':
+ yield {
'_type': 'url',
- 'url': expanded_url,
- })
- return info
+ 'url': get_binding_value('player_url'),
+ }
+ elif card_name == 'periscope_broadcast':
+ yield {
+ '_type': 'url',
+ 'url': get_binding_value('url') or get_binding_value('player_url'),
+ 'ie_key': PeriscopeIE.ie_key(),
+ }
+ elif card_name == 'broadcast':
+ yield {
+ '_type': 'url',
+ 'url': get_binding_value('broadcast_url'),
+ 'ie_key': TwitterBroadcastIE.ie_key(),
+ }
+ elif card_name == 'audiospace':
+ yield {
+ '_type': 'url',
+ 'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
+ 'ie_key': TwitterSpacesIE.ie_key(),
+ }
+ elif card_name == 'summary':
+ yield {
+ '_type': 'url',
+ 'url': get_binding_value('card_url'),
+ }
+ elif card_name == 'unified_card':
+ unified_card = self._parse_json(get_binding_value('unified_card'), twid)
+ yield from map(extract_from_video_info, traverse_obj(
+ unified_card, ('media_entities', ...), expected_type=dict))
+ # amplify, promo_video_website, promo_video_convo, appplayer,
+ # video_direct_message, poll2choice_video, poll3choice_video,
+ # poll4choice_video, ...
+ else:
+ is_amplify = card_name == 'amplify'
+ vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
+ content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
+ formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+
+ thumbnails = []
+ for suffix in ('_small', '', '_large', '_x_large', '_original'):
+ image = get_binding_value('player_image' + suffix) or {}
+ image_url = image.get('url')
+ if not image_url or '/player-placeholder' in image_url:
+ continue
+ thumbnails.append({
+ 'id': suffix[1:] if suffix else 'medium',
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ yield {
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(get_binding_value(
+ 'content_duration_seconds')),
+ }
+
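+        # Gather every non-photo media item plus any card-derived entry;
+        # each one becomes its own playlist entry carrying the tweet metadata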
+ media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
+ videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
+ cards = extract_from_card_info(status.get('card'))
+ entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
+
+ if not entries:
+ expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
+ if not expanded_url or expanded_url == url:
+ raise ExtractorError('No video could be found in this tweet', expected=True)
+
+ return self.url_result(expanded_url, display_id=twid, **info)
+
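+        # Record the tweet id as an old archive id so download archives
+        # written before per-media ids were introduced still match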
+ entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
+
+ if len(entries) == 1:
+ return entries[0]
+
+ for index, entry in enumerate(entries, 1):
+ entry['title'] += f' #{index}'
+
+ return self.playlist_result(entries, **info)
class TwitterAmplifyIE(TwitterBaseIE):
@@ -598,13 +1013,14 @@ class TwitterAmplifyIE(TwitterBaseIE):
_TEST = {
'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
- 'md5': '7df102d0b9fd7066b86f3159f8e81bf6',
+ 'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
'info_dict': {
'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
'ext': 'mp4',
'title': 'Twitter Video',
'thumbnail': 're:^https?://.*',
},
+ 'params': {'format': '[protocol=https]'},
}
def _real_extract(self, url):
@@ -613,7 +1029,7 @@ class TwitterAmplifyIE(TwitterBaseIE):
vmap_url = self._html_search_meta(
'twitter:amplify:vmap', webpage, 'vmap url')
- formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
+ formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)
thumbnails = []
thumbnail = self._html_search_meta(
@@ -661,6 +1077,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'title': 'Andrea May Sahouri - Periscope Broadcast',
'uploader': 'Andrea May Sahouri',
'uploader_id': '1PXEdBZWpGwKe',
+ 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
+ 'view_count': int,
},
}
@@ -672,7 +1090,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
info = self._parse_broadcast_data(broadcast, broadcast_id)
media_key = broadcast['media_key']
source = self._call_api(
- 'live_video_stream/status/' + media_key, media_key)['source']
+ f'live_video_stream/status/{media_key}', media_key)['source']
m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
if '/live_video_stream/geoblocked/' in m3u8_url:
self.raise_geo_restricted()
@@ -684,6 +1102,100 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
return info
+class TwitterSpacesIE(TwitterBaseIE):
+ IE_NAME = 'twitter:spaces'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
+
+ _TESTS = [{
+ 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
+ 'info_dict': {
+ 'id': '1RDxlgyvNXzJL',
+ 'ext': 'm4a',
+ 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
+ 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
+ 'uploader': r're:Lucio Di Gaetano.*?',
+ 'uploader_id': 'luciodigaetano',
+ 'live_status': 'was_live',
+ 'timestamp': 1659877956397,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ SPACE_STATUS = {
+ 'notstarted': 'is_upcoming',
+ 'ended': 'was_live',
+ 'running': 'is_live',
+ 'timedout': 'post_live',
+ }
+
+ def _build_graphql_query(self, space_id):
+ return {
+ 'variables': {
+ 'id': space_id,
+ 'isMetatagsQuery': True,
+ 'withDownvotePerspective': False,
+ 'withReactionsMetadata': False,
+ 'withReactionsPerspective': False,
+ 'withReplays': True,
+ 'withSuperFollowsUserFields': True,
+ 'withSuperFollowsTweetFields': True,
+ },
+ 'features': {
+ 'dont_mention_me_view_api_enabled': True,
+ 'interactive_text_enabled': True,
+ 'responsive_web_edit_tweet_api_enabled': True,
+ 'responsive_web_enhance_cards_enabled': True,
+ 'responsive_web_uc_gql_enabled': True,
+ 'spaces_2022_h2_clipping': True,
+ 'spaces_2022_h2_spaces_communities': False,
+ 'standardized_nudges_misinfo': True,
+ 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
+ 'vibe_api_enabled': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ space_id = self._match_id(url)
+ space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
+ if not space_data:
+ raise ExtractorError('Twitter Space not found', expected=True)
+
+ metadata = space_data['metadata']
+ live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
+
+ formats = []
+ if live_status == 'is_upcoming':
+ self.raise_no_formats('Twitter Space not started yet', expected=True)
+ elif live_status == 'post_live':
+ self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
+ else:
+ source = self._call_api(
+ f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']
+
+ # XXX: Native downloader does not work
+ formats = self._extract_m3u8_formats(
+ traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
+ metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
+ headers={'Referer': 'https://twitter.com/'})
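+            # Spaces are audio-only and the manifest lacks codec info,
+            # so the codecs are set explicitly rather than probed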
+ for fmt in formats:
+ fmt.update({'vcodec': 'none', 'acodec': 'aac'})
+
+ participants = ', '.join(traverse_obj(
+ space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
+ return {
+ 'id': space_id,
+ 'title': metadata.get('title'),
+ 'description': f'Twitter Space participated by {participants}',
+ 'uploader': traverse_obj(
+ metadata, ('creator_results', 'result', 'legacy', 'name')),
+ 'uploader_id': traverse_obj(
+ metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
+ 'live_status': live_status,
+ 'timestamp': metadata.get('created_at'),
+ 'formats': formats,
+ }
+
+
class TwitterShortenerIE(TwitterBaseIE):
IE_NAME = 'twitter:shortener'
_VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
diff --git a/hypervideo_dl/extractor/udemy.py b/hypervideo_dl/extractor/udemy.py
index 88b2310..4faad58 100644
--- a/hypervideo_dl/extractor/udemy.py
+++ b/hypervideo_dl/extractor/udemy.py
@@ -1,19 +1,12 @@
-from __future__ import unicode_literals
-
import re
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_kwargs,
- compat_str,
- compat_urllib_request,
- compat_urlparse,
-)
+from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..utils import (
+ ExtractorError,
determine_ext,
extract_attributes,
- ExtractorError,
float_or_none,
int_or_none,
js_to_json,
@@ -132,7 +125,7 @@ class UdemyIE(InfoExtractor):
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
kwargs['headers'] = headers
ret = super(UdemyIE, self)._download_webpage_handle(
- *args, **compat_kwargs(kwargs))
+ *args, **kwargs)
if not ret:
return ret
webpage, _ = ret
@@ -151,14 +144,14 @@ class UdemyIE(InfoExtractor):
'X-Udemy-Snail-Case': 'true',
'X-Requested-With': 'XMLHttpRequest',
}
- for cookie in self._downloader.cookiejar:
+ for cookie in self.cookiejar:
if cookie.name == 'client_id':
headers['X-Udemy-Client-Id'] = cookie.value
elif cookie.name == 'access_token':
headers['X-Udemy-Bearer-Token'] = cookie.value
headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
- if isinstance(url_or_request, compat_urllib_request.Request):
+ if isinstance(url_or_request, urllib.request.Request):
for header, value in headers.items():
url_or_request.add_header(header, value)
else:
@@ -398,8 +391,6 @@ class UdemyIE(InfoExtractor):
if f.get('url'):
formats.append(f)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
@@ -412,7 +403,7 @@ class UdemyIE(InfoExtractor):
}
-class UdemyCourseIE(UdemyIE):
+class UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'udemy:course'
_VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
_TESTS = [{
diff --git a/hypervideo_dl/extractor/udn.py b/hypervideo_dl/extractor/udn.py
index 2c8e5c7..10668ac 100644
--- a/hypervideo_dl/extractor/udn.py
+++ b/hypervideo_dl/extractor/udn.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -16,6 +13,7 @@ class UDNEmbedIE(InfoExtractor):
IE_DESC = '聯合影音'
_PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
_VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % _PROTOCOL_RELATIVE_VALID_URL]
_TESTS = [{
'url': 'http://video.udn.com/embed/news/300040',
'info_dict': {
@@ -92,8 +90,6 @@ class UDNEmbedIE(InfoExtractor):
})
formats.append(a_format)
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/ufctv.py b/hypervideo_dl/extractor/ufctv.py
index 3d74ba0..2c1c5e0 100644
--- a/hypervideo_dl/extractor/ufctv.py
+++ b/hypervideo_dl/extractor/ufctv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .imggaming import ImgGamingBaseIE
diff --git a/hypervideo_dl/extractor/ukcolumn.py b/hypervideo_dl/extractor/ukcolumn.py
index d2626f0..aade79f 100644
--- a/hypervideo_dl/extractor/ukcolumn.py
+++ b/hypervideo_dl/extractor/ukcolumn.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from ..utils import (
unescapeHTML,
urljoin,
diff --git a/hypervideo_dl/extractor/uktvplay.py b/hypervideo_dl/extractor/uktvplay.py
index f28fd51..ab22a8e 100644
--- a/hypervideo_dl/extractor/uktvplay.py
+++ b/hypervideo_dl/extractor/uktvplay.py
@@ -1,11 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
class UKTVPlayIE(InfoExtractor):
- _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
+ _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*)(?P<id>\d+)'
_TESTS = [{
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
'info_dict': {
@@ -25,6 +22,9 @@ class UKTVPlayIE(InfoExtractor):
}, {
'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
'only_matching': True,
+ }, {
+ 'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true',
+ 'only_matching': True,
}]
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
diff --git a/hypervideo_dl/extractor/umg.py b/hypervideo_dl/extractor/umg.py
index c1b65d1..3ffcb73 100644
--- a/hypervideo_dl/extractor/umg.py
+++ b/hypervideo_dl/extractor/umg.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -89,7 +86,6 @@ class UMGDeIE(InfoExtractor):
if not formats:
for format_id in (867, 836, 940):
add_m3u8_format(format_id)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/unistra.py b/hypervideo_dl/extractor/unistra.py
index 685d74f..6e872cd 100644
--- a/hypervideo_dl/extractor/unistra.py
+++ b/hypervideo_dl/extractor/unistra.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -49,7 +47,6 @@ class UnistraIE(InfoExtractor):
'format_id': format_id,
'quality': quality(format_id)
})
- self._sort_formats(formats)
title = self._html_search_regex(
r'<title>UTV - (.*?)</', webpage, 'title')
diff --git a/hypervideo_dl/extractor/unity.py b/hypervideo_dl/extractor/unity.py
index 73daacf..d1b0ecb 100644
--- a/hypervideo_dl/extractor/unity.py
+++ b/hypervideo_dl/extractor/unity.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .youtube import YoutubeIE
diff --git a/hypervideo_dl/extractor/unscripted.py b/hypervideo_dl/extractor/unscripted.py
new file mode 100644
index 0000000..6643a71
--- /dev/null
+++ b/hypervideo_dl/extractor/unscripted.py
@@ -0,0 +1,53 @@
+from .common import InfoExtractor
+from ..utils import parse_duration, traverse_obj
+
+
+class UnscriptedNewsVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.unscripted\.news/videos/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.unscripted.news/videos/a-day-at-the-farmers-protest',
+ 'info_dict': {
+ 'id': '60c0a55cd1e99b1079918a57',
+ 'display_id': 'a-day-at-the-farmers-protest',
+ 'ext': 'mp4',
+ 'title': 'A Day at the Farmers\' Protest',
+ 'description': 'md5:4b3df22747a03e8f14f746dd72190384',
+ 'thumbnail': 'https://s3.unscripted.news/anj2/60c0a55cd1e99b1079918a57/5f199a65-c803-4a5c-8fce-2077359c3b72.jpg',
+ 'duration': 2251.0,
+ 'series': 'Ground Reports',
+ }
+ }, {
+ 'url': 'https://www.unscripted.news/videos/you-get-the-politicians-you-deserve-ft-shashi-tharoor',
+ 'info_dict': {
+ 'id': '5fb3afbf18ac817d341a74d8',
+ 'display_id': 'you-get-the-politicians-you-deserve-ft-shashi-tharoor',
+ 'ext': 'mp4',
+ 'cast': ['Avalok Langer', 'Ashwin Mehta'],
+ 'thumbnail': 'https://s3.unscripted.news/anj2/5fb3afbf18ac817d341a74d8/82bd7942-4f20-4cd8-98ae-83f9e814f998.jpg',
+ 'description': 'md5:1e91b069238a705ca3a40f87e6f1182c',
+ 'duration': 1046.0,
+ 'series': 'Dumb Questions Only',
+ 'title': 'You Get The Politicians You Deserve! ft. Shashi Tharoor',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['dataLocal']
+
+ # TODO: get subtitle from srt key
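+        # 'alt_content' in the page props holds the HLS manifest URL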
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(nextjs_data['alt_content'], display_id)
+
+ return {
+ 'id': nextjs_data['_id'],
+ 'display_id': display_id,
+ 'title': nextjs_data.get('title') or self._og_search_title(webpage),
+ 'description': nextjs_data.get('sh_heading') or self._og_search_description(webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': parse_duration(nextjs_data.get('duration')),
+ 'series': traverse_obj(nextjs_data, ('show', 'topic')),
+ 'cast': traverse_obj(nextjs_data, ('cast_crew', ..., 'displayname')),
+ }
diff --git a/hypervideo_dl/extractor/unsupported.py b/hypervideo_dl/extractor/unsupported.py
new file mode 100644
index 0000000..620c025
--- /dev/null
+++ b/hypervideo_dl/extractor/unsupported.py
@@ -0,0 +1,143 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, classproperty, remove_start
+
+
+class UnsupportedInfoExtractor(InfoExtractor):
+ IE_DESC = False
+ URLS = () # Redefine in subclasses
+
+ @classproperty
+ def IE_NAME(cls):
+ return remove_start(super().IE_NAME, 'Known')
+
+ @classproperty
+ def _VALID_URL(cls):
+ return rf'https?://(?:www\.)?(?:{"|".join(cls.URLS)})'
+
+
+LF = '\n '
+
+
+class KnownDRMIE(UnsupportedInfoExtractor):
+ """Sites that are known to use DRM for all their videos
+
+ Add to this list only if:
+    * You are reasonably certain that the site uses DRM for ALL of its videos
+ * Multiple users have asked about this site on github/reddit/discord
+ """
+
+ URLS = (
+ r'play\.hbomax\.com',
+ r'channel(?:4|5)\.com',
+ r'peacocktv\.com',
+ r'(?:[\w\.]+\.)?disneyplus\.com',
+ r'open\.spotify\.com/(?:track|playlist|album|artist)',
+ r'tvnz\.co\.nz',
+ r'oneplus\.ch',
+ r'artstation\.com/learning/courses',
+ r'philo\.com',
+ r'(?:[\w\.]+\.)?mech-plus\.com',
+ r'aha\.video',
+ r'mubi\.com',
+ r'vootkids\.com',
+ r'nowtv\.it/watch',
+ r'tv\.apple\.com',
+ )
+
+ _TESTS = [{
+ # https://github.com/hypervideo/hypervideo/issues/4309
+ 'url': 'https://peacocktv.com/watch/playback/vod/GMO_00000000073159_01/f9d03003-eb04-3c7f-a7b6-a83ab7eb55bc',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/1719,
+ 'url': 'https://www.channel4.com/programmes/gurren-lagann/on-demand/69960-001',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/1548
+ 'url': 'https://www.channel5.com/show/uk-s-strongest-man-2021/season-2021/episode-1',
+ 'only_matching': True,
+ }, {
+ 'url': r'https://hsesn.apps.disneyplus.com',
+ 'only_matching': True,
+ }, {
+ 'url': r'https://www.disneyplus.com',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://open.spotify.com/artist/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://open.spotify.com/track/',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/4122
+ 'url': 'https://www.tvnz.co.nz/shows/ice-airport-alaska/episodes/s1-e1',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/1922
+ 'url': 'https://www.oneplus.ch/play/1008188',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/1140
+ 'url': 'https://www.artstation.com/learning/courses/dqQ/character-design-masterclass-with-serge-birault/chapters/Rxn3/introduction',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/3544
+ 'url': 'https://www.philo.com/player/player/vod/Vk9EOjYwODU0ODg5OTY0ODY0OTQ5NA',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/3533
+ 'url': 'https://www.mech-plus.com/player/24892/stream?assetType=episodes&playlist_id=6',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://watch.mech-plus.com/details/25240?playlist_id=6',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/2934
+ 'url': 'https://www.aha.video/player/movie/lucky-man',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/2743
+ 'url': 'https://mubi.com/films/the-night-doctor',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/3287
+ 'url': 'https://www.vootkids.com/movies/chhota-bheem-the-rise-of-kirmada/764459',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/2744
+ 'url': 'https://www.nowtv.it/watch/home/asset/and-just-like-that/skyserie_f8fe979772e8437d8a61ab83b6d293e9/seasons/1/episodes/8/R_126182_HD',
+ 'only_matching': True,
+ }, {
+ # https://github.com/hypervideo/hypervideo/issues/5557
+ 'url': 'https://tv.apple.com/it/show/loot---una-fortuna/umc.cmc.5erbujil1mpazuerhr1udnk45?ctx_brand=tvs.sbd.4000',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ f'The requested site is known to use DRM protection. '
+ f'It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported.{LF}'
+ f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, '
+ 'unless you have evidence that the video is not DRM protected', expected=True)
+
+
+class KnownPiracyIE(UnsupportedInfoExtractor):
+ """Sites that have been deemed to be piracy
+
+ In order for this to not end up being a catalog of piracy sites,
+ only sites that were once supported should be added to this list
+ """
+
+ URLS = (
+ r'dood\.(?:to|watch|so|pm|wf|re)',
+ )
+
+ _TESTS = [{
+ 'url': 'http://dood.to/e/5s1wmbdacezb',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ f'This website is no longer supported since it has been determined to be primarily used for piracy.{LF}'
+ f'{self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open issues for it', expected=True)
diff --git a/hypervideo_dl/extractor/uol.py b/hypervideo_dl/extractor/uol.py
index 1baee0b..068c2b8 100644
--- a/hypervideo_dl/extractor/uol.py
+++ b/hypervideo_dl/extractor/uol.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -110,7 +107,6 @@ class UOLIE(InfoExtractor):
'url': f_url,
'quality': quality(format_id),
})
- self._sort_formats(formats)
tags = []
for tag in video_data.get('tags', []):
diff --git a/hypervideo_dl/extractor/uplynk.py b/hypervideo_dl/extractor/uplynk.py
index 9adb969..87c427f 100644
--- a/hypervideo_dl/extractor/uplynk.py
+++ b/hypervideo_dl/extractor/uplynk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -36,7 +33,6 @@ class UplynkIE(InfoExtractor):
if session_id:
for f in formats:
f['extra_param_to_segment_url'] = 'pbs=' + session_id
- self._sort_formats(formats)
asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
if asset.get('error') == 1:
raise ExtractorError('%s said: %s' % (self.IE_NAME, asset['msg']), expected=True)
@@ -55,10 +51,9 @@ class UplynkIE(InfoExtractor):
return self._extract_uplynk_info(url)
-class UplynkPreplayIE(UplynkIE):
+class UplynkPreplayIE(UplynkIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'uplynk:preplay'
_VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
- _TEST = None
def _real_extract(self, url):
path, external_id, video_id = self._match_valid_url(url).groups()
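
The '# XXX: Do not subclass from concrete IE' annotations added here (and to usanetwork, vgtv and veoh below) flag a recurring foot-gun: a subclass of a working extractor inherits a second, competing _VALID_URL, so both classes may claim the same URL. A hedged sketch of the usual mitigation, an overridden suitable() classmethod (the method name follows the youtube-dl lineage; the toy classes are illustrative):

    import re


    class ParentIE:
        _VALID_URL = r'https?://example\.com/(?:watch|preplay)/\d+'

        @classmethod
        def suitable(cls, url):
            # Defer to the more specific child so only one extractor claims the URL
            if cls is ParentIE and ChildIE.suitable(url):
                return False
            return re.match(cls._VALID_URL, url) is not None


    class ChildIE(ParentIE):
        _VALID_URL = r'https?://example\.com/preplay/\d+'

        @classmethod
        def suitable(cls, url):
            return re.match(cls._VALID_URL, url) is not None


    assert not ParentIE.suitable('https://example.com/preplay/42')
    assert ChildIE.suitable('https://example.com/preplay/42')
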
diff --git a/hypervideo_dl/extractor/urort.py b/hypervideo_dl/extractor/urort.py
index 020425f..debd2ba 100644
--- a/hypervideo_dl/extractor/urort.py
+++ b/hypervideo_dl/extractor/urort.py
@@ -1,13 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
-from ..utils import (
- unified_strdate,
-)
+from ..utils import unified_strdate
class UrortIE(InfoExtractor):
@@ -34,7 +28,7 @@ class UrortIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
+ fstr = urllib.parse.quote("InternalBandUrl eq '%s'" % playlist_id)
json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr
songs = self._download_json(json_url, playlist_id)
entries = []
@@ -46,7 +40,6 @@ class UrortIE(InfoExtractor):
'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'],
'quality': 3 if f['FileType'] == 'mp3' else 2,
} for f in s['Files']]
- self._sort_formats(formats)
e = {
'id': '%d-%s' % (s['BandId'], s['$id']),
'title': s['Title'],
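
The urort hunk is a pure compat-shim removal: compat_urllib_parse.quote becomes the stdlib urllib.parse.quote, with identical percent-encoding of the OData filter expression. For reference (the band slug is made up):

    import urllib.parse

    fstr = urllib.parse.quote("InternalBandUrl eq 'some-band'")
    assert fstr == 'InternalBandUrl%20eq%20%27some-band%27'
    json_url = ('http://urort.p3.no/breeze/urort/TrackDTOViews'
                '?$filter=%s&$orderby=Released%%20desc' % fstr)
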
diff --git a/hypervideo_dl/extractor/urplay.py b/hypervideo_dl/extractor/urplay.py
index eb2ab26..0f0d659 100644
--- a/hypervideo_dl/extractor/urplay.py
+++ b/hypervideo_dl/extractor/urplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
dict_get,
@@ -79,7 +76,6 @@ class URPlayIE(InfoExtractor):
formats.extend(self._extract_wowza_formats(
'http://%s/%splaylist.m3u8' % (host, file_http),
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
- self._sort_formats(formats)
subtitles = {}
diff --git a/hypervideo_dl/extractor/usanetwork.py b/hypervideo_dl/extractor/usanetwork.py
index d953e46..4a06a9a 100644
--- a/hypervideo_dl/extractor/usanetwork.py
+++ b/hypervideo_dl/extractor/usanetwork.py
@@ -1,10 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .nbc import NBCIE
-class USANetworkIE(NBCIE):
+class USANetworkIE(NBCIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P<id>\d+))'
_TESTS = [{
'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
diff --git a/hypervideo_dl/extractor/usatoday.py b/hypervideo_dl/extractor/usatoday.py
index b210344..3243f3e 100644
--- a/hypervideo_dl/extractor/usatoday.py
+++ b/hypervideo_dl/extractor/usatoday.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/ustream.py b/hypervideo_dl/extractor/ustream.py
index 4a7a8f8..5df2416 100644
--- a/hypervideo_dl/extractor/ustream.py
+++ b/hypervideo_dl/extractor/ustream.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import random
import re
@@ -22,6 +20,7 @@ from ..utils import (
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
IE_NAME = 'ustream'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1']
_TESTS = [{
'url': 'http://www.ustream.tv/recorded/20274954',
'md5': '088f151799e8f572f84eb62f17d73e5c',
@@ -73,13 +72,6 @@ class UstreamIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
- if mobj is not None:
- return mobj.group('url')
-
def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
def num_to_hex(n):
return hex(n)[2:]
@@ -218,8 +210,6 @@ class UstreamIE(InfoExtractor):
formats.extend(self._parse_segmented_mp4(dash_streams))
'''
- self._sort_formats(formats)
-
description = video.get('description')
timestamp = int_or_none(video.get('created_at'))
duration = float_or_none(video.get('length'))
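
ustream is the first of several files in this commit (vbox7, vice, videa, videomore and videopress follow) where a hand-rolled _extract_url/_extract_urls static method is replaced by a declarative _EMBED_REGEX list, letting the framework discover embeds generically instead of each extractor scanning the page itself. A hedged sketch of what a generic consumer of that attribute looks like; the method name _extract_embed_urls mirrors the upstream convention, but treat the internals here as an assumption:

    import re


    class EmbedIE:
        _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ustream\.tv/embed/.+?)\1']

        @classmethod
        def _extract_embed_urls(cls, webpage):
            # One pass per declared pattern; each must expose a named 'url' group
            for pattern in cls._EMBED_REGEX:
                for mobj in re.finditer(pattern, webpage):
                    yield mobj.group('url')


    page = '<iframe src="http://www.ustream.tv/embed/20274954"></iframe>'
    assert list(EmbedIE._extract_embed_urls(page)) == ['http://www.ustream.tv/embed/20274954']
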
diff --git a/hypervideo_dl/extractor/ustudio.py b/hypervideo_dl/extractor/ustudio.py
index 92509d1..c3aeeb9 100644
--- a/hypervideo_dl/extractor/ustudio.py
+++ b/hypervideo_dl/extractor/ustudio.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -42,7 +39,6 @@ class UstudioIE(InfoExtractor):
} for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
formats = extract('video')
- self._sort_formats(formats)
webpage = self._download_webpage(url, display_id)
@@ -101,7 +97,6 @@ class UstudioEmbedIE(InfoExtractor):
'width': int_or_none(quality.get('width')),
'height': height,
})
- self._sort_formats(formats)
thumbnails = []
for image in video_data.get('images', []):
diff --git a/hypervideo_dl/extractor/utreon.py b/hypervideo_dl/extractor/utreon.py
index 4986635..90c10c0 100644
--- a/hypervideo_dl/extractor/utreon.py
+++ b/hypervideo_dl/extractor/utreon.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
dict_get,
@@ -71,7 +68,6 @@ class UtreonIE(InfoExtractor):
'format_id': format_key.split('_')[1],
'height': int(format_key.split('_')[1][:-1]),
} for format_key, format_url in videos_json.items() if url_or_none(format_url)]
- self._sort_formats(formats)
thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url')))
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/varzesh3.py b/hypervideo_dl/extractor/varzesh3.py
index 32655b9..2c13cbd 100644
--- a/hypervideo_dl/extractor/varzesh3.py
+++ b/hypervideo_dl/extractor/varzesh3.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
clean_html,
diff --git a/hypervideo_dl/extractor/vbox7.py b/hypervideo_dl/extractor/vbox7.py
index 8152ace..be35dad 100644
--- a/hypervideo_dl/extractor/vbox7.py
+++ b/hypervideo_dl/extractor/vbox7.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -20,6 +15,7 @@ class Vbox7IE(InfoExtractor):
)
(?P<id>[\da-fA-F]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
_GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
@@ -54,14 +50,6 @@ class Vbox7IE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/hypervideo_dl/extractor/veehd.py b/hypervideo_dl/extractor/veehd.py
index a6dc3c8..5ecd887 100644
--- a/hypervideo_dl/extractor/veehd.py
+++ b/hypervideo_dl/extractor/veehd.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
import json
diff --git a/hypervideo_dl/extractor/veo.py b/hypervideo_dl/extractor/veo.py
index d87bb5b..ef44d42 100644
--- a/hypervideo_dl/extractor/veo.py
+++ b/hypervideo_dl/extractor/veo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
@@ -68,8 +65,6 @@ class VeoIE(InfoExtractor):
'vbr': int_or_none(fmt.get('bit_rate'), scale=1000),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': str_or_none(metadata.get('title')),
diff --git a/hypervideo_dl/extractor/veoh.py b/hypervideo_dl/extractor/veoh.py
index d9afb56..92ff865 100644
--- a/hypervideo_dl/extractor/veoh.py
+++ b/hypervideo_dl/extractor/veoh.py
@@ -1,11 +1,14 @@
-from __future__ import unicode_literals
+import functools
+import json
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
int_or_none,
parse_duration,
qualities,
- try_get
+ try_get,
)
@@ -102,7 +105,6 @@ class VeohIE(InfoExtractor):
'quality': q(f_id),
'url': f_url,
})
- self._sort_formats(formats)
categories = metadata.get('categoryPath')
if not categories:
@@ -125,3 +127,62 @@ class VeohIE(InfoExtractor):
'categories': categories,
'tags': tags.split(', ') if tags else None,
}
+
+
+class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE
+ _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
+ IE_NAME = 'veoh:user'
+
+ _TESTS = [
+ {
+ 'url': 'https://www.veoh.com/users/valentinazoe',
+ 'info_dict': {
+ 'id': 'valentinazoe',
+ 'title': 'valentinazoe (Uploads)'
+ },
+ 'playlist_mincount': 75
+ },
+ {
+ 'url': 'https://www.veoh.com/users/PiensaLibre',
+ 'info_dict': {
+ 'id': 'PiensaLibre',
+ 'title': 'PiensaLibre (Uploads)'
+ },
+ 'playlist_mincount': 2
+ }]
+
+ _PAGE_SIZE = 16
+
+ def _fetch_page(self, uploader, page):
+ response = self._download_json(
+ 'https://www.veoh.com/users/published/videos', uploader,
+ note=f'Downloading videos page {page + 1}',
+ headers={
+ 'x-csrf-token': self._TOKEN,
+ 'content-type': 'application/json;charset=UTF-8'
+ },
+ data=json.dumps({
+ 'username': uploader,
+ 'maxResults': self._PAGE_SIZE,
+ 'page': page + 1,
+ 'requestName': 'userPage'
+ }).encode('utf-8'))
+ if not response.get('success'):
+ raise ExtractorError(response['message'])
+
+ for video in response['videos']:
+ yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
+ video['permalinkId'], video.get('title'))
+
+ def _real_initialize(self):
+ webpage = self._download_webpage(
+ 'https://www.veoh.com', None, note='Downloading authorization token')
+ self._TOKEN = self._search_regex(
+ r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
+ 'request token', group='token')
+
+ def _real_extract(self, url):
+ uploader = self._match_id(url)
+ return self.playlist_result(OnDemandPagedList(
+ functools.partial(self._fetch_page, uploader),
+ self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')
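
The new VeohUserIE paginates lazily: OnDemandPagedList only invokes the page callback for pages the consumer actually reads, and functools.partial freezes every argument except the page number. A stripped-down sketch of the mechanism with a fake, network-free fetcher (assuming the hypervideo_dl.utils import path used elsewhere in this diff):

    import functools

    from hypervideo_dl.utils import OnDemandPagedList

    PAGE_SIZE = 16


    def fetch_page(uploader, page):
        # The real extractor downloads JSON here; this fabricates PAGE_SIZE entries
        start = page * PAGE_SIZE
        yield from (f'{uploader}-video-{n}' for n in range(start, start + PAGE_SIZE))


    entries = OnDemandPagedList(functools.partial(fetch_page, 'valentinazoe'), PAGE_SIZE)
    # Slicing fetches only the pages that cover the requested range (here: page 0)
    assert entries.getslice(0, 3) == [
        'valentinazoe-video-0', 'valentinazoe-video-1', 'valentinazoe-video-2']
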
diff --git a/hypervideo_dl/extractor/vesti.py b/hypervideo_dl/extractor/vesti.py
index 002047d..e9731a9 100644
--- a/hypervideo_dl/extractor/vesti.py
+++ b/hypervideo_dl/extractor/vesti.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/vevo.py b/hypervideo_dl/extractor/vevo.py
index 8a0f292..da4ce49 100644
--- a/hypervideo_dl/extractor/vevo.py
+++ b/hypervideo_dl/extractor/vevo.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
import json
@@ -35,10 +33,125 @@ class VevoIE(VevoBaseIE):
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
https?://embed\.vevo\.com/.*?[?&]isrc=|
+ https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
vevo:)
(?P<id>[^&?#]+)'''
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1']
- _TESTS = []
+ _TESTS = [{
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'md5': '95ee28ee45e70130e3ab02b0f579ae23',
+ 'info_dict': {
+ 'id': 'GB1101300280',
+ 'ext': 'mp4',
+ 'title': 'Hurts - Somebody to Die For',
+ 'timestamp': 1372057200,
+ 'upload_date': '20130624',
+ 'uploader': 'Hurts',
+ 'track': 'Somebody to Die For',
+ 'artist': 'Hurts',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'v3 SMIL format',
+ 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
+ 'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
+ 'info_dict': {
+ 'id': 'USUV71302923',
+ 'ext': 'mp4',
+ 'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
+ 'timestamp': 1392796919,
+ 'upload_date': '20140219',
+ 'uploader': 'Cassadee Pope',
+ 'track': 'I Wish I Could Break Your Heart',
+ 'artist': 'Cassadee Pope',
+ 'genre': 'Country',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Age-limited video',
+ 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
+ 'info_dict': {
+ 'id': 'USRV81300282',
+ 'ext': 'mp4',
+ 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'age_limit': 18,
+ 'timestamp': 1372888800,
+ 'upload_date': '20130703',
+ 'uploader': 'Justin Timberlake',
+ 'track': 'Tunnel Vision (Explicit)',
+ 'artist': 'Justin Timberlake',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'No video_info',
+ 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
+ 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
+ 'info_dict': {
+ 'id': 'USUV71503000',
+ 'ext': 'mp4',
+ 'title': 'K Camp ft. T.I. - Till I Die',
+ 'age_limit': 18,
+ 'timestamp': 1449468000,
+ 'upload_date': '20151207',
+ 'uploader': 'K Camp',
+ 'track': 'Till I Die',
+ 'artist': 'K Camp',
+ 'genre': 'Hip-Hop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Featured test',
+ 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
+ 'md5': 'd28675e5e8805035d949dc5cf161071d',
+ 'info_dict': {
+ 'id': 'USUV71402190',
+ 'ext': 'mp4',
+ 'title': 'Lemaitre ft. LoLo - Wait',
+ 'age_limit': 0,
+ 'timestamp': 1413432000,
+ 'upload_date': '20141016',
+ 'uploader': 'Lemaitre',
+ 'track': 'Wait',
+ 'artist': 'Lemaitre',
+ 'genre': 'Electronic',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Only available via webpage',
+ 'url': 'http://www.vevo.com/watch/GBUV71600656',
+ 'md5': '67e79210613865b66a47c33baa5e37fe',
+ 'info_dict': {
+ 'id': 'GBUV71600656',
+ 'ext': 'mp4',
+ 'title': 'ABC - Viva Love',
+ 'age_limit': 0,
+ 'timestamp': 1461830400,
+ 'upload_date': '20160428',
+ 'uploader': 'ABC',
+ 'track': 'Viva Love',
+ 'artist': 'ABC',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Failed to download video versions info'],
+ }, {
+ # no genres available
+ 'url': 'http://www.vevo.com/watch/INS171400764',
+ 'only_matching': True,
+ }, {
+ # Another case available only via the webpage; using streams/streamsV3 formats
+ # Geo-restricted to Netherlands/Germany
+ 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
+ 'only_matching': True,
+ }]
_VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions
1: 'level3',
@@ -140,6 +253,7 @@ class VevoIE(VevoBaseIE):
fatal=False))
else:
m = re.search(r'''(?xi)
+ _(?P<quality>[a-z0-9]+)
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
_(?P<vcodec>[a-z0-9]+)
_(?P<vbr>[0-9]+)
@@ -151,7 +265,7 @@ class VevoIE(VevoBaseIE):
formats.append({
'url': version_url,
- 'format_id': 'http-%s-%s' % (version, video_version['quality']),
+ 'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
@@ -160,7 +274,6 @@ class VevoIE(VevoBaseIE):
'width': int(m.group('width')),
'height': int(m.group('height')),
})
- self._sort_formats(formats)
track = video_info['title']
if featured_artist:
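
The vevo hunk extends the filename-parsing regex with a leading quality group, then prefers the API-reported quality and falls back to the parsed one (video_version.get("quality") or m.group("quality")). The verbose pattern decodes metadata packed into the CDN filename; a sketch against a made-up URL of that shape (the URL is illustrative, and the tail of the pattern after vbr is reconstructed from the group names referenced nearby, so read it as an approximation):

    import re

    version_url = 'http://example.invalid/GB1101300280_high_1280x720_h264_2000_aac_128.mp4'

    m = re.search(r'''(?xi)
        _(?P<quality>[a-z0-9]+)
        _(?P<width>[0-9]+)x(?P<height>[0-9]+)
        _(?P<vcodec>[a-z0-9]+)
        _(?P<vbr>[0-9]+)
        _(?P<acodec>[a-z0-9]+)
        _(?P<abr>[0-9]+)
        \.(?P<ext>[a-z0-9]+)''', version_url)

    assert m.group('quality', 'width', 'vcodec') == ('high', '1280', 'h264')
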
diff --git a/hypervideo_dl/extractor/vgtv.py b/hypervideo_dl/extractor/vgtv.py
index 9d6090b..db338fa 100644
--- a/hypervideo_dl/extractor/vgtv.py
+++ b/hypervideo_dl/extractor/vgtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -12,11 +9,12 @@ from ..utils import (
)
-class VGTVIE(XstreamIE):
+class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
_GEO_BYPASS = False
_HOST_TO_APPNAME = {
+ 'tv.vg.no': 'vgtv',
'vgtv.no': 'vgtv',
'bt.no/tv': 'bttv',
'aftenbladet.no/tv': 'satv',
@@ -130,6 +128,10 @@ class VGTVIE(XstreamIE):
},
},
{
+ 'url': 'https://tv.vg.no/video/241779/politiets-ekstremkjoering',
+ 'only_matching': True,
+ },
+ {
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True,
},
@@ -236,8 +238,6 @@ class VGTVIE(XstreamIE):
raise self.raise_geo_restricted(
countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
- self._sort_formats(info['formats'])
-
info.update({
'id': video_id,
'title': data['title'],
diff --git a/hypervideo_dl/extractor/vh1.py b/hypervideo_dl/extractor/vh1.py
index 862c5c7..41b8a46 100644
--- a/hypervideo_dl/extractor/vh1.py
+++ b/hypervideo_dl/extractor/vh1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .mtv import MTVServicesInfoExtractor
# TODO Remove - Reason: Outdated Site
diff --git a/hypervideo_dl/extractor/vice.py b/hypervideo_dl/extractor/vice.py
index c8c3055..d1a3b48 100644
--- a/hypervideo_dl/extractor/vice.py
+++ b/hypervideo_dl/extractor/vice.py
@@ -1,11 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import hashlib
import json
import random
-import re
import time
from .adobepass import AdobePassIE
@@ -41,6 +37,7 @@ class ViceBaseIE(InfoExtractor):
class ViceIE(ViceBaseIE, AdobePassIE):
IE_NAME = 'vice'
_VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})']
_TESTS = [{
'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
'info_dict': {
@@ -106,17 +103,6 @@ class ViceIE(ViceBaseIE, AdobePassIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
- webpage)
-
- @staticmethod
- def _extract_url(webpage):
- urls = ViceIE._extract_urls(webpage)
- return urls[0] if urls else None
-
def _real_extract(self, url):
locale, video_id = self._match_valid_url(url).groups()
@@ -164,7 +150,6 @@ class ViceIE(ViceBaseIE, AdobePassIE):
video_data = preplay['video']
formats = self._extract_m3u8_formats(
preplay['playURL'], video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
episode = video_data.get('episode') or {}
channel = video_data.get('channel') or {}
season = video_data.get('season') or {}
diff --git a/hypervideo_dl/extractor/vidbit.py b/hypervideo_dl/extractor/vidbit.py
index 91f45b7..2813032 100644
--- a/hypervideo_dl/extractor/vidbit.py
+++ b/hypervideo_dl/extractor/vidbit.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
diff --git a/hypervideo_dl/extractor/viddler.py b/hypervideo_dl/extractor/viddler.py
index ecc4824..4091477 100644
--- a/hypervideo_dl/extractor/viddler.py
+++ b/hypervideo_dl/extractor/viddler.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
float_or_none,
@@ -10,6 +7,8 @@ from ..utils import (
class ViddlerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
+ _EMBED_REGEX = [r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1']
+
_TESTS = [{
'url': 'http://www.viddler.com/v/43903784',
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
@@ -117,7 +116,6 @@ class ViddlerIE(InfoExtractor):
f['format_id'] = format_id + '-html5'
f['source_preference'] = 0
formats.append(f)
- self._sort_formats(formats)
categories = [
t.get('text') for t in data.get('tags', []) if 'text' in t]
diff --git a/hypervideo_dl/extractor/videa.py b/hypervideo_dl/extractor/videa.py
index 90d7050..52fa8fc 100644
--- a/hypervideo_dl/extractor/videa.py
+++ b/hypervideo_dl/extractor/videa.py
@@ -1,11 +1,9 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
-import re
import string
+import struct
from .common import InfoExtractor
+from ..compat import compat_b64decode, compat_ord
from ..utils import (
ExtractorError,
int_or_none,
@@ -17,11 +15,6 @@ from ..utils import (
xpath_element,
xpath_text,
)
-from ..compat import (
- compat_b64decode,
- compat_ord,
- compat_struct_pack,
-)
class VideaIE(InfoExtractor):
@@ -35,6 +28,7 @@ class VideaIE(InfoExtractor):
)
(?P<id>[^?#&]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
_TESTS = [{
'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
@@ -81,12 +75,6 @@ class VideaIE(InfoExtractor):
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
@staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
- webpage)]
-
- @staticmethod
def rc4(cipher_text, key):
res = b''
@@ -105,7 +93,7 @@ class VideaIE(InfoExtractor):
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
k = S[(S[i] + S[j]) % 256]
- res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
+ res += struct.pack('B', k ^ compat_ord(cipher_text[m]))
return res.decode()
@@ -179,7 +167,6 @@ class VideaIE(InfoExtractor):
'height': int_or_none(source.get('height')),
})
formats.append(f)
- self._sort_formats(formats)
thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
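
The videa change swaps compat_struct_pack for the stdlib struct.pack inside the extractor's rc4 helper, which is a textbook RC4 (key scheduling plus keystream XOR) used to decrypt the player parameters. A self-contained version of the same cipher for reference, keyed with throwaway test values:

    import struct


    def rc4(cipher_text: bytes, key: str) -> bytes:
        # Key-scheduling algorithm (KSA)
        S = list(range(256))
        j = 0
        for i in range(256):
            j = (j + S[i] + ord(key[i % len(key)])) % 256
            S[i], S[j] = S[j], S[i]

        # Pseudo-random generation: XOR each byte with the next keystream byte
        res = b''
        i = j = 0
        for ch in cipher_text:
            i = (i + 1) % 256
            j = (j + S[i]) % 256
            S[i], S[j] = S[j], S[i]
            k = S[(S[i] + S[j]) % 256]
            res += struct.pack('B', k ^ ch)
        return res


    # RC4 is symmetric: applying it twice with the same key round-trips
    assert rc4(rc4(b'secret', 'key'), 'key') == b'secret'
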
diff --git a/hypervideo_dl/extractor/videocampus_sachsen.py b/hypervideo_dl/extractor/videocampus_sachsen.py
index 96e9857..982ab3d 100644
--- a/hypervideo_dl/extractor/videocampus_sachsen.py
+++ b/hypervideo_dl/extractor/videocampus_sachsen.py
@@ -1,12 +1,80 @@
-# coding: utf-8
+import functools
+import re
+
from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
class VideocampusSachsenIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?:
+ IE_NAME = 'ViMP'
+ _INSTANCES = (
+ 'bergauf.tv',
+ 'campus.demo.vimp.com',
+ 'corporate.demo.vimp.com',
+ 'dancehalldatabase.com',
+ 'drehzahl.tv',
+ 'educhannel.hs-gesundheit.de',
+ 'emedia.ls.haw-hamburg.de',
+ 'globale-evolution.net',
+ 'hohu.tv',
+ 'htvideos.hightechhigh.org',
+ 'k210039.vimp.mivitec.net',
+ 'media.cmslegal.com',
+ 'media.hs-furtwangen.de',
+ 'media.hwr-berlin.de',
+ 'mediathek.dkfz.de',
+ 'mediathek.htw-berlin.de',
+ 'mediathek.polizei-bw.de',
+ 'medien.hs-merseburg.de',
+ 'mportal.europa-uni.de',
+ 'pacific.demo.vimp.com',
+ 'slctv.com',
+ 'streaming.prairiesouth.ca',
+ 'tube.isbonline.cn',
+ 'univideo.uni-kassel.de',
+ 'ursula2.genetics.emory.edu',
+ 'ursulablicklevideoarchiv.com',
+ 'v.agrarumweltpaedagogik.at',
+ 'video.eplay-tv.de',
+ 'video.fh-dortmund.de',
+ 'video.hs-offenburg.de',
+ 'video.hs-pforzheim.de',
+ 'video.hspv.nrw.de',
+ 'video.irtshdf.fr',
+ 'video.pareygo.de',
+ 'video.tu-freiberg.de',
+ 'videocampus.sachsen.de',
+ 'videoportal.uni-freiburg.de',
+ 'videoportal.vm.uni-freiburg.de',
+ 'videos.duoc.cl',
+ 'videos.uni-paderborn.de',
+ 'vimp-bemus.udk-berlin.de',
+ 'vimp.aekwl.de',
+ 'vimp.hs-mittweida.de',
+ 'vimp.oth-regensburg.de',
+ 'vimp.ph-heidelberg.de',
+ 'vimp.sma-events.com',
+ 'vimp.weka-fachmedien.de',
+ 'webtv.univ-montp3.fr',
+ 'www.b-tu.de/media',
+ 'www.bergauf.tv',
+ 'www.bigcitytv.de',
+ 'www.cad-videos.de',
+ 'www.drehzahl.tv',
+ 'www.fh-bielefeld.de/medienportal',
+ 'www.hohu.tv',
+ 'www.orvovideo.com',
+ 'www.rwe.tv',
+ 'www.salzi.tv',
+ 'www.wenglor-media.com',
+ 'www2.univ-sba.dz',
+ )
+ _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
m/(?P<tmp_id>[0-9a-f]+)|
- (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})
- )'''
+ (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})|
+ media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?)
+ )''' % ('|'.join(map(re.escape, _INSTANCES)))
_TESTS = [
{
@@ -14,6 +82,8 @@ class VideocampusSachsenIE(InfoExtractor):
'info_dict': {
'id': 'e6b9349905c1628631f175712250f2a1',
'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
'ext': 'mp4',
},
},
@@ -22,6 +92,8 @@ class VideocampusSachsenIE(InfoExtractor):
'info_dict': {
'id': 'fc99c527e4205b121cb7c74433469262',
'title': 'Was ist selbstgesteuertes Lernen?',
+ 'description': 'md5:196aa3b0509a526db62f84679522a2f5',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
'display_id': 'Was-ist-selbstgesteuertes-Lernen',
'ext': 'mp4',
},
@@ -31,66 +103,151 @@ class VideocampusSachsenIE(InfoExtractor):
'info_dict': {
'id': '09d4ed029002eb1bdda610f1103dd54c',
'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
+ 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
'ext': 'mp4',
},
},
+ {
+ 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
+ 'info_dict': {
+ 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
+ 'id': '0183356e41af7bfb83d7667b20d9b6a3',
+ 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
+ 'description': 'md5:508958bd93e0ca002ac731d94182a54f',
+ 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
+ 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
+ 'ext': 'mp4',
+ }
+ },
+ {
+ 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
+ 'info_dict': {
+ 'id': 'c8816f1cc942c12b6cce57c835cffd7c',
+ 'title': 'Preisverleihung »Produkte des Jahres 2022«',
+ 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
+ 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
+ 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
+ 'ext': 'mp4',
+ },
+ },
+ {
+ 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
+ 'info_dict': {
+ 'id': 'fc99c527e4205b121cb7c74433469262',
+ 'title': 'Was ist selbstgesteuertes Lernen?',
+ 'ext': 'mp4',
+ },
+ },
]
def _real_extract(self, url):
- video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id')
+ host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
+ 'host', 'id', 'tmp_id', 'display_id', 'embed_id')
webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
- if not tmp_id:
- video_id = self._html_search_regex(
- r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&',
+ if not video_id:
+ video_id = embed_id or self._html_search_regex(
+ rf'src="https?://{host}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
webpage, 'video_id')
- title = self._html_search_regex(
- (r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')),
- webpage, 'title', group='content', fatal=False)
+ if not (display_id or tmp_id):
+ # Title, description from embedded page's meta wouldn't be correct
+ title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
+ description = None
+ thumbnail = None
+ else:
+ title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
+ description = self._html_search_meta(
+ ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
+ thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
+
+ formats, subtitles = [], {}
+ try:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
+ video_id, 'mp4', m3u8_id='hls', fatal=True)
+ except ExtractorError as e:
+ if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500):
+ raise
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
- video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
+ formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
return {
'id': video_id,
'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles
+ 'subtitles': subtitles,
}
-class VideocampusSachsenEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P<id>[0-9a-f]+)'
+class ViMPPlaylistIE(InfoExtractor):
+ IE_NAME = 'ViMP:Playlist'
+ _VALID_URL = r'''(?x)(?P<host>https?://(?:%s))/(?:
+ album/view/aid/(?P<album_id>[0-9]+)|
+ (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
+ )''' % '|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES))
- _TESTS = [
- {
- 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
- 'info_dict': {
- 'id': 'fc99c527e4205b121cb7c74433469262',
- 'title': 'Was ist selbstgesteuertes Lernen?',
- 'ext': 'mp4',
- },
- }
- ]
+ _TESTS = [{
+ 'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
+ 'info_dict': {
+ 'id': 'channel-3',
+ 'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
+ 'info_dict': {
+ 'id': 'album-208',
+ 'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'https://videocampus.sachsen.de/category/online-tutorials-onyx/91',
+ 'info_dict': {
+ 'id': 'category-91',
+ 'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
+ },
+ 'playlist_mincount': 7,
+ }]
+ _PAGE_SIZE = 10
+
+ def _fetch_page(self, host, url_part, id, data, page):
+ webpage = self._download_webpage(
+ f'{host}/media/ajax/component/boxList/{url_part}', id,
+ query={'page': page, 'page_only': 1}, data=urlencode_postdata(data))
+ urls = re.findall(r'"([^"]+/video/[^"]+)"', webpage)
+
+ for url in urls:
+ yield self.url_result(host + url, VideocampusSachsenIE)
def _real_extract(self, url):
- video_id = self._match_id(url)
+ host, album_id, mode, name, id = self._match_valid_url(url).group(
+ 'host', 'album_id', 'mode', 'name', 'id')
- webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<img[^>]*title="([^"<]+)"', webpage, 'title', fatal=False)
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
- video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
+ webpage = self._download_webpage(url, album_id or id, fatal=False) or ''
+ title = (self._html_search_meta('title', webpage, fatal=False)
+ or self._html_extract_title(webpage))
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
+ url_part = (f'aid/{album_id}' if album_id
+ else f'category/{name}/category_id/{id}' if mode == 'category'
+ else f'title/{name}/channel/{id}')
+
+ mode = mode or 'album'
+ data = {
+ 'vars[mode]': mode,
+ f'vars[{mode}]': album_id or id,
+ 'vars[context]': '4' if album_id else '1' if mode == 'category' else '3',
+ 'vars[context_id]': album_id or id,
+ 'vars[layout]': 'thumb',
+ 'vars[per_page][thumb]': str(self._PAGE_SIZE),
}
+
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(
+ self._fetch_page, host, url_part, album_id or id, data), self._PAGE_SIZE),
+ playlist_title=title, id=f'{mode}-{album_id or id}')
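
The rewritten ViMP extractor probes formats defensively: it tries the HLS master playlist first, swallows only 404/500 HTTP errors (some instances simply lack the endpoint), and always appends the progressive getMedium MP4 as a fallback. The control flow in isolation (the helper and error type are schematic stand-ins for the extractor machinery):

    import urllib.error


    def probe_formats(host, video_id, fetch_m3u8):
        formats, subtitles = [], {}
        try:
            formats, subtitles = fetch_m3u8(
                f'https://{host}/media/hlsMedium/key/{video_id}'
                '/format/auto/ext/mp4/learning/0/path/m3u8')
        except urllib.error.HTTPError as e:
            if e.code not in (404, 500):  # anything else is a real failure
                raise
        # The progressive MP4 is always offered as a last-resort format
        formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
        return formats, subtitles
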
diff --git a/hypervideo_dl/extractor/videodetective.py b/hypervideo_dl/extractor/videodetective.py
index fe70db7..7928a41 100644
--- a/hypervideo_dl/extractor/videodetective.py
+++ b/hypervideo_dl/extractor/videodetective.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
diff --git a/hypervideo_dl/extractor/videofyme.py b/hypervideo_dl/extractor/videofyme.py
index cd3f50a..1d1c8f7 100644
--- a/hypervideo_dl/extractor/videofyme.py
+++ b/hypervideo_dl/extractor/videofyme.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/videomore.py b/hypervideo_dl/extractor/videomore.py
index 17ef3b1..ddc33f7 100644
--- a/hypervideo_dl/extractor/videomore.py
+++ b/hypervideo_dl/extractor/videomore.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_str,
@@ -50,6 +45,12 @@ class VideomoreIE(InfoExtractor):
(?P<id>\d+)
(?:[/?#&]|\.(?:xml|json)|$)
'''
+ _EMBED_REGEX = [r'''(?x)
+ (?:
+ <iframe[^>]+src=([\'"])|
+ <object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=
+ )(?P<url>https?://videomore\.ru/[^?#"']+/\d+(?:\.xml)?)
+ ''']
_TESTS = [{
'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
'md5': '44455a346edc0d509ac5b5a5b531dc35',
@@ -129,19 +130,6 @@ class VideomoreIE(InfoExtractor):
}]
_GEO_BYPASS = False
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
- webpage)
- if not mobj:
- mobj = re.search(
- r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
- webpage)
-
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('sid') or mobj.group('id')
@@ -193,7 +181,6 @@ class VideomoreIE(InfoExtractor):
if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
self.raise_geo_restricted(countries=['RU'], metadata_available=True)
self.raise_no_formats(error, expected=True)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/videopress.py b/hypervideo_dl/extractor/videopress.py
index 6376ff0..0734aee 100644
--- a/hypervideo_dl/extractor/videopress.py
+++ b/hypervideo_dl/extractor/videopress.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -20,6 +15,7 @@ class VideoPressIE(InfoExtractor):
_ID_REGEX = r'[\da-zA-Z]{8}'
_PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
_VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>(?:https?://)?{_PATH_REGEX}{_ID_REGEX})']
_TESTS = [{
'url': 'https://videopress.com/embed/kUJmAcSf',
'md5': '706956a6c875873d51010921310e4bc6',
@@ -42,12 +38,6 @@ class VideoPressIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\']((?:https?://)?%s%s)' % (VideoPressIE._PATH_REGEX, VideoPressIE._ID_REGEX),
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -86,7 +76,6 @@ class VideoPressIE(InfoExtractor):
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/vidio.py b/hypervideo_dl/extractor/vidio.py
index 6bfb8d4..770aa28 100644
--- a/hypervideo_dl/extractor/vidio.py
+++ b/hypervideo_dl/extractor/vidio.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -71,10 +67,10 @@ class VidioBaseIE(InfoExtractor):
class VidioIE(VidioBaseIE):
- _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
- 'md5': 'cd2801394afc164e9775db6a140b91fe',
+ 'md5': 'abac81b1a205a8d94c609a473b5ea62a',
'info_dict': {
'id': '165683',
'display_id': 'dj_ambred-booyah-live-2015',
@@ -93,7 +89,8 @@ class VidioIE(VidioBaseIE):
'view_count': int,
'dislike_count': int,
'comment_count': int,
- 'tags': 'count:4',
+ 'tags': 'count:3',
+ 'uploader_url': 'https://www.vidio.com/@twelvepictures',
},
}, {
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
@@ -102,6 +99,30 @@ class VidioIE(VidioBaseIE):
# Premier-exclusive video
'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
'only_matching': True
+ }, {
+ # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
+ 'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
+ 'info_dict': {
+ 'id': '7115874',
+ 'ext': 'mp4',
+ 'channel_id': '40172876',
+ 'comment_count': int,
+ 'uploader_id': 'liputan6',
+ 'view_count': int,
+ 'dislike_count': int,
+ 'upload_date': '20220804',
+ 'uploader': 'Liputan6.com',
+ 'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
+ 'channel': 'ENAM PLUS 165',
+ 'timestamp': 1659605520,
+ 'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah',
+ 'duration': 59,
+ 'like_count': int,
+ 'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'],
+ 'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg',
+ 'uploader_url': 'https://www.vidio.com/@liputan6',
+ 'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac',
+ },
}]
def _real_extract(self, url):
@@ -135,8 +156,6 @@ class VidioIE(VidioBaseIE):
formats, subs = self._extract_m3u8_formats_and_subtitles(
hls_url, display_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
-
get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
channel = get_first('channel')
user = get_first('user')
@@ -156,7 +175,7 @@ class VidioIE(VidioBaseIE):
'uploader': user.get('name'),
'timestamp': parse_iso8601(video.get('created_at')),
'uploader_id': username,
- 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'),
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
'channel': channel.get('name'),
'channel_id': str_or_none(channel.get('id')),
'view_count': get_count('view_count'),
@@ -272,7 +291,6 @@ class VidioLiveIE(VidioBaseIE):
if stream_meta.get('stream_url'):
formats.extend(self._extract_m3u8_formats(
stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
- self._sort_formats(formats)
return {
'id': video_id,
@@ -287,5 +305,5 @@ class VidioLiveIE(VidioBaseIE):
'uploader': user.get('name'),
'timestamp': parse_iso8601(stream_meta.get('start_time')),
'uploader_id': username,
- 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'),
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
}
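
The two format_field edits here (and the matching one in vidlii.py just below) track an upstream signature change: the template string is now the third positional argument rather than a template= keyword. The semantics stay "format the value if present, else return an empty string". A simplified re-implementation to illustrate the behavior (the real utility accepts more options; this is an assumption-level sketch):

    def format_field(obj, field=None, template='%s', default=''):
        value = obj if field is None else obj.get(field)
        return template % value if value is not None else default


    assert format_field('liputan6', None, 'https://www.vidio.com/@%s') == 'https://www.vidio.com/@liputan6'
    assert format_field(None, None, 'https://www.vidio.com/@%s') == ''
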
diff --git a/hypervideo_dl/extractor/vidlii.py b/hypervideo_dl/extractor/vidlii.py
index a63919f..5933783 100644
--- a/hypervideo_dl/extractor/vidlii.py
+++ b/hypervideo_dl/extractor/vidlii.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -80,7 +77,6 @@ class VidLiiIE(InfoExtractor):
'format_id': f'{height}p',
'height': height,
})
- self._sort_formats(formats)
title = self._search_regex(
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
@@ -103,7 +99,7 @@ class VidLiiIE(InfoExtractor):
uploader = self._search_regex(
r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
webpage, 'uploader', fatal=False)
- uploader_url = format_field(uploader, template='https://www.vidlii.com/user/%s')
+ uploader_url = format_field(uploader, None, 'https://www.vidlii.com/user/%s')
upload_date = unified_strdate(self._html_search_meta(
'datePublished', webpage, default=None) or self._search_regex(
diff --git a/hypervideo_dl/extractor/vidme.py b/hypervideo_dl/extractor/vidme.py
deleted file mode 100644
index 174e69c..0000000
--- a/hypervideo_dl/extractor/vidme.py
+++ /dev/null
@@ -1,295 +0,0 @@
-from __future__ import unicode_literals
-
-import itertools
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
- ExtractorError,
- int_or_none,
- float_or_none,
- parse_iso8601,
- url_or_none,
-)
-
-
-class VidmeIE(InfoExtractor):
- IE_NAME = 'vidme'
- _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
- _TESTS = [{
- 'url': 'https://vid.me/QNB',
- 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
- 'info_dict': {
- 'id': 'QNB',
- 'ext': 'mp4',
- 'title': 'Fishing for piranha - the easy way',
- 'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1406313244,
- 'upload_date': '20140725',
- 'age_limit': 0,
- 'duration': 119.92,
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- },
- }, {
- 'url': 'https://vid.me/Gc6M',
- 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
- 'info_dict': {
- 'id': 'Gc6M',
- 'ext': 'mp4',
- 'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1441211642,
- 'upload_date': '20150902',
- 'uploader': 'SunshineM',
- 'uploader_id': '3552827',
- 'age_limit': 0,
- 'duration': 223.72,
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # tests uploader field
- 'url': 'https://vid.me/4Iib',
- 'info_dict': {
- 'id': '4Iib',
- 'ext': 'mp4',
- 'title': 'The Carver',
- 'description': 'md5:e9c24870018ae8113be936645b93ba3c',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1433203629,
- 'upload_date': '20150602',
- 'uploader': 'Thomas',
- 'uploader_id': '109747',
- 'age_limit': 0,
- 'duration': 97.859999999999999,
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
- 'url': 'https://vid.me/e/Wmur',
- 'info_dict': {
- 'id': 'Wmur',
- 'ext': 'mp4',
- 'title': 'naked smoking & stretching',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1430931613,
- 'upload_date': '20150506',
- 'uploader': 'naked-yogi',
- 'uploader_id': '1638622',
- 'age_limit': 18,
- 'duration': 653.26999999999998,
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # nsfw, user-disabled
- 'url': 'https://vid.me/dzGJ',
- 'only_matching': True,
- }, {
- # suspended
- 'url': 'https://vid.me/Ox3G',
- 'only_matching': True,
- }, {
- # deleted
- 'url': 'https://vid.me/KTPm',
- 'only_matching': True,
- }, {
- # no formats in the API response
- 'url': 'https://vid.me/e5g',
- 'info_dict': {
- 'id': 'e5g',
- 'ext': 'mp4',
- 'title': 'Video upload (e5g)',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1401480195,
- 'upload_date': '20140530',
- 'uploader': None,
- 'uploader_id': None,
- 'age_limit': 0,
- 'duration': 483,
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- try:
- response = self._download_json(
- 'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- response = self._parse_json(e.cause.read(), video_id)
- else:
- raise
-
- error = response.get('error')
- if error:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error), expected=True)
-
- video = response['video']
-
- if video.get('state') == 'deleted':
- raise ExtractorError(
- 'Vidme said: Sorry, this video has been deleted.',
- expected=True)
-
- if video.get('state') in ('user-disabled', 'suspended'):
- raise ExtractorError(
- 'Vidme said: This video has been suspended either due to a copyright claim, '
- 'or for violating the terms of use.',
- expected=True)
-
- formats = []
- for f in video.get('formats', []):
- format_url = url_or_none(f.get('uri'))
- if not format_url:
- continue
- format_type = f.get('type')
- if format_type == 'dash':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id='dash', fatal=False))
- elif format_type == 'hls':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'format_id': f.get('type'),
- 'url': format_url,
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height')),
- 'preference': 0 if f.get('type', '').endswith(
- 'clip') else 1,
- })
-
- if not formats and video.get('complete_url'):
- formats.append({
- 'url': video.get('complete_url'),
- 'width': int_or_none(video.get('width')),
- 'height': int_or_none(video.get('height')),
- })
-
- self._sort_formats(formats)
-
- title = video['title']
- description = video.get('description')
- thumbnail = video.get('thumbnail_url')
- timestamp = parse_iso8601(video.get('date_created'), ' ')
- uploader = video.get('user', {}).get('username')
- uploader_id = video.get('user', {}).get('user_id')
- age_limit = 18 if video.get('nsfw') is True else 0
- duration = float_or_none(video.get('duration'))
- view_count = int_or_none(video.get('view_count'))
- like_count = int_or_none(video.get('likes_count'))
- comment_count = int_or_none(video.get('comment_count'))
-
- return {
- 'id': video_id,
- 'title': title or 'Video upload (%s)' % video_id,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'age_limit': age_limit,
- 'timestamp': timestamp,
- 'duration': duration,
- 'view_count': view_count,
- 'like_count': like_count,
- 'comment_count': comment_count,
- 'formats': formats,
- }
-
-
-class VidmeListBaseIE(InfoExtractor):
- # Max possible limit according to https://docs.vid.me/#api-Videos-List
- _LIMIT = 100
-
- def _entries(self, user_id, user_name):
- for page_num in itertools.count(1):
- page = self._download_json(
- 'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
- % (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
- user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
-
- videos = page.get('videos', [])
- if not videos:
- break
-
- for video in videos:
- video_url = video.get('full_url') or video.get('embed_url')
- if video_url:
- yield self.url_result(video_url, VidmeIE.ie_key())
-
- total = int_or_none(page.get('page', {}).get('total'))
- if total and self._LIMIT * page_num >= total:
- break
-
- def _real_extract(self, url):
- user_name = self._match_id(url)
-
- user_id = self._download_json(
- 'https://api.vid.me/userByUsername?username=%s' % user_name,
- user_name)['user']['user_id']
-
- return self.playlist_result(
- self._entries(user_id, user_name), user_id,
- '%s - %s' % (user_name, self._TITLE))
-
-
-class VidmeUserIE(VidmeListBaseIE):
- IE_NAME = 'vidme:user'
- _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'
- _API_ITEM = 'list'
- _TITLE = 'Videos'
- _TESTS = [{
- 'url': 'https://vid.me/MasakoX',
- 'info_dict': {
- 'id': '16112341',
- 'title': 'MasakoX - %s' % _TITLE,
- },
- 'playlist_mincount': 191,
- }, {
- 'url': 'https://vid.me/unsQuare_netWork',
- 'only_matching': True,
- }]
-
-
-class VidmeUserLikesIE(VidmeListBaseIE):
- IE_NAME = 'vidme:user:likes'
- _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
- _API_ITEM = 'likes'
- _TITLE = 'Likes'
- _TESTS = [{
- 'url': 'https://vid.me/ErinAlexis/likes',
- 'info_dict': {
- 'id': '6483530',
- 'title': 'ErinAlexis - %s' % _TITLE,
- },
- 'playlist_mincount': 415,
- }, {
- 'url': 'https://vid.me/Kaleidoscope-Ish/likes',
- 'only_matching': True,
- }]
diff --git a/hypervideo_dl/extractor/vidzi.py b/hypervideo_dl/extractor/vidzi.py
deleted file mode 100644
index 42ea495..0000000
--- a/hypervideo_dl/extractor/vidzi.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- decode_packed_codes,
- js_to_json,
- NO_DEFAULT,
- PACKED_CODES_RE,
-)
-
-
-class VidziIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://vidzi.tv/cghql9yq6emu.html',
- 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
- 'info_dict': {
- 'id': 'cghql9yq6emu',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
- 'only_matching': True,
- }, {
- 'url': 'http://vidzi.cc/cghql9yq6emu.html',
- 'only_matching': True,
- }, {
- 'url': 'https://vidzi.si/rph9gztxj1et.html',
- 'only_matching': True,
- }, {
- 'url': 'http://vidzi.nu/cghql9yq6emu.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'http://vidzi.tv/%s' % video_id, video_id)
- title = self._html_search_regex(
- r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
-
- codes = [webpage]
- codes.extend([
- decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
- for mobj in re.finditer(PACKED_CODES_RE, webpage)])
- for num, code in enumerate(codes, 1):
- jwplayer_data = self._parse_json(
- self._search_regex(
- r'setup\(([^)]+)\)', code, 'jwplayer data',
- default=NO_DEFAULT if num == len(codes) else '{}'),
- video_id, transform_source=lambda s: js_to_json(
- re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
- if jwplayer_data:
- break
-
- info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
- info_dict['title'] = title
-
- return info_dict
diff --git a/hypervideo_dl/extractor/vier.py b/hypervideo_dl/extractor/vier.py
deleted file mode 100644
index 94aa350..0000000
--- a/hypervideo_dl/extractor/vier.py
+++ /dev/null
@@ -1,264 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import itertools
-
-from .common import InfoExtractor
-from ..utils import (
- urlencode_postdata,
- int_or_none,
- unified_strdate,
-)
-
-
-class VierIE(InfoExtractor):
- IE_NAME = 'vier'
- IE_DESC = 'vier.be and vijf.be'
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?(?P<site>vier|vijf)\.be/
- (?:
- (?:
- [^/]+/videos|
- video(?:/[^/]+)*
- )/
- (?P<display_id>[^/]+)(?:/(?P<id>\d+))?|
- (?:
- video/v3/embed|
- embed/video/public
- )/(?P<embed_id>\d+)
- )
- '''
- _NETRC_MACHINE = 'vier'
- _TESTS = [{
- 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
- 'md5': 'e4ae2054a6b040ef1e289e20d111b46e',
- 'info_dict': {
- 'id': '16129',
- 'display_id': 'het-wordt-warm-de-moestuin',
- 'ext': 'mp4',
- 'title': 'Het wordt warm in De Moestuin',
- 'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
- 'upload_date': '20121025',
- 'series': 'Plan B',
- 'tags': ['De Moestuin', 'Moestuin', 'meisjes', 'Tomaat', 'Wim', 'Droom'],
- },
- }, {
- 'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
- 'info_dict': {
- 'id': '2561614',
- 'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
- 'ext': 'mp4',
- 'title': 'md5:84f45fe48b8c1fa296a7f6d208d080a7',
- 'description': 'md5:0356d4981e58b8cbee19355cbd51a8fe',
- 'upload_date': '20170228',
- 'series': 'Temptation Island',
- 'tags': list,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839',
- 'info_dict': {
- 'id': '2674839',
- 'display_id': 'jani-gaat-naar-tokio-aflevering-4',
- 'ext': 'mp4',
- 'title': 'Jani gaat naar Tokio - Aflevering 4',
- 'description': 'md5:aa8d611541db6ae9e863125704511f88',
- 'upload_date': '20170501',
- 'series': 'Jani gaat',
- 'episode_number': 4,
- 'tags': ['Jani Gaat', 'Volledige Aflevering'],
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Requires account credentials',
- }, {
- # Requires account credentials but bypassed extraction via v3/embed page
- # without metadata
- 'url': 'http://www.vier.be/janigaat/videos/jani-gaat-naar-tokio-aflevering-4/2674839',
- 'info_dict': {
- 'id': '2674839',
- 'display_id': 'jani-gaat-naar-tokio-aflevering-4',
- 'ext': 'mp4',
- 'title': 'jani-gaat-naar-tokio-aflevering-4',
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Log in to extract metadata'],
- }, {
- # Without video id in URL
- 'url': 'http://www.vier.be/planb/videos/dit-najaar-plan-b',
- 'only_matching': True,
- }, {
- 'url': 'http://www.vier.be/video/v3/embed/16129',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vijf.be/embed/video/public/4093',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6',
- 'only_matching': True,
- }]
-
- def _real_initialize(self):
- self._logged_in = False
-
- def _login(self, site):
- username, password = self._get_login_info()
- if username is None or password is None:
- return
-
- login_page = self._download_webpage(
- 'http://www.%s.be/user/login' % site,
- None, note='Logging in', errnote='Unable to log in',
- data=urlencode_postdata({
- 'form_id': 'user_login',
- 'name': username,
- 'pass': password,
- }),
- headers={'Content-Type': 'application/x-www-form-urlencoded'})
-
- login_error = self._html_search_regex(
- r'(?s)<div class="messages error">\s*<div>\s*<h2.+?</h2>(.+?)<',
- login_page, 'login error', default=None)
- if login_error:
- self.report_warning('Unable to log in: %s' % login_error)
- else:
- self._logged_in = True
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- embed_id = mobj.group('embed_id')
- display_id = mobj.group('display_id') or embed_id
- video_id = mobj.group('id') or embed_id
- site = mobj.group('site')
-
- if not self._logged_in:
- self._login(site)
-
- webpage = self._download_webpage(url, display_id)
-
- if r'id="user-login"' in webpage:
- self.report_warning(
- 'Log in to extract metadata', video_id=display_id)
- webpage = self._download_webpage(
- 'http://www.%s.be/video/v3/embed/%s' % (site, video_id),
- display_id)
-
- video_id = self._search_regex(
- [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
- webpage, 'video id', default=video_id or display_id)
-
- playlist_url = self._search_regex(
- r'data-file=(["\'])(?P<url>(?:https?:)?//[^/]+/.+?\.m3u8.*?)\1',
- webpage, 'm3u8 url', default=None, group='url')
-
- if not playlist_url:
- application = self._search_regex(
- [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
- webpage, 'application', default=site + '_vod')
- filename = self._search_regex(
- [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
- webpage, 'filename')
- playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
-
- formats = self._extract_wowza_formats(
- playlist_url, display_id, skip_protocols=['dash'])
- self._sort_formats(formats)
-
- title = self._og_search_title(webpage, default=display_id)
- description = self._html_search_regex(
- r'(?s)<div\b[^>]+\bclass=(["\'])[^>]*?\bfield-type-text-with-summary\b[^>]*?\1[^>]*>.*?<p>(?P<value>.+?)</p>',
- webpage, 'description', default=None, group='value')
- thumbnail = self._og_search_thumbnail(webpage, default=None)
- upload_date = unified_strdate(self._html_search_regex(
- r'(?s)<div\b[^>]+\bclass=(["\'])[^>]*?\bfield-name-post-date\b[^>]*?\1[^>]*>.*?(?P<value>\d{2}/\d{2}/\d{4})',
- webpage, 'upload date', default=None, group='value'))
-
- series = self._search_regex(
- r'data-program=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'series', default=None, group='value')
- episode_number = int_or_none(self._search_regex(
- r'(?i)aflevering (\d+)', title, 'episode number', default=None))
- tags = re.findall(r'<a\b[^>]+\bhref=["\']/tags/[^>]+>([^<]+)<', webpage)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'series': series,
- 'episode_number': episode_number,
- 'tags': tags,
- 'formats': formats,
- }
-
-
-class VierVideosIE(InfoExtractor):
- IE_NAME = 'vier:videos'
- _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
- _TESTS = [{
- 'url': 'http://www.vier.be/demoestuin/videos',
- 'info_dict': {
- 'id': 'demoestuin',
- },
- 'playlist_mincount': 153,
- }, {
- 'url': 'http://www.vijf.be/temptationisland/videos',
- 'info_dict': {
- 'id': 'temptationisland',
- },
- 'playlist_mincount': 159,
- }, {
- 'url': 'http://www.vier.be/demoestuin/videos?page=6',
- 'info_dict': {
- 'id': 'demoestuin-page6',
- },
- 'playlist_mincount': 20,
- }, {
- 'url': 'http://www.vier.be/demoestuin/videos?page=7',
- 'info_dict': {
- 'id': 'demoestuin-page7',
- },
- 'playlist_mincount': 13,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- program = mobj.group('program')
- site = mobj.group('site')
-
- page_id = mobj.group('page')
- if page_id:
- page_id = int(page_id)
- start_page = page_id
- playlist_id = '%s-page%d' % (program, page_id)
- else:
- start_page = 0
- playlist_id = program
-
- entries = []
- for current_page_id in itertools.count(start_page):
- current_page = self._download_webpage(
- 'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
- program,
- 'Downloading page %d' % (current_page_id + 1))
- page_entries = [
- self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
- for video_url in re.findall(
- r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
- entries.extend(page_entries)
- if page_id or '>Meer<' not in current_page:
- break
-
- return self.playlist_result(entries, playlist_id)
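
The deleted VierVideosIE above paginated with itertools.count(), stopping either after the single explicitly requested page or once the '>Meer<' ("more") link disappeared from the page. A minimal, self-contained sketch of that control flow, with invented page data standing in for the downloaded HTML:

import itertools

PAGES = ['a b >Meer<', 'c >Meer<', 'd']  # stand-in for downloaded pages


def fetch(page_number):
    return PAGES[page_number]


entries = []
for page_number in itertools.count(0):
    page = fetch(page_number)
    # collect everything on the page except the pagination link itself
    entries.extend(token for token in page.split() if token != '>Meer<')
    if '>Meer<' not in page:  # no "more" link means this was the last page
        break
print(entries)  # ['a', 'b', 'c', 'd']
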
diff --git a/hypervideo_dl/extractor/viewlift.py b/hypervideo_dl/extractor/viewlift.py
index 4627f66..3812601 100644
--- a/hypervideo_dl/extractor/viewlift.py
+++ b/hypervideo_dl/extractor/viewlift.py
@@ -1,7 +1,4 @@
-from __future__ import unicode_literals
-
import json
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -65,6 +62,7 @@ class ViewLiftBaseIE(InfoExtractor):
class ViewLiftEmbedIE(ViewLiftBaseIE):
IE_NAME = 'viewlift:embed'
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX]
_TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93',
@@ -91,14 +89,6 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
- webpage)
- if mobj:
- return mobj.group('url')
-
def _real_extract(self, url):
domain, film_id = self._match_valid_url(url).groups()
site = domain.split('.')[-2]
@@ -144,7 +134,6 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'url': sub_url,
})
- self._sort_formats(formats)
return {
'id': film_id,
'title': title,
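
The ViewLift hunk above shows the refactor repeated throughout this commit: per-extractor _extract_url()/_extract_urls() static methods are deleted in favour of a declarative _EMBED_REGEX class attribute whose patterns (each with a named url group) the common base class expands. A minimal sketch of that pattern with illustrative names, not the real InfoExtractor API:

import html
import re


class EmbedSource:
    _EMBED_REGEX = []  # subclasses list patterns with a named 'url' group

    @classmethod
    def extract_embed_urls(cls, webpage):
        for pattern in cls._EMBED_REGEX:
            for mobj in re.finditer(pattern, webpage):
                # attribute values may contain HTML entities such as &amp;
                yield html.unescape(mobj.group('url'))


class ViewLiftLikeSource(EmbedSource):
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//embed\.snagfilms\.com/embed/player.+?)\1']


page = '<iframe src="//embed.snagfilms.com/embed/player?filmId=deadbeef&amp;w=500"></iframe>'
print(list(ViewLiftLikeSource.extract_embed_urls(page)))
# ['//embed.snagfilms.com/embed/player?filmId=deadbeef&w=500']
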
diff --git a/hypervideo_dl/extractor/viidea.py b/hypervideo_dl/extractor/viidea.py
index 0da0681..4cdf267 100644
--- a/hypervideo_dl/extractor/viidea.py
+++ b/hypervideo_dl/extractor/viidea.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -160,7 +158,6 @@ class ViideaIE(InfoExtractor):
smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
smil = self._download_smil(smil_url, lecture_id)
info = self._parse_smil(smil, smil_url, lecture_id)
- self._sort_formats(info['formats'])
info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
if multipart:
diff --git a/hypervideo_dl/extractor/viki.py b/hypervideo_dl/extractor/viki.py
index 8a93079..3246dab 100644
--- a/hypervideo_dl/extractor/viki.py
+++ b/hypervideo_dl/extractor/viki.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
import hashlib
import hmac
import json
@@ -265,7 +263,6 @@ class VikiIE(VikiBaseIE):
# Modify the URL to get 1080p
mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
formats = self._extract_mpd_formats(mpd_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/vimeo.py b/hypervideo_dl/extractor/vimeo.py
index 4f025a5..516b76d 100644
--- a/hypervideo_dl/extractor/vimeo.py
+++ b/hypervideo_dl/extractor/vimeo.py
@@ -1,14 +1,11 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
import functools
import re
import itertools
+import urllib.error
from .common import InfoExtractor
from ..compat import (
- compat_kwargs,
compat_HTTPError,
compat_str,
compat_urlparse,
@@ -34,7 +31,6 @@ from ..utils import (
unsmuggle_url,
urlencode_postdata,
urljoin,
- unescapeHTML,
urlhandle_detect_ext,
)
@@ -44,6 +40,18 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_LOGIN_REQUIRED = False
_LOGIN_URL = 'https://vimeo.com/log_in'
+ @staticmethod
+ def _smuggle_referrer(url, referrer_url):
+ return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
+
+ def _unsmuggle_headers(self, url):
+ """@returns (url, smuggled_data, headers)"""
+ url, data = unsmuggle_url(url, {})
+ headers = self.get_param('http_headers').copy()
+ if 'http_headers' in data:
+ headers.update(data['http_headers'])
+ return url, data, headers
+
def _perform_login(self, username, password):
webpage = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -109,21 +117,16 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
vimeo_config = self._search_regex(
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
- webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
+ webpage, 'vimeo config', *args, **kwargs)
if vimeo_config:
return self._parse_json(vimeo_config, video_id)
def _set_vimeo_cookie(self, name, value):
self._set_cookie('vimeo.com', name, value)
- def _vimeo_sort_formats(self, formats):
- # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
- # at the same time without actual units specified.
- self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source'))
-
def _parse_config(self, config, video_id):
video_data = config['video']
- video_title = video_data['title']
+ video_title = video_data.get('title')
live_event = video_data.get('live_event') or {}
is_live = live_event.get('status') == 'started'
request = config.get('request') or {}
@@ -235,6 +238,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
+ # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
+ # at the same time without actual units specified.
+ '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
}
def _extract_original_format(self, url, video_id, unlisted_hash=None):
@@ -306,7 +312,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
)
\.
)?
- vimeo(?:pro)?\.com/
+ vimeo\.com/
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:[^/]+/)*?
(?:
@@ -320,6 +326,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
/?(?:[?&].*)?(?:[#].*)?$
'''
IE_NAME = 'vimeo'
+ _EMBED_REGEX = [
+ # iframe
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
+ # Embedded (swf embed) Vimeo player
+ r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+ # Non-standard embedded Vimeo player
+ r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+ ]
_TESTS = [
{
'url': 'http://vimeo.com/56015672#at=0',
@@ -343,31 +357,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
'skip': 'No longer available'
},
{
- 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
- 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
- 'note': 'Vimeo Pro video (#1197)',
- 'info_dict': {
- 'id': '68093876',
- 'ext': 'mp4',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
- 'uploader_id': 'openstreetmapus',
- 'uploader': 'OpenStreetMap US',
- 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
- 'description': 'md5:2c362968038d4499f4d79f88458590c1',
- 'duration': 1595,
- 'upload_date': '20130610',
- 'timestamp': 1370893156,
- 'license': 'by',
- 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
- 'view_count': int,
- 'comment_count': int,
- 'like_count': int,
- },
- 'params': {
- 'format': 'best[protocol=https]',
- },
- },
- {
'url': 'http://player.vimeo.com/video/54469442',
'md5': 'b3e7f4d2cbb53bd7dc3bb6ff4ed5cfbd',
'note': 'Videos that embed the url in the player page',
@@ -721,33 +710,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
# vimeo embed with check-password page protected by Referer header
]
- @staticmethod
- def _smuggle_referrer(url, referrer_url):
- return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
-
- @staticmethod
- def _extract_urls(url, webpage):
- urls = []
- # Look for embedded (iframe) Vimeo player
- for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
- webpage):
- urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
- PLAIN_EMBED_RE = (
- # Look for embedded (swf embed) Vimeo player
- r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
- # Look more for non-standard embedded Vimeo player
- r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
- )
- for embed_re in PLAIN_EMBED_RE:
- for mobj in re.finditer(embed_re, webpage):
- urls.append(mobj.group('url'))
- return urls
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield cls._smuggle_referrer(embed_url, url)
- @staticmethod
- def _extract_url(url, webpage):
- urls = VimeoIE._extract_urls(url, webpage)
- return urls[0] if urls else None
+ @classmethod
+ def _extract_url(cls, url, webpage):
+ return next(cls._extract_embed_urls(url, webpage), None)
def _verify_player_video_password(self, url, video_id, headers):
password = self._get_video_password()
@@ -758,8 +728,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'Content-Type': 'application/x-www-form-urlencoded',
})
checked = self._download_json(
- url + '/check-password', video_id,
- 'Verifying the password', data=data, headers=headers)
+ f'{compat_urlparse.urlsplit(url)._replace(query=None).geturl()}/check-password',
+ video_id, 'Verifying the password', data=data, headers=headers)
if checked is False:
raise ExtractorError('Wrong video password', expected=True)
return checked
@@ -780,7 +750,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
})
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
- self._vimeo_sort_formats(info['formats'])
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
info.update({
'description': video.get('description'),
@@ -834,10 +803,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise
def _real_extract(self, url):
- url, data = unsmuggle_url(url, {})
- headers = self.get_param('http_headers').copy()
- if 'http_headers' in data:
- headers.update(data['http_headers'])
+ url, data, headers = self._unsmuggle_headers(url)
if 'Referer' not in headers:
headers['Referer'] = url
@@ -847,15 +813,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if unlisted_hash:
return self._extract_from_api(video_id, unlisted_hash)
- orig_url = url
- is_pro = 'vimeopro.com/' in url
- if is_pro:
- # some videos require portfolio_id to be present in player url
- # https://github.com/ytdl-org/youtube-dl/issues/20070
- url = self._extract_url(url, self._download_webpage(url, video_id))
- if not url:
- url = 'https://vimeo.com/' + video_id
- elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
+ if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
self._try_album_password(url)
@@ -877,13 +835,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
if '://player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex(
- r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+ r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)
- info = self._parse_config(config, video_id)
- self._vimeo_sort_formats(info['formats'])
- return info
+ return self._parse_config(config, video_id)
if re.search(r'<form[^>]+?id="pw_form"', webpage):
video_password = self._get_video_password()
@@ -959,14 +915,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
video_description = self._html_search_meta(
['description', 'og:description', 'twitter:description'],
webpage, default=None)
- if not video_description and is_pro:
- orig_webpage = self._download_webpage(
- orig_url, video_id,
- note='Downloading webpage for description',
- fatal=False)
- if orig_webpage:
- video_description = self._html_search_meta(
- 'description', orig_webpage, default=None)
if not video_description:
self.report_warning('Cannot find video description')
@@ -988,7 +936,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
info_dict_config = self._parse_config(config, video_id)
formats.extend(info_dict_config['formats'])
- self._vimeo_sort_formats(formats)
+ info_dict['_format_sort_fields'] = info_dict_config['_format_sort_fields']
json_ld = self._search_json_ld(webpage, video_id, default={})
@@ -1011,7 +959,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
return merge_dicts(info_dict, info_dict_config, json_ld)
-class VimeoOndemandIE(VimeoIE):
+class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'vimeo:ondemand'
_VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
_TESTS = [{
@@ -1136,9 +1084,9 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
-class VimeoUserIE(VimeoChannelIE):
+class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'vimeo:user'
- _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)'
+ _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos)?/?(?:$|[?#])'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
_TESTS = [{
'url': 'https://vimeo.com/nkistudio/videos',
@@ -1147,6 +1095,9 @@ class VimeoUserIE(VimeoChannelIE):
'id': 'nkistudio',
},
'playlist_mincount': 66,
+ }, {
+ 'url': 'https://vimeo.com/nkistudio/',
+ 'only_matching': True,
}]
_BASE_URL_TEMPL = 'https://vimeo.com/%s'
@@ -1243,7 +1194,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
entries, album_id, album.get('name'), album.get('description'))
-class VimeoGroupsIE(VimeoChannelIE):
+class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'vimeo:group'
_VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{
@@ -1330,14 +1281,13 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
page_url + '/action', video_id)
if source_format:
info_dict['formats'].append(source_format)
- self._vimeo_sort_formats(info_dict['formats'])
info_dict['description'] = clean_html(clip_data.get('description'))
return info_dict
-class VimeoWatchLaterIE(VimeoChannelIE):
+class VimeoWatchLaterIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'vimeo:watchlater'
- IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
+ IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)'
_VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
_TITLE = 'Watch Later'
_LOGIN_REQUIRED = True
@@ -1358,7 +1308,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
-class VimeoLikesIE(VimeoChannelIE):
+class VimeoLikesIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
IE_NAME = 'vimeo:likes'
IE_DESC = 'Vimeo user likes'
@@ -1385,21 +1335,107 @@ class VimeoLikesIE(VimeoChannelIE):
class VHXEmbedIE(VimeoBaseInfoExtractor):
IE_NAME = 'vhx:embed'
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://embed\.vhx\.tv/videos/\d+[^"]*)"']
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
- return unescapeHTML(mobj.group(1)) if mobj else None
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ for embed_url in super()._extract_embed_urls(url, webpage):
+ yield cls._smuggle_referrer(embed_url, url)
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ url, _, headers = self._unsmuggle_headers(url)
+ webpage = self._download_webpage(url, video_id, headers=headers)
config_url = self._parse_json(self._search_regex(
r'window\.OTTData\s*=\s*({.+})', webpage,
'ott data'), video_id, js_to_json)['config_url']
config = self._download_json(config_url, video_id)
info = self._parse_config(config, video_id)
info['id'] = video_id
- self._vimeo_sort_formats(info['formats'])
return info
+
+
+class VimeoProIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:pro'
+ _VALID_URL = r'https?://(?:www\.)?vimeopro\.com/[^/?#]+/(?P<slug>[^/?#]+)(?:(?:/videos?/(?P<id>[0-9]+)))?'
+ _TESTS = [{
+ # Vimeo URL derived from video_id
+ 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+ 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
+ 'note': 'Vimeo Pro video (#1197)',
+ 'info_dict': {
+ 'id': '68093876',
+ 'ext': 'mp4',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
+ 'uploader_id': 'openstreetmapus',
+ 'uploader': 'OpenStreetMap US',
+ 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+ 'description': 'md5:2c362968038d4499f4d79f88458590c1',
+ 'duration': 1595,
+ 'upload_date': '20130610',
+ 'timestamp': 1370893156,
+ 'license': 'by',
+ 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ 'tags': 'count:1',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ }, {
+ # password-protected VimeoPro page with Vimeo player embed
+ 'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
+ 'info_dict': {
+ 'id': '764543723',
+ 'ext': 'mp4',
+ 'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
+ 'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
+ 'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
+ 'uploader': 'CADFEM',
+ 'uploader_id': 'cadfem',
+ 'uploader_url': 'https://vimeo.com/cadfem',
+ 'duration': 12505,
+ 'chapters': 'count:10',
+ },
+ 'params': {
+ 'videopassword': 'Conference2022',
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id, video_id = self._match_valid_url(url).group('slug', 'id')
+ if video_id:
+ display_id = video_id
+ webpage = self._download_webpage(url, display_id)
+
+ password_form = self._search_regex(
+ r'(?is)<form[^>]+?method=["\']post["\'][^>]*>(.+?password.+?)</form>',
+ webpage, 'password form', default=None)
+ if password_form:
+ try:
+ webpage = self._download_webpage(url, display_id, data=urlencode_postdata({
+ 'password': self._get_video_password(),
+ **self._hidden_inputs(password_form),
+ }), note='Logging in with video password')
+ except ExtractorError as e:
+ if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 418:
+ raise ExtractorError('Wrong video password', expected=True)
+ raise
+
+ description = None
+ # even if we have video_id, some videos require player URL with portfolio_id query param
+ # https://github.com/ytdl-org/youtube-dl/issues/20070
+ vimeo_url = VimeoIE._extract_url(url, webpage)
+ if vimeo_url:
+ description = self._html_search_meta('description', webpage, default=None)
+ elif video_id:
+ vimeo_url = f'https://vimeo.com/{video_id}'
+ else:
+ raise ExtractorError(
+ 'No Vimeo embed or video ID could be found in VimeoPro page', expected=True)
+
+ return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True,
+ description=description)
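
The new VimeoBaseInfoExtractor._smuggle_referrer()/_unsmuggle_headers() pair above threads a Referer header through a URL, so the page that embedded a player can be replayed as the request referrer later. A rough sketch of the idea, carrying extra data in the URL fragment; this mirrors the intent of smuggle_url/unsmuggle_url, not their exact encoding:

import json
import urllib.parse


def smuggle(url, data):
    payload = urllib.parse.quote(json.dumps(data))
    return f'{url}#__smuggle={payload}'


def unsmuggle(url, default=None):
    if '#__smuggle=' not in url:
        return url, default
    url, _, payload = url.partition('#__smuggle=')
    return url, json.loads(urllib.parse.unquote(payload))


embed = smuggle('https://player.vimeo.com/video/56015672',
                {'http_headers': {'Referer': 'https://example.com/page'}})
clean_url, data = unsmuggle(embed, {})
headers = {'User-Agent': 'hypervideo'}  # stand-in for get_param('http_headers')
headers.update(data.get('http_headers', {}))
print(clean_url, headers['Referer'])
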
diff --git a/hypervideo_dl/extractor/vimm.py b/hypervideo_dl/extractor/vimm.py
index 060b92b..7097149 100644
--- a/hypervideo_dl/extractor/vimm.py
+++ b/hypervideo_dl/extractor/vimm.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from .common import InfoExtractor
@@ -24,7 +23,6 @@ class VimmIE(InfoExtractor):
formats, subs = self._extract_m3u8_formats_and_subtitles(
f'https://www.vimm.tv/hls/{channel_id}.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
return {
'id': channel_id,
@@ -57,7 +55,6 @@ class VimmRecordingIE(InfoExtractor):
formats, subs = self._extract_m3u8_formats_and_subtitles(
f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8', video_id, 'mp4', m3u8_id='hls', live=False)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/vimple.py b/hypervideo_dl/extractor/vimple.py
index c74b437..fdccf46 100644
--- a/hypervideo_dl/extractor/vimple.py
+++ b/hypervideo_dl/extractor/vimple.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -15,7 +13,6 @@ class SprutoBaseIE(InfoExtractor):
formats = [{
'url': f['url'],
} for f in playlist['video']]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/vine.py b/hypervideo_dl/extractor/vine.py
index e59b103..1909980 100644
--- a/hypervideo_dl/extractor/vine.py
+++ b/hypervideo_dl/extractor/vine.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -14,6 +10,7 @@ from ..utils import (
class VineIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))']
_TESTS = [{
'url': 'https://vine.co/v/b9KOOWX7HUx',
'md5': '2f36fed6235b16da96ce9b4dc890940d',
@@ -89,11 +86,10 @@ class VineIE(InfoExtractor):
'quality': quality,
})
self._check_formats(formats, video_id)
- self._sort_formats(formats)
username = data.get('username')
- alt_title = format_field(username, template='Vine by %s')
+ alt_title = format_field(username, None, 'Vine by %s')
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/viqeo.py b/hypervideo_dl/extractor/viqeo.py
index be7dfa8..79b9f29 100644
--- a/hypervideo_dl/extractor/viqeo.py
+++ b/hypervideo_dl/extractor/viqeo.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -20,6 +15,7 @@ class ViqeoIE(InfoExtractor):
)
(?P<id>[\da-f]+)
'''
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1']
_TESTS = [{
'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
'md5': 'a169dd1a6426b350dca4296226f21e76',
@@ -38,14 +34,6 @@ class ViqeoIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -86,7 +74,6 @@ class ViqeoIE(InfoExtractor):
'vcodec': 'none' if is_audio else None,
})
formats.append(f)
- self._sort_formats(formats)
duration = int_or_none(data.get('duration'))
diff --git a/hypervideo_dl/extractor/viu.py b/hypervideo_dl/extractor/viu.py
index 3cfca89..b183c88 100644
--- a/hypervideo_dl/extractor/viu.py
+++ b/hypervideo_dl/extractor/viu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import json
import uuid
@@ -89,7 +86,6 @@ class ViuIE(ViuBaseIE):
# r'\1whe\2', video_data['href'])
m3u8_url = video_data['href']
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
- self._sort_formats(formats)
for key, value in video_data.items():
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
@@ -167,12 +163,17 @@ class ViuOTTIE(InfoExtractor):
},
'skip': 'Geo-restricted to Singapore',
}, {
- 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
+ 'url': 'https://www.viu.com/ott/hk/zh-hk/vod/430078/%E7%AC%AC%E5%85%AD%E6%84%9F-3',
'info_dict': {
- 'id': '7123',
+ 'id': '430078',
'ext': 'mp4',
- 'title': '這就是我的生活之道',
- 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
+ 'title': '大韓民國的1%',
+ 'description': 'md5:74d6db47ddd9ddb9c89a05739103ccdb',
+ 'episode_number': 1,
+ 'duration': 6614,
+ 'episode': '大韓民國的1%',
+ 'series': '第六感 3',
+ 'thumbnail': 'https://d2anahhhmp1ffz.cloudfront.net/1313295781/d2b14f48d008ef2f3a9200c98d8e9b63967b9cc2',
},
'params': {
'skip_download': 'm3u8 download',
@@ -180,11 +181,12 @@ class ViuOTTIE(InfoExtractor):
},
'skip': 'Geo-restricted to Hong Kong',
}, {
- 'url': 'https://www.viu.com/ott/hk/zh-hk/vod/68776/%E6%99%82%E5%B0%9A%E5%AA%BD%E5%92%AA',
- 'playlist_count': 12,
+ 'url': 'https://www.viu.com/ott/hk/zh-hk/vod/444666/%E6%88%91%E7%9A%84%E5%AE%A4%E5%8F%8B%E6%98%AF%E4%B9%9D%E5%B0%BE%E7%8B%90',
+ 'playlist_count': 16,
'info_dict': {
- 'id': '3916',
- 'title': '時尚媽咪',
+ 'id': '23807',
+ 'title': '我的室友是九尾狐',
+ 'description': 'md5:b42c95f2b4a316cdd6ae14ca695f33b9',
},
'params': {
'skip_download': 'm3u8 download',
@@ -362,17 +364,22 @@ class ViuOTTIE(InfoExtractor):
'ext': 'mp4',
'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int)
})
- self._sort_formats(formats)
subtitles = {}
for sub in video_data.get('subtitle') or []:
- sub_url = sub.get('url')
- if not sub_url:
- continue
- subtitles.setdefault(sub.get('name'), []).append({
- 'url': sub_url,
- 'ext': 'srt',
- })
+ lang = sub.get('name') or 'und'
+ if sub.get('url'):
+ subtitles.setdefault(lang, []).append({
+ 'url': sub['url'],
+ 'ext': 'srt',
+ 'name': f'Spoken text for {lang}',
+ })
+ if sub.get('second_subtitle_url'):
+ subtitles.setdefault(f'{lang}_ost', []).append({
+ 'url': sub['second_subtitle_url'],
+ 'ext': 'srt',
+ 'name': f'On-screen text for {lang}',
+ })
title = strip_or_none(video_data.get('synopsis'))
return {
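
The Viu OTT hunk above splits each subtitle entry into two tracks: the spoken-dialogue SRT keyed by language, plus an optional on-screen-text SRT keyed with an _ost suffix. A standalone rendering of that mapping, with invented sample data:

subtitles = {}
for sub in [{'name': 'zh-hk',
             'url': 'https://example.com/a.srt',
             'second_subtitle_url': 'https://example.com/a-ost.srt'}]:
    lang = sub.get('name') or 'und'
    if sub.get('url'):
        subtitles.setdefault(lang, []).append(
            {'url': sub['url'], 'ext': 'srt', 'name': f'Spoken text for {lang}'})
    if sub.get('second_subtitle_url'):  # on-screen text rides under '<lang>_ost'
        subtitles.setdefault(f'{lang}_ost', []).append(
            {'url': sub['second_subtitle_url'], 'ext': 'srt',
             'name': f'On-screen text for {lang}'})
print(sorted(subtitles))  # ['zh-hk', 'zh-hk_ost']
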
diff --git a/hypervideo_dl/extractor/vk.py b/hypervideo_dl/extractor/vk.py
index cbc3159..347aa38 100644
--- a/hypervideo_dl/extractor/vk.py
+++ b/hypervideo_dl/extractor/vk.py
@@ -1,14 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import collections
+import hashlib
import re
from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from .odnoklassniki import OdnoklassnikiIE
+from .pladform import PladformIE
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
from ..compat import compat_urlparse
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
get_element_by_class,
int_or_none,
orderedSet,
@@ -16,19 +19,29 @@ from ..utils import (
str_to_int,
unescapeHTML,
unified_timestamp,
+ update_url_query,
url_or_none,
urlencode_postdata,
)
-from .dailymotion import DailymotionIE
-from .odnoklassniki import OdnoklassnikiIE
-from .pladform import PladformIE
-from .vimeo import VimeoIE
-from .youtube import YoutubeIE
class VKBaseIE(InfoExtractor):
_NETRC_MACHINE = 'vk'
+ def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
+ response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
+ challenge_url, cookie = response[1].geturl() if response else '', None
+ if challenge_url.startswith('https://vk.com/429.html?'):
+ cookie = self._get_cookies(challenge_url).get('hash429')
+ if not cookie:
+ return response
+
+ hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
+ self._request_webpage(
+ update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
+ note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
+ return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=True, **kwargs)
+
def _perform_login(self, username, password):
login_page, url_handle = self._download_webpage_handle(
'https://vk.com', None, 'Downloading login page')
@@ -54,11 +67,14 @@ class VKBaseIE(InfoExtractor):
'Unable to login, incorrect username and/or password', expected=True)
def _download_payload(self, path, video_id, data, fatal=True):
+ endpoint = f'https://vk.com/{path}.php'
data['al'] = 1
code, payload = self._download_json(
- 'https://vk.com/%s.php' % path, video_id,
- data=urlencode_postdata(data), fatal=fatal,
- headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
+ endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
+ headers={
+ 'Referer': endpoint,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['payload']
if code == '3':
self.raise_login_required()
elif code == '8':
@@ -69,6 +85,7 @@ class VKBaseIE(InfoExtractor):
class VKIE(VKBaseIE):
IE_NAME = 'vk'
IE_DESC = 'VK'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
_VALID_URL = r'''(?x)
https?://
(?:
@@ -84,20 +101,25 @@ class VKIE(VKBaseIE):
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
)
'''
+ # https://help.sibnet.ru/?sibnet_video_embed
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1']
_TESTS = [
{
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
- 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
'id': '-77521_162222515',
'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
- 'uploader_id': '-77521',
+ 'uploader_id': '39545378',
'duration': 195,
'timestamp': 1329049880,
'upload_date': '20120212',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:https?://.+\.jpg$',
},
+ 'params': {'skip_download': 'm3u8'},
},
{
'url': 'http://vk.com/video205387401_165548505',
@@ -110,12 +132,14 @@ class VKIE(VKBaseIE):
'duration': 9,
'timestamp': 1374364108,
'upload_date': '20130720',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:https?://.+\.jpg$',
}
},
{
'note': 'Embedded video',
'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
- 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
'id': '-77521_162222515',
'ext': 'mp4',
@@ -124,8 +148,10 @@ class VKIE(VKBaseIE):
'duration': 195,
'upload_date': '20120212',
'timestamp': 1329049880,
- 'uploader_id': '-77521',
+ 'uploader_id': '39545378',
+ 'thumbnail': r're:https?://.+\.jpg$',
},
+ 'params': {'skip_download': 'm3u8'},
},
{
# VIDEO NOW REMOVED
@@ -179,8 +205,13 @@ class VKIE(VKBaseIE):
'ext': 'mp4',
'title': '8 серия (озвучка)',
'duration': 8383,
+ 'comment_count': int,
+ 'uploader': 'Dizi2021',
+ 'like_count': int,
+ 'timestamp': 1640162189,
'upload_date': '20211222',
- 'view_count': int,
+ 'uploader_id': '-93049196',
+ 'thumbnail': r're:https?://.+\.jpg$',
},
},
{
@@ -207,10 +238,23 @@ class VKIE(VKBaseIE):
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 178,
- 'upload_date': '20130116',
+ 'upload_date': '20130117',
'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf',
'view_count': int,
+ 'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
+ 'availability': 'public',
+ 'like_count': int,
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel': 'Children\'s Joy Foundation Inc.',
+ 'uploader_url': 'http://www.youtube.com/user/thecjf',
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'tags': 'count:27',
+ 'start_time': 0.0,
+ 'categories': ['Nonprofits & Activism'],
+ 'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
+ 'age_limit': 0,
},
},
{
@@ -226,9 +270,7 @@ class VKIE(VKBaseIE):
'uploader_id': 'x1p5vl5',
'timestamp': 1473877246,
},
- 'params': {
- 'skip_download': True,
- },
+ 'skip': 'Removed'
},
{
# video key is extra_data not url\d+
@@ -243,9 +285,7 @@ class VKIE(VKBaseIE):
'timestamp': 1454859345,
'upload_date': '20160207',
},
- 'params': {
- 'skip_download': True,
- },
+ 'skip': 'Removed',
},
{
# finished live stream, postlive_mp4
@@ -256,11 +296,12 @@ class VKIE(VKBaseIE):
'title': 'ИгроМир 2016 День 1 — Игромания Утром',
'uploader': 'Игромания',
'duration': 5239,
- # TODO: use act=show to extract view_count
- # 'view_count': int,
'upload_date': '20160929',
'uploader_id': '-387766',
'timestamp': 1475137527,
+ 'thumbnail': r're:https?://.+\.jpg$',
+ 'comment_count': int,
+ 'like_count': int,
},
'params': {
'skip_download': True,
@@ -306,13 +347,6 @@ class VKIE(VKBaseIE):
'only_matching': True,
}]
- @staticmethod
- def _extract_sibnet_urls(webpage):
- # https://help.sibnet.ru/?sibnet_video_embed
- return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
- webpage)]
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('videoid')
@@ -320,7 +354,7 @@ class VKIE(VKBaseIE):
mv_data = {}
if video_id:
data = {
- 'act': 'show_inline',
+ 'act': 'show',
'video': video_id,
}
# Some videos (removed?) can only be downloaded with list id specified
@@ -413,17 +447,17 @@ class VKIE(VKBaseIE):
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
- dailymotion_urls = DailymotionIE._extract_urls(info_page)
- if dailymotion_urls:
- return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
+ dailymotion_url = next(DailymotionIE._extract_embed_urls(url, info_page), None)
+ if dailymotion_url:
+ return self.url_result(dailymotion_url, DailymotionIE.ie_key())
odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
- sibnet_urls = self._extract_sibnet_urls(info_page)
- if sibnet_urls:
- return self.url_result(sibnet_urls[0])
+ sibnet_url = next(self._extract_embed_urls(url, info_page), None)
+ if sibnet_url:
+ return self.url_result(sibnet_url)
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts:
@@ -473,7 +507,6 @@ class VKIE(VKBaseIE):
'url': format_url,
'ext': 'flv',
})
- self._sort_formats(formats)
subtitles = {}
for sub in data.get('subs') or {}:
@@ -502,7 +535,7 @@ class VKIE(VKBaseIE):
class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos"
- _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/@(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
+ _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
_TEMPLATE_URL = 'https://vk.com/videos'
_TESTS = [{
'url': 'https://vk.com/video/@mobidevices',
@@ -516,6 +549,13 @@ class VKUserVideosIE(VKBaseIE):
'id': '-17892518_uploaded',
},
'playlist_mincount': 182,
+ }, {
+ 'url': 'https://vk.com/video/playlist/-174476437_2',
+ 'info_dict': {
+ 'id': '-174476437_2',
+ 'title': 'Анонсы'
+ },
+ 'playlist_mincount': 108,
}]
_VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
@@ -550,11 +590,19 @@ class VKUserVideosIE(VKBaseIE):
def _real_extract(self, url):
u_id, section = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, u_id)
- page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
+
+ if u_id.startswith('@'):
+ page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
+ elif '_' in u_id:
+ page_id, section = u_id.split('_', 1)
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+
if not section:
section = 'all'
- return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section))
+ playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
+ return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title)
class VKWallPostIE(VKBaseIE):
@@ -593,7 +641,6 @@ class VKWallPostIE(VKBaseIE):
}],
'params': {
'skip_download': True,
- 'usenetrc': True,
},
'skip': 'Requires vk account credentials',
}, {
@@ -604,9 +651,6 @@ class VKWallPostIE(VKBaseIE):
'title': 'Сергей Горбунов - Wall post 85155021_6319',
},
'playlist_count': 1,
- 'params': {
- 'usenetrc': True,
- },
'skip': 'Requires vk account credentials',
}, {
# wall page URL
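
Two notes on the vk.py hunks above. First, VKIE ends up assigning _EMBED_REGEX twice (once for video_ext.php iframes, once for Sibnet embeds), so the second assignment shadows the first; this apparently mirrors the imported upstream source at the time rather than an error in the diff itself. Second, the new VKBaseIE._download_webpage_handle() resolves VK's WAF challenge: when a request is redirected to https://vk.com/429.html with a hash429 cookie set, the client sends md5(cookie value) back as the key query parameter and retries. A sketch of just that key computation, with an invented cookie value:

import hashlib
import urllib.parse


def resolve_challenge_url(challenge_url, hash429_cookie_value):
    key = hashlib.md5(hash429_cookie_value.encode('ascii')).hexdigest()
    parts = urllib.parse.urlsplit(challenge_url)
    query = urllib.parse.parse_qsl(parts.query)
    query.append(('key', key))  # echo the hash back to clear the challenge
    return parts._replace(query=urllib.parse.urlencode(query)).geturl()


print(resolve_challenge_url('https://vk.com/429.html?source=video', 'abc123'))
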
diff --git a/hypervideo_dl/extractor/vlive.py b/hypervideo_dl/extractor/vlive.py
index ae35c97..e2fd393 100644
--- a/hypervideo_dl/extractor/vlive.py
+++ b/hypervideo_dl/extractor/vlive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
import json
@@ -16,6 +13,7 @@ from ..utils import (
merge_dicts,
str_or_none,
strip_or_none,
+ traverse_obj,
try_get,
urlencode_postdata,
url_or_none,
@@ -84,6 +82,13 @@ class VLiveIE(VLiveBaseIE):
'upload_date': '20150817',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1439816449,
+ 'like_count': int,
+ 'channel': 'Girl\'s Day',
+ 'channel_id': 'FDF27',
+ 'comment_count': int,
+ 'release_timestamp': 1439818140,
+ 'release_date': '20150817',
+ 'duration': 1014,
},
'params': {
'skip_download': True,
@@ -101,6 +106,13 @@ class VLiveIE(VLiveBaseIE):
'upload_date': '20161112',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1478923074,
+ 'like_count': int,
+ 'channel': 'EXO',
+ 'channel_id': 'F94BD',
+ 'comment_count': int,
+ 'release_timestamp': 1478924280,
+ 'release_date': '20161112',
+ 'duration': 906,
},
'params': {
'skip_download': True,
@@ -172,6 +184,7 @@ class VLiveIE(VLiveBaseIE):
'like_count': int_or_none(video.get('likeCount')),
'comment_count': int_or_none(video.get('commentCount')),
'timestamp': int_or_none(video.get('createdAt'), scale=1000),
+ 'release_timestamp': int_or_none(traverse_obj(video, 'onAirStartAt', 'willStartAt'), scale=1000),
'thumbnail': video.get('thumb'),
}
@@ -195,7 +208,6 @@ class VLiveIE(VLiveBaseIE):
'old/v3/live/%s/playInfo',
video_id)['result']['adaptiveStreamUrl']
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
- self._sort_formats(formats)
info = get_common_fields()
info.update({
'title': video['title'],
@@ -273,7 +285,6 @@ class VLivePostIE(VLiveBaseIE):
'url': f_url,
'height': int_or_none(f_id[:-1]),
})
- self._sort_formats(formats)
entry = {
'formats': formats,
'id': video_id,
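
The release_timestamp addition above relies on traverse_obj with several alternative keys, which returns the value of the first path that is not None. A simplified stand-in for that lookup (the real hypervideo_dl.utils.traverse_obj is far more general):

def first_of(obj, *keys):
    # return the first non-None value among the candidate keys
    for key in keys:
        value = obj.get(key)
        if value is not None:
            return value


video = {'onAirStartAt': None, 'willStartAt': 1439818140000}  # invented data
print(first_of(video, 'onAirStartAt', 'willStartAt') / 1000)  # 1439818140.0
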
diff --git a/hypervideo_dl/extractor/vodlocker.py b/hypervideo_dl/extractor/vodlocker.py
index 02c9617..1c7236e 100644
--- a/hypervideo_dl/extractor/vodlocker.py
+++ b/hypervideo_dl/extractor/vodlocker.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
diff --git a/hypervideo_dl/extractor/vodpl.py b/hypervideo_dl/extractor/vodpl.py
index 9e91970..8af1572 100644
--- a/hypervideo_dl/extractor/vodpl.py
+++ b/hypervideo_dl/extractor/vodpl.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .onet import OnetBaseIE
diff --git a/hypervideo_dl/extractor/vodplatform.py b/hypervideo_dl/extractor/vodplatform.py
index 74d2257..5ff0500 100644
--- a/hypervideo_dl/extractor/vodplatform.py
+++ b/hypervideo_dl/extractor/vodplatform.py
@@ -1,12 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import unescapeHTML
class VODPlatformIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1']
_TESTS = [{
# from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
@@ -30,7 +28,6 @@ class VODPlatformIE(InfoExtractor):
formats = self._extract_wowza_formats(
hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/voicerepublic.py b/hypervideo_dl/extractor/voicerepublic.py
index a52e40a..47502af 100644
--- a/hypervideo_dl/extractor/voicerepublic.py
+++ b/hypervideo_dl/extractor/voicerepublic.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -48,7 +46,6 @@ class VoiceRepublicIE(InfoExtractor):
'ext': determine_ext(talk_url) or format_id,
'vcodec': 'none',
} for format_id, talk_url in talk['media_links'].items()]
- self._sort_formats(formats)
return {
'id': compat_str(talk.get('id') or display_id),
diff --git a/hypervideo_dl/extractor/voicy.py b/hypervideo_dl/extractor/voicy.py
index 37c7d56..7438b49 100644
--- a/hypervideo_dl/extractor/voicy.py
+++ b/hypervideo_dl/extractor/voicy.py
@@ -1,5 +1,4 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import itertools
from .common import InfoExtractor
from ..compat import compat_str
@@ -12,8 +11,6 @@ from ..utils import (
unsmuggle_url,
)
-import itertools
-
class VoicyBaseIE(InfoExtractor):
def _extract_from_playlist_data(self, value):
@@ -47,7 +44,6 @@ class VoicyBaseIE(InfoExtractor):
'acodec': 'mp3',
'vcodec': 'none',
}]
- self._sort_formats(formats)
return {
'id': compat_str(entry.get('ArticleId')),
'title': entry.get('ArticleTitle'),
@@ -108,7 +104,7 @@ class VoicyChannelIE(VoicyBaseIE):
@classmethod
def suitable(cls, url):
- return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
+ return not VoicyIE.suitable(url) and super().suitable(url)
def _entries(self, channel_id):
pager = ''
diff --git a/hypervideo_dl/extractor/voot.py b/hypervideo_dl/extractor/voot.py
index a9b66b9..b709b74 100644
--- a/hypervideo_dl/extractor/voot.py
+++ b/hypervideo_dl/extractor/voot.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -17,7 +14,7 @@ class VootIE(InfoExtractor):
voot:|
https?://(?:www\.)?voot\.com/?
(?:
- movies/[^/]+/|
+ movies?/[^/]+/|
(?:shows|kids)/(?:[^/]+/){4}
)
)
@@ -50,6 +47,9 @@ class VootIE(InfoExtractor):
}, {
'url': 'https://www.voot.com/movies/pandavas-5/424627',
'only_matching': True,
+ }, {
+ 'url': 'https://www.voot.com/movie/fight-club/621842',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -73,7 +73,6 @@ class VootIE(InfoExtractor):
formats = self._extract_m3u8_formats(
'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
description, series, season_number, episode, episode_number = [None] * 5
diff --git a/hypervideo_dl/extractor/voxmedia.py b/hypervideo_dl/extractor/voxmedia.py
index 6612081..f936200 100644
--- a/hypervideo_dl/extractor/voxmedia.py
+++ b/hypervideo_dl/extractor/voxmedia.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from .once import OnceIE
from ..compat import compat_urllib_parse_unquote
@@ -50,7 +47,6 @@ class VoxMediaVolumeIE(OnceIE):
'tbr': int_or_none(tbr),
})
if formats:
- self._sort_formats(formats)
info['formats'] = formats
info['duration'] = int_or_none(asset.get('duration'))
return info
@@ -61,7 +57,6 @@ class VoxMediaVolumeIE(OnceIE):
continue
if provider_video_type == 'brightcove':
info['formats'] = self._extract_once_formats(provider_video_id)
- self._sort_formats(info['formats'])
else:
info.update({
'_type': 'url_transparent',
@@ -74,6 +69,7 @@ class VoxMediaVolumeIE(OnceIE):
class VoxMediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src="(?P<url>https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"']
_TESTS = [{
# Volume embed, Youtube
'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
diff --git a/hypervideo_dl/extractor/vrak.py b/hypervideo_dl/extractor/vrak.py
index daa247c..198c0a2 100644
--- a/hypervideo_dl/extractor/vrak.py
+++ b/hypervideo_dl/extractor/vrak.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/vrt.py b/hypervideo_dl/extractor/vrt.py
index 10dc94a..26f48bf 100644
--- a/hypervideo_dl/extractor/vrt.py
+++ b/hypervideo_dl/extractor/vrt.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
extract_attributes,
diff --git a/hypervideo_dl/extractor/vrv.py b/hypervideo_dl/extractor/vrv.py
index 00e1006..89fa7af 100644
--- a/hypervideo_dl/extractor/vrv.py
+++ b/hypervideo_dl/extractor/vrv.py
@@ -1,20 +1,14 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
-import json
import hashlib
import hmac
+import json
import random
import string
import time
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse_urlencode,
- compat_urllib_parse,
-)
+from ..compat import compat_HTTPError, compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
float_or_none,
@@ -49,12 +43,12 @@ class VRVBaseIE(InfoExtractor):
headers['Content-Type'] = 'application/json'
base_string = '&'.join([
'POST' if data else 'GET',
- compat_urllib_parse.quote(base_url, ''),
- compat_urllib_parse.quote(encoded_query, '')])
+ urllib.parse.quote(base_url, ''),
+ urllib.parse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
(self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
- encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
+ encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '')
try:
return self._download_json(
'?'.join([base_url, encoded_query]), video_id,
@@ -198,7 +192,6 @@ class VRVIE(VRVBaseIE):
formats.extend(self._extract_vrv_formats(
stream.get('url'), video_id, stream_type.split('_')[1],
audio_locale, stream.get('hardsub_locale')))
- self._sort_formats(formats)
subtitles = {}
for k in ('captions', 'subtitles'):
diff --git a/hypervideo_dl/extractor/vshare.py b/hypervideo_dl/extractor/vshare.py
index b4874ac..1bc7ae4 100644
--- a/hypervideo_dl/extractor/vshare.py
+++ b/hypervideo_dl/extractor/vshare.py
@@ -1,18 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
-from ..compat import compat_chr
-from ..utils import (
- decode_packed_codes,
- ExtractorError,
-)
+from ..utils import ExtractorError, decode_packed_codes
class VShareIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
_TESTS = [{
'url': 'https://vshare.io/d/0f64ce6',
'md5': '17b39f55b5497ae8b59f5fbce8e35886',
@@ -26,12 +18,6 @@ class VShareIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
- webpage)
-
def _extract_packed(self, webpage):
packed = self._search_regex(
r'(eval\(function.+)', webpage, 'packed code')
@@ -40,7 +26,7 @@ class VShareIE(InfoExtractor):
digits = [int(digit) for digit in digits.split(',')]
key_digit = self._search_regex(
r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
- chars = [compat_chr(d - int(key_digit)) for d in digits]
+ chars = [chr(d - int(key_digit)) for d in digits]
return ''.join(chars)
def _real_extract(self, url):
@@ -63,8 +49,6 @@ class VShareIE(InfoExtractor):
url, '<video>%s</video>' % self._extract_packed(webpage),
video_id)[0]
- self._sort_formats(info['formats'])
-
info.update({
'id': video_id,
'title': title,
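
VShareIE._extract_packed() above recovers the hidden player markup from P.A.C.K.E.R.-obfuscated JavaScript: after unpacking, the page yields a list of integers plus a key digit, and each character is chr(value - key), now written with the builtin chr() instead of compat_chr. A toy decode with invented digits:

digits = [120, 107, 102, 103, 113, 50]  # as scraped from the unpacked JS
key_digit = 2
print(''.join(chr(d - key_digit) for d in digits))  # 'video0'
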
diff --git a/hypervideo_dl/extractor/vtm.py b/hypervideo_dl/extractor/vtm.py
index 093f1aa..6381fd3 100644
--- a/hypervideo_dl/extractor/vtm.py
+++ b/hypervideo_dl/extractor/vtm.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
diff --git a/hypervideo_dl/extractor/vube.py b/hypervideo_dl/extractor/vube.py
deleted file mode 100644
index 1c8f80a..0000000
--- a/hypervideo_dl/extractor/vube.py
+++ /dev/null
@@ -1,170 +0,0 @@
-from __future__ import unicode_literals
-
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
-from ..utils import (
- int_or_none,
-)
-
-
-class VubeIE(InfoExtractor):
- IE_NAME = 'vube'
- IE_DESC = 'Vube.com'
- _VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
-
- _TESTS = [
- {
- 'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s',
- 'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42',
- 'info_dict': {
- 'id': 'Y8NUZ69Tf7',
- 'ext': 'mp4',
- 'title': 'Best Drummer Ever [HD]',
- 'description': 'md5:2d63c4b277b85c2277761c2cf7337d71',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'uploader': 'William',
- 'timestamp': 1406876915,
- 'upload_date': '20140801',
- 'duration': 258.051,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
- },
- 'skip': 'Not accessible from Travis CI server',
- }, {
- 'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
- 'md5': 'db7aba89d4603dadd627e9d1973946fe',
- 'info_dict': {
- 'id': 'YL2qNPkqon',
- 'ext': 'mp4',
- 'title': 'Chiara Grispo - Price Tag by Jessie J',
- 'description': 'md5:8ea652a1f36818352428cb5134933313',
- 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$',
- 'uploader': 'Chiara.Grispo',
- 'timestamp': 1388743358,
- 'upload_date': '20140103',
- 'duration': 170.56,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
- },
- 'skip': 'Removed due to DMCA',
- },
- {
- 'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
- 'md5': '5d4a52492d76f72712117ce6b0d98d08',
- 'info_dict': {
- 'id': 'UeBhTudbfS',
- 'ext': 'mp4',
- 'title': 'My 7 year old Sister and I singing "Alive" by Krewella',
- 'description': 'md5:40bcacb97796339f1690642c21d56f4a',
- 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$',
- 'uploader': 'Seraina',
- 'timestamp': 1396492438,
- 'upload_date': '20140403',
- 'duration': 240.107,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'categories': ['seraina', 'jessica', 'krewella', 'alive'],
- },
- 'skip': 'Removed due to DMCA',
- }, {
- 'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
- 'md5': '0584fc13b50f887127d9d1007589d27f',
- 'info_dict': {
- 'id': '0nmsMY5vEq',
- 'ext': 'mp4',
- 'title': 'Frozen - Let It Go Cover by Siren Gene',
- 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
- 'thumbnail': r're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
- 'uploader': 'Siren',
- 'timestamp': 1395448018,
- 'upload_date': '20140322',
- 'duration': 221.788,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
- },
- 'skip': 'Removed due to DMCA',
- }
- ]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
-
- video = self._download_json(
- 'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
-
- public_id = video['public_id']
-
- formats = []
-
- for media in video['media'].get('video', []) + video['media'].get('audio', []):
- if media['transcoding_status'] != 'processed':
- continue
- fmt = {
- 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
- 'abr': int(media['audio_bitrate']),
- 'format_id': compat_str(media['media_resolution_id']),
- }
- vbr = int(media['video_bitrate'])
- if vbr:
- fmt.update({
- 'vbr': vbr,
- 'height': int(media['height']),
- })
- formats.append(fmt)
-
- if not formats and video.get('vst') == 'dmca':
- self.raise_no_formats(
- 'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
- expected=True)
-
- self._sort_formats(formats)
-
- title = video['title']
- description = video.get('description')
- thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
- uploader = video.get('user_alias') or video.get('channel')
- timestamp = int_or_none(video.get('upload_time'))
- duration = video['duration']
- view_count = video.get('raw_view_count')
- like_count = video.get('total_likes')
- dislike_count = video.get('total_hates')
-
- comments = video.get('comments')
- comment_count = None
- if comments is None:
- comment_data = self._download_json(
- 'http://vube.com/api/video/%s/comment' % video_id,
- video_id, 'Downloading video comment JSON', fatal=False)
- if comment_data is not None:
- comment_count = int_or_none(comment_data.get('total'))
- else:
- comment_count = len(comments)
-
- categories = [tag['text'] for tag in video['tags']]
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'timestamp': timestamp,
- 'duration': duration,
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'comment_count': comment_count,
- 'categories': categories,
- }
diff --git a/hypervideo_dl/extractor/vuclip.py b/hypervideo_dl/extractor/vuclip.py
index 55e087b..0e56298 100644
--- a/hypervideo_dl/extractor/vuclip.py
+++ b/hypervideo_dl/extractor/vuclip.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/vupload.py b/hypervideo_dl/extractor/vupload.py
index b561f63..23ea70c 100644
--- a/hypervideo_dl/extractor/vupload.py
+++ b/hypervideo_dl/extractor/vupload.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
diff --git a/hypervideo_dl/extractor/vvvvid.py b/hypervideo_dl/extractor/vvvvid.py
index 3faa90f..ed725a5 100644
--- a/hypervideo_dl/extractor/vvvvid.py
+++ b/hypervideo_dl/extractor/vvvvid.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -65,6 +62,18 @@ class VVVVIDIE(InfoExtractor):
'skip_download': True,
},
}, {
+ # video_type == 'video/dash'
+ 'url': 'https://www.vvvvid.it/show/683/made-in-abyss/1542/693786/nanachi',
+ 'info_dict': {
+ 'id': '693786',
+ 'ext': 'mp4',
+ 'title': 'Nanachi',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'mp4',
+ },
+ }, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True
}]
@@ -205,13 +214,15 @@ class VVVVIDIE(InfoExtractor):
})
is_youtube = True
break
+ elif video_type == 'video/dash':
+ formats.extend(self._extract_m3u8_formats(
+ embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
formats.extend(self._extract_wowza_formats(
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
metadata_from_url(embed_code)
if not is_youtube:
- self._sort_formats(formats)
info['formats'] = formats
metadata_from_url(video_data.get('thumbnail'))
@@ -230,7 +241,7 @@ class VVVVIDIE(InfoExtractor):
return info
-class VVVVIDShowIE(VVVVIDIE):
+class VVVVIDShowIE(VVVVIDIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
_TESTS = [{
'url': 'https://www.vvvvid.it/show/156/psyco-pass',
diff --git a/hypervideo_dl/extractor/vyborymos.py b/hypervideo_dl/extractor/vyborymos.py
index 4d93666..3865187 100644
--- a/hypervideo_dl/extractor/vyborymos.py
+++ b/hypervideo_dl/extractor/vyborymos.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
diff --git a/hypervideo_dl/extractor/vzaar.py b/hypervideo_dl/extractor/vzaar.py
index 54f88bb..6b9817c 100644
--- a/hypervideo_dl/extractor/vzaar.py
+++ b/hypervideo_dl/extractor/vzaar.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -15,6 +10,7 @@ from ..utils import (
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)']
_TESTS = [{
# HTTP and HLS
'url': 'https://vzaar.com/videos/1152805',
@@ -50,12 +46,6 @@ class VzaarIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
- webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
@@ -100,8 +90,6 @@ class VzaarIE(InfoExtractor):
f['_decryption_key_url'] = url_templ % ('goose', '') + qs
formats.extend(m3u8_formats)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/wakanim.py b/hypervideo_dl/extractor/wakanim.py
index a70a719..155008f 100644
--- a/hypervideo_dl/extractor/wakanim.py
+++ b/hypervideo_dl/extractor/wakanim.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from urllib.parse import unquote
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/walla.py b/hypervideo_dl/extractor/walla.py
index 00f081b..a1a9c17 100644
--- a/hypervideo_dl/extractor/walla.py
+++ b/hypervideo_dl/extractor/walla.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -72,7 +69,6 @@ class WallaIE(InfoExtractor):
if m:
fmt['height'] = int(m.group('height'))
formats.append(fmt)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/wasdtv.py b/hypervideo_dl/extractor/wasdtv.py
index 38c10dc..f57c619 100644
--- a/hypervideo_dl/extractor/wasdtv.py
+++ b/hypervideo_dl/extractor/wasdtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -40,7 +37,6 @@ class WASDTVBaseIE(InfoExtractor):
media_url, is_live = self._get_media_url(media_meta)
video_id = media.get('media_id') or container.get('media_container_id')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': str(video_id),
'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
@@ -98,7 +94,7 @@ class WASDTVStreamIE(WASDTVBaseIE):
class WASDTVRecordIE(WASDTVBaseIE):
IE_NAME = 'wasdtv:record'
- _VALID_URL = r'https?://wasd\.tv/[^/#?]+/videos\?record=(?P<id>\d+)$'
+ _VALID_URL = r'https?://wasd\.tv/[^/#?]+(?:/videos)?\?record=(?P<id>\d+)$'
_TESTS = [{
'url': 'https://wasd.tv/spacemita/videos?record=907755',
'md5': 'c9899dd85be4cc997816ff9f9ca516ce',
@@ -113,6 +109,9 @@ class WASDTVRecordIE(WASDTVBaseIE):
'is_live': False,
'view_count': int,
},
+ }, {
+ 'url': 'https://wasd.tv/spacemita?record=907755',
+ 'only_matching': True,
}]
def _get_container(self, url):
@@ -149,7 +148,6 @@ class WASDTVClipIE(WASDTVBaseIE):
clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip')
clip_data = clip.get('clip_data')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_data.get('url'), video_id=clip_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': clip_id,
'title': clip.get('clip_title') or self._og_search_title(self._download_webpage(url, clip_id, fatal=False)),
diff --git a/hypervideo_dl/extractor/washingtonpost.py b/hypervideo_dl/extractor/washingtonpost.py
index 9d6ae28..74501b1 100644
--- a/hypervideo_dl/extractor/washingtonpost.py
+++ b/hypervideo_dl/extractor/washingtonpost.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -11,7 +8,7 @@ from ..utils import traverse_obj
class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/(?:video|posttv)/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})']
_TESTS = [{
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
@@ -31,11 +28,6 @@ class WashingtonPostIE(InfoExtractor):
'only_matching': True,
}]
- @classmethod
- def _extract_urls(cls, webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)
-
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
diff --git a/hypervideo_dl/extractor/wat.py b/hypervideo_dl/extractor/wat.py
index 9ff4523..7c62d28 100644
--- a/hypervideo_dl/extractor/wat.py
+++ b/hypervideo_dl/extractor/wat.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -57,7 +54,7 @@ class WatIE(InfoExtractor):
# 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
video_data = self._download_json(
'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
- video_id, query={'context': 'MYTF1'})
+ video_id, query={'context': 'MYTF1', 'pver': '4020003'})
video_info = video_data['media']
error_desc = video_info.get('error_desc')
@@ -98,8 +95,6 @@ class WatIE(InfoExtractor):
if manifest_urls:
extract_formats(manifest_urls)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/watchbox.py b/hypervideo_dl/extractor/watchbox.py
index d19d801..c973ca9 100644
--- a/hypervideo_dl/extractor/watchbox.py
+++ b/hypervideo_dl/extractor/watchbox.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -113,7 +109,6 @@ class WatchBoxIE(InfoExtractor):
'height': int_or_none(item.get('height')),
'tbr': int_or_none(item.get('bitrate')),
})
- self._sort_formats(formats)
description = strip_or_none(item.get('descr'))
thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail')
diff --git a/hypervideo_dl/extractor/watchindianporn.py b/hypervideo_dl/extractor/watchindianporn.py
index a868191..3ded2d1 100644
--- a/hypervideo_dl/extractor/watchindianporn.py
+++ b/hypervideo_dl/extractor/watchindianporn.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/wdr.py b/hypervideo_dl/extractor/wdr.py
index ef58a66..de5dc26 100644
--- a/hypervideo_dl/extractor/wdr.py
+++ b/hypervideo_dl/extractor/wdr.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -106,8 +103,6 @@ class WDRIE(InfoExtractor):
a_format['ext'] = ext
formats.append(a_format)
- self._sort_formats(formats)
-
caption_url = media_resource.get('captionURL')
if caption_url:
subtitles['de'] = [{
@@ -136,7 +131,7 @@ class WDRIE(InfoExtractor):
}
-class WDRPageIE(WDRIE):
+class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE
_MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$'
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX
diff --git a/hypervideo_dl/extractor/webcaster.py b/hypervideo_dl/extractor/webcaster.py
index a858e99..43eeca0 100644
--- a/hypervideo_dl/extractor/webcaster.py
+++ b/hypervideo_dl/extractor/webcaster.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -53,7 +50,6 @@ class WebcasterIE(InfoExtractor):
'format_note': track.get('title'),
})
formats.extend(m3u8_formats)
- self._sort_formats(formats)
thumbnail = xpath_text(video, './/image', 'thumbnail')
@@ -67,27 +63,23 @@ class WebcasterIE(InfoExtractor):
class WebcasterFeedIE(InfoExtractor):
_VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
+ _EMBED_REGEX = [r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)']
_TEST = {
'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
'only_matching': True,
}
- @staticmethod
- def _extract_url(ie, webpage):
- mobj = re.search(
- r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)',
- webpage)
- if mobj:
- return mobj.group('url')
+ def _extract_from_webpage(self, url, webpage):
+ yield from super()._extract_from_webpage(url, webpage)
+
for secure in (True, False):
- video_url = ie._og_search_video_url(
- webpage, secure=secure, default=None)
+ video_url = self._og_search_video_url(webpage, secure=secure, default=None)
if video_url:
mobj = re.search(
r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
video_url)
if mobj:
- return mobj.group('url')
+ yield self.url_result(mobj.group('url'), self)
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/hypervideo_dl/extractor/webofstories.py b/hypervideo_dl/extractor/webofstories.py
index f2b8d19..65f48f3 100644
--- a/hypervideo_dl/extractor/webofstories.py
+++ b/hypervideo_dl/extractor/webofstories.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -107,8 +104,6 @@ class WebOfStoriesIE(InfoExtractor):
'play_path': play_path,
}]
- self._sort_formats(formats)
-
return {
'id': story_id,
'title': title,
diff --git a/hypervideo_dl/extractor/weibo.py b/hypervideo_dl/extractor/weibo.py
index dafa2af..81a23b9 100644
--- a/hypervideo_dl/extractor/weibo.py
+++ b/hypervideo_dl/extractor/weibo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
import json
@@ -91,8 +88,6 @@ class WeiboIE(InfoExtractor):
'height': res,
})
- self._sort_formats(formats)
-
uploader = self._og_search_property(
'nick-name', webpage, 'uploader', default=None)
diff --git a/hypervideo_dl/extractor/weiqitv.py b/hypervideo_dl/extractor/weiqitv.py
index 7e0befd..c9ff641 100644
--- a/hypervideo_dl/extractor/weiqitv.py
+++ b/hypervideo_dl/extractor/weiqitv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/whowatch.py b/hypervideo_dl/extractor/whowatch.py
index e4b610d..f2808cd 100644
--- a/hypervideo_dl/extractor/whowatch.py
+++ b/hypervideo_dl/extractor/whowatch.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -73,7 +70,6 @@ class WhoWatchIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, ext='mp4', m3u8_id='hls'))
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], compat_str)
if uploader_url:
diff --git a/hypervideo_dl/extractor/wikimedia.py b/hypervideo_dl/extractor/wikimedia.py
new file mode 100644
index 0000000..11c801f
--- /dev/null
+++ b/hypervideo_dl/extractor/wikimedia.py
@@ -0,0 +1,55 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ parse_qs,
+ remove_start,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class WikimediaIE(InfoExtractor):
+ IE_NAME = 'wikimedia.org'
+ _VALID_URL = r'https?://commons\.wikimedia\.org/wiki/File:(?P<id>[^/#?]+)\.\w+'
+ _TESTS = [{
+ 'url': 'https://commons.wikimedia.org/wiki/File:Die_Temperaturkurve_der_Erde_(ZDF,_Terra_X)_720p_HD_50FPS.webm',
+ 'info_dict': {
+ 'url': 're:https?://upload.wikimedia.org/wikipedia',
+ 'ext': 'webm',
+ 'id': 'Die_Temperaturkurve_der_Erde_(ZDF,_Terra_X)_720p_HD_50FPS',
+ 'title': 'Die Temperaturkurve der Erde (ZDF, Terra X) 720p HD 50FPS.webm - Wikimedia Commons',
+ 'description': 'md5:7cd84f76e7081f1be033d0b155b4a460',
+ 'license': 'Creative Commons Attribution 4.0 International',
+ 'uploader': 'ZDF/Terra X/Gruppe 5/Luise Wagner, Jonas Sichert, Andreas Hougardy',
+ 'subtitles': 'count:4'
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ subtitles = {}
+ for sub in set(re.findall(r'\bsrc\s*=\s*["\'](/w/api[^"]+)["\']', webpage)):
+ sub = urljoin('https://commons.wikimedia.org', unescapeHTML(sub))
+ qs = parse_qs(sub)
+ lang = qs.get('lang', [None])[-1]
+ sub_ext = qs.get('trackformat', [None])[-1]
+ if lang and sub_ext:
+ subtitles.setdefault(lang, []).append({'ext': sub_ext, 'url': sub})
+
+ return {
+ 'id': video_id,
+ 'url': self._html_search_regex(r'<source\s[^>]*\bsrc="([^"]+)"', webpage, 'video URL'),
+ 'description': clean_html(get_element_by_class('description', webpage)),
+ 'title': remove_start(self._og_search_title(webpage), 'File:'),
+ 'license': self._html_search_regex(
+ r'licensed under(?: the)? (.+?) license',
+ get_element_by_class('licensetpl', webpage), 'license', default=None),
+ 'uploader': self._html_search_regex(
+ r'>\s*Author\s*</td>\s*<td\b[^>]*>\s*([^<]+)\s*</td>', webpage, 'video author', default=None),
+ 'subtitles': subtitles,
+ }
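
The subtitle loop in this new extractor relies on each `/w/api` track URL carrying `lang` and `trackformat` query parameters. A stdlib-only illustration of that parsing (the URL is an invented sample of Commons' timedtext endpoint; the extractor itself uses the project's own parse_qs helper):

from urllib.parse import parse_qs, urlparse

sub = ('https://commons.wikimedia.org/w/api.php?action=timedtext'
       '&title=File%3AExample.webm&lang=de&trackformat=srt')
qs = parse_qs(urlparse(sub).query)
assert qs.get('lang', [None])[-1] == 'de'          # subtitle language
assert qs.get('trackformat', [None])[-1] == 'srt'  # subtitle extension
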
diff --git a/hypervideo_dl/extractor/willow.py b/hypervideo_dl/extractor/willow.py
index 4d3d62f..0ec9c9d 100644
--- a/hypervideo_dl/extractor/willow.py
+++ b/hypervideo_dl/extractor/willow.py
@@ -1,4 +1,3 @@
-# coding: utf-8
from ..utils import ExtractorError
from .common import InfoExtractor
@@ -42,7 +41,6 @@ class WillowIE(InfoExtractor):
raise ExtractorError('No videos found')
formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': str(video.get('content_id')),
diff --git a/hypervideo_dl/extractor/wimtv.py b/hypervideo_dl/extractor/wimtv.py
index ea953bf..5711123 100644
--- a/hypervideo_dl/extractor/wimtv.py
+++ b/hypervideo_dl/extractor/wimtv.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -15,14 +10,15 @@ from ..utils import (
class WimTVIE(InfoExtractor):
_player = None
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
- _VALID_URL = r'''(?x)
+ _VALID_URL = r'''(?x:
https?://platform.wim.tv/
(?:
(?:embed/)?\?
|\#/webtv/.+?/
)
(?P<type>vod|live|cast)[=/]
- (?P<id>%s).*?''' % _UUID_RE
+ (?P<id>%s).*?)''' % _UUID_RE
+ _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{_VALID_URL})']
_TESTS = [{
# vod stream
'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a',
@@ -57,14 +53,6 @@ class WimTVIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe[^>]+src=["\'](?P<url>%s)' % WimTVIE._VALID_URL,
- webpage)]
-
def _real_initialize(self):
if not self._player:
self._get_player_data()
@@ -151,7 +139,6 @@ class WimTVIE(InfoExtractor):
})
json = json.get('resource')
thumb = self._generate_thumbnail(json.get('thumbnailId'))
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/wistia.py b/hypervideo_dl/extractor/wistia.py
index a170966..38dcc2f 100644
--- a/hypervideo_dl/extractor/wistia.py
+++ b/hypervideo_dl/extractor/wistia.py
@@ -1,32 +1,36 @@
-from __future__ import unicode_literals
-
import re
+import urllib.error
+import urllib.parse
+from base64 import b64decode
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ parse_qs,
+ traverse_obj,
try_get,
- unescapeHTML,
+ update_url_query,
)
class WistiaBaseIE(InfoExtractor):
_VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
- _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
- _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
+ _VALID_URL_BASE = r'https?://(?:\w+\.)?wistia\.(?:net|com)/(?:embed/)?'
+ _EMBED_BASE_URL = 'http://fast.wistia.net/embed/'
def _download_embed_config(self, config_type, config_id, referer):
- base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
+ base_url = self._EMBED_BASE_URL + '%s/%s' % (config_type, config_id)
embed_config = self._download_json(
base_url + '.json', config_id, headers={
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
})
- if isinstance(embed_config, dict) and embed_config.get('error'):
+ error = traverse_obj(embed_config, 'error')
+ if error:
raise ExtractorError(
- 'Error while getting the playlist', expected=True)
+ f'Error while getting the playlist: {error}', expected=True)
return embed_config
@@ -94,8 +98,6 @@ class WistiaBaseIE(InfoExtractor):
})
formats.append(f)
- self._sort_formats(formats)
-
subtitles = {}
for caption in data.get('captions', []):
language = caption.get('language')
@@ -116,10 +118,38 @@ class WistiaBaseIE(InfoExtractor):
'subtitles': subtitles,
}
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ from .teachable import TeachableIE
+
+ if list(TeachableIE._extract_embed_urls(url, webpage)):
+ return
+
+ yield from super()._extract_from_webpage(url, webpage)
+
+ @classmethod
+ def _extract_wistia_async_embed(cls, webpage):
+ # https://wistia.com/support/embed-and-share/video-on-your-website
+ # https://wistia.com/support/embed-and-share/channel-embeds
+ yield from re.finditer(
+ r'''(?sx)
+ <(?:div|section)[^>]+class=([\"'])(?:(?!\1).)*?(?P<type>wistia[a-z_0-9]+)\s*\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
+ ''', webpage)
+
+ @classmethod
+ def _extract_url_media_id(cls, url):
+ mobj = re.search(r'(?:wmediaid|wvideo(?:id)?)]?=(?P<id>[a-z0-9]{10})', urllib.parse.unquote_plus(url))
+ if mobj:
+ return mobj.group('id')
+
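
A quick self-contained check of the async-embed pattern introduced above; the HTML snippet is invented, and the regex is copied from `_extract_wistia_async_embed`:

import re

snippet = '<div class="wistia_embed wistia_async_a1b2c3d4e5"></div>'
m = re.search(
    r'''(?sx)
    <(?:div|section)[^>]+class=([\"'])(?:(?!\1).)*?(?P<type>wistia[a-z_0-9]+)\s*\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
    ''', snippet)
assert m.group('type') == 'wistia_embed' and m.group('id') == 'a1b2c3d4e5'
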
class WistiaIE(WistiaBaseIE):
_VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
-
+ _EMBED_REGEX = [
+ r'''(?x)
+ <(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\']
+ (?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})
+ ''']
_TESTS = [{
# with hls video
'url': 'wistia:807fafadvk',
@@ -133,6 +163,33 @@ class WistiaIE(WistiaBaseIE):
'timestamp': 1463607249,
'duration': 4987.11,
},
+ 'skip': 'video unavailable',
+ }, {
+ 'url': 'wistia:a6ndpko1wg',
+ 'md5': '10c1ce9c4dde638202513ed17a3767bd',
+ 'info_dict': {
+ 'id': 'a6ndpko1wg',
+ 'ext': 'bin',
+ 'title': 'Episode 2: Boxed Water\'s retention is thirsty',
+ 'upload_date': '20210324',
+ 'description': 'md5:da5994c2c2d254833b412469d9666b7a',
+ 'duration': 966.0,
+ 'timestamp': 1616614369,
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.bin',
+ }
+ }, {
+ 'url': 'wistia:5vd7p4bct5',
+ 'md5': 'b9676d24bf30945d97060638fbfe77f0',
+ 'info_dict': {
+ 'id': '5vd7p4bct5',
+ 'ext': 'bin',
+ 'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679',
+ 'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f',
+ 'upload_date': '20220915',
+ 'timestamp': 1663258727,
+ 'duration': 623.019,
+ 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.(?:jpg|bin)$',
+ },
}, {
'url': 'wistia:sh7fpupwlt',
'only_matching': True,
@@ -147,35 +204,56 @@ class WistiaIE(WistiaBaseIE):
'only_matching': True,
}]
- # https://wistia.com/support/embed-and-share/video-on-your-website
- @staticmethod
- def _extract_url(webpage):
- urls = WistiaIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- for match in re.finditer(
- r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
- urls.append(unescapeHTML(match.group('url')))
- for match in re.finditer(
- r'''(?sx)
- <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
- ''', webpage):
- urls.append('wistia:%s' % match.group('id'))
- for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
- urls.append('wistia:%s' % match.group('id'))
- return urls
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.weidert.com/blog/wistia-channels-video-marketing-tool',
+ 'info_dict': {
+ 'id': 'cqwukac3z1',
+ 'ext': 'bin',
+ 'title': 'How Wistia Channels Can Help Capture Inbound Value From Your Video Content',
+ 'duration': 158.125,
+ 'timestamp': 1618974400,
+ 'description': 'md5:27abc99a758573560be72600ef95cece',
+ 'upload_date': '20210421',
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.bin',
+ }
+ }, {
+ 'url': 'https://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+ 'md5': 'b9676d24bf30945d97060638fbfe77f0',
+ 'info_dict': {
+ 'id': '5vd7p4bct5',
+ 'ext': 'bin',
+ 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
+ 'upload_date': '20220915',
+ 'timestamp': 1663258727,
+ 'duration': 623.019,
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.bin',
+ 'description': 'a Paywall Videos video',
+ },
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- embed_config = self._download_embed_config('media', video_id, url)
+ embed_config = self._download_embed_config('medias', video_id, url)
return self._extract_media(embed_config)
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ urls = list(super()._extract_embed_urls(url, webpage))
+ for match in cls._extract_wistia_async_embed(webpage):
+ if match.group('type') != 'wistia_channel':
+ urls.append('wistia:%s' % match.group('id'))
+ for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})',
+ webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ if not WistiaChannelIE._extract_embed_urls(url, webpage): # Fallback
+ media_id = cls._extract_url_media_id(url)
+ if media_id:
+ urls.append('wistia:%s' % media_id)
+ return urls
+
class WistiaPlaylistIE(WistiaBaseIE):
- _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)
+ _VALID_URL = r'%splaylists/%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
_TEST = {
'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
@@ -187,7 +265,7 @@ class WistiaPlaylistIE(WistiaBaseIE):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- playlist = self._download_embed_config('playlist', playlist_id, url)
+ playlist = self._download_embed_config('playlists', playlist_id, url)
entries = []
for media in (try_get(playlist, lambda x: x[0]['medias']) or []):
@@ -197,3 +275,107 @@ class WistiaPlaylistIE(WistiaBaseIE):
entries.append(self._extract_media(embed_config))
return self.playlist_result(entries, playlist_id)
+
+
+class WistiaChannelIE(WistiaBaseIE):
+ _VALID_URL = r'(?:wistiachannel:|%schannel/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+
+ _TESTS = [{
+ # JSON Embed API returns 403, should fall back to webpage
+ 'url': 'https://fast.wistia.net/embed/channel/yvyvu7wjbg?wchannelid=yvyvu7wjbg',
+ 'info_dict': {
+ 'id': 'yvyvu7wjbg',
+ 'title': 'Copysmith Tutorials and Education!',
+ 'description': 'Learn all things Copysmith via short and informative videos!'
+ },
+ 'playlist_mincount': 7,
+ 'expected_warnings': ['falling back to webpage'],
+ }, {
+ 'url': 'https://fast.wistia.net/embed/channel/3802iirk0l',
+ 'info_dict': {
+ 'id': '3802iirk0l',
+ 'title': 'The Roof',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ # link to popup video, follow --no-playlist
+ 'url': 'https://fast.wistia.net/embed/channel/3802iirk0l?wchannelid=3802iirk0l&wmediaid=sp5dqjzw3n',
+ 'info_dict': {
+ 'id': 'sp5dqjzw3n',
+ 'ext': 'bin',
+ 'title': 'The Roof S2: The Modern CRO',
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.bin',
+ 'duration': 86.487,
+ 'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
+ 'timestamp': 1619790290,
+ 'upload_date': '20210430',
+ },
+ 'params': {'noplaylist': True, 'skip_download': True},
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.profitwell.com/recur/boxed-out',
+ 'info_dict': {
+ 'id': '6jyvmqz6zs',
+ 'title': 'Boxed Out',
+ 'description': 'md5:14a8a93a1dbe236718e6a59f8c8c7bae',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # section instead of div
+ 'url': 'https://360learning.com/studio/onboarding-joei/',
+ 'info_dict': {
+ 'id': 'z874k93n2o',
+ 'title': 'Onboarding Joei.',
+ 'description': 'Coming to you weekly starting Feb 19th.',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://amplitude.com/amplify-sessions?amp%5Bwmediaid%5D=pz0m0l0if3&amp%5Bwvideo%5D=pz0m0l0if3&wchannelid=emyjmwjf79&wmediaid=i8um783bdt',
+ 'info_dict': {
+ 'id': 'pz0m0l0if3',
+ 'title': 'A Framework for Improving Product Team Performance',
+ 'ext': 'bin',
+ 'timestamp': 1653935275,
+ 'upload_date': '20220530',
+ 'description': 'Learn how to help your company improve and achieve your product related goals.',
+ 'duration': 1854.39,
+ 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.bin',
+ },
+ 'params': {'noplaylist': True, 'skip_download': True},
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ media_id = self._extract_url_media_id(url)
+ if not self._yes_playlist(channel_id, media_id, playlist_label='channel'):
+ return self.url_result(f'wistia:{media_id}', 'Wistia')
+
+ try:
+ data = self._download_embed_config('channel', channel_id, url)
+ except (ExtractorError, urllib.error.HTTPError):
+ # Some channels give a 403 from the JSON API
+ self.report_warning('Failed to download channel data from API, falling back to webpage.')
+ webpage = self._download_webpage(f'https://fast.wistia.net/embed/channel/{channel_id}', channel_id)
+ data = self._parse_json(
+ self._search_regex(r'wchanneljsonp-%s\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)' % channel_id, webpage, 'jsonp', channel_id),
+ channel_id, transform_source=lambda x: urllib.parse.unquote_plus(b64decode(x).decode('utf-8')))
+
+ # XXX: can there be more than one series?
+ series = traverse_obj(data, ('series', 0), default={})
+
+ entries = [
+ self.url_result(f'wistia:{video["hashedId"]}', WistiaIE, title=video.get('name'))
+ for video in traverse_obj(series, ('sections', ..., 'videos', ...)) or []
+ if video.get('hashedId')
+ ]
+
+ return self.playlist_result(
+ entries, channel_id, playlist_title=series.get('title'), playlist_description=series.get('description'))
+
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ yield from super()._extract_embed_urls(url, webpage)
+ for match in cls._extract_wistia_async_embed(webpage):
+ if match.group('type') == 'wistia_channel':
+ # original url may contain wmediaid query param
+ yield update_url_query(f'wistiachannel:{match.group("id")}', parse_qs(url))
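
The webpage fallback in `WistiaChannelIE._real_extract` above decodes a `wchanneljsonp` blob that is URL-quoted JSON wrapped in base64. A round-trip sketch of that transform, with invented sample data (the encode step is an assumption inferred from the transform_source above):

import base64
import json
import urllib.parse

# Encode the way the channel page appears to, then decode exactly as
# the extractor's transform_source does.
blob = base64.b64encode(
    urllib.parse.quote_plus(json.dumps({'series': [{'title': 'Demo'}]})).encode()).decode()
data = json.loads(urllib.parse.unquote_plus(base64.b64decode(blob).decode('utf-8')))
assert data['series'][0]['title'] == 'Demo'
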
diff --git a/hypervideo_dl/extractor/wordpress.py b/hypervideo_dl/extractor/wordpress.py
new file mode 100644
index 0000000..53820b5
--- /dev/null
+++ b/hypervideo_dl/extractor/wordpress.py
@@ -0,0 +1,154 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ get_elements_by_class,
+ get_elements_text_and_html_by_attribute,
+ int_or_none,
+ parse_duration,
+ traverse_obj,
+)
+
+
+# https://codex.wordpress.org/Playlist_Shortcode
+class WordpressPlaylistEmbedIE(InfoExtractor):
+ _VALID_URL = False
+ IE_NAME = 'wordpress:playlist'
+ _WEBPAGE_TESTS = [{
+ # 5 WordPress playlists. This is using wpse-playlist, which is similar.
+ # See: https://github.com/birgire/wpse-playlist
+ 'url': 'https://xlino.com/wordpress-playlist-shortcode-with-external-audio-or-video-files/',
+ 'info_dict': {
+ 'id': 'wordpress-playlist-shortcode-with-external-audio-or-video-files',
+ 'title': 'WordPress: Playlist shortcode with external audio or video files – Birgir Erlendsson (birgire)',
+ 'age_limit': 0,
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'https://pianoadventures.com/products/piano-adventures-level-1-lesson-book-enhanced-cd/',
+ 'info_dict': {
+ 'id': 'piano-adventures-level-1-lesson-book-enhanced-cd-wp-playlist-1',
+ 'title': 'Wordpress Playlist',
+ 'thumbnail': 'https://pianoadventures.com/wp-content/uploads/sites/13/2022/01/CD1002cover.jpg',
+ 'age_limit': 0,
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'CD1002-21',
+ 'ext': 'mp3',
+ 'title': '21 Half-Time Show',
+ 'thumbnail': 'https://pianoadventures.com/wp-content/plugins/media-library-assistant/images/crystal/audio.png',
+ 'album': 'Piano Adventures Level 1 Lesson Book (2nd Edition)',
+ 'genre': 'Classical',
+ 'duration': 49.0,
+ 'artist': 'Nancy and Randall Faber',
+ 'description': 'md5:a9f8e9aeabbd2912bc13cc0fab1a4ce8',
+ }
+ }],
+ 'playlist_count': 6,
+ 'params': {'skip_download': True}
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ # class should always be "wp-playlist-script"
+ # See: https://core.trac.wordpress.org/browser/trunk/src/wp-includes/media.php#L2930
+ for i, j in enumerate(get_elements_by_class('wp-playlist-script', webpage)):
+ playlist_json = self._parse_json(j, self._generic_id(url), fatal=False, ignore_extra=True, errnote='') or {}
+ if not playlist_json:
+ continue
+ entries = [{
+ 'id': self._generic_id(track['src']),
+ 'title': track.get('title'),
+ 'url': track.get('src'),
+ 'thumbnail': traverse_obj(track, ('thumb', 'src')),
+ 'album': traverse_obj(track, ('meta', 'album')),
+ 'artist': traverse_obj(track, ('meta', 'artist')),
+ 'genre': traverse_obj(track, ('meta', 'genre')),
+ 'duration': parse_duration(traverse_obj(track, ('meta', 'length_formatted'))),
+ 'description': track.get('description'),
+ 'height': int_or_none(traverse_obj(track, ('dimensions', 'original', 'height'))),
+ 'width': int_or_none(traverse_obj(track, ('dimensions', 'original', 'width'))),
+ } for track in traverse_obj(playlist_json, ('tracks', ...), expected_type=dict)]
+ yield self.playlist_result(entries, self._generic_id(url) + f'-wp-playlist-{i+1}', 'Wordpress Playlist')
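
For reference, each `wp-playlist-script` tag parsed above carries a JSON payload shaped roughly like the invented sample below, and every entry in `tracks` becomes one playlist item:

import json

playlist_json = json.loads('''{
    "tracks": [{
        "src": "https://example.com/audio/track-01.mp3",
        "title": "Track 1",
        "meta": {"album": "Demo", "artist": "Someone", "length_formatted": "0:49"},
        "dimensions": {"original": {"height": 0, "width": 0}}
    }]
}''')
track = playlist_json['tracks'][0]
entry = {'title': track.get('title'), 'url': track.get('src')}
assert entry['url'].endswith('.mp3')
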
+
+
+class WordpressMiniAudioPlayerEmbedIE(InfoExtractor):
+ # WordPress MB Mini Player Plugin
+ # https://wordpress.org/plugins/wp-miniaudioplayer/
+ # Note: This is for the WordPress plugin version only.
+ _VALID_URL = False
+ IE_NAME = 'wordpress:mb.miniAudioPlayer'
+ _WEBPAGE_TESTS = [{
+ # Version 1.8.10: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.8.10
+ 'url': 'https://news.samsung.com/global/over-the-horizon-the-evolution-of-the-samsung-galaxy-brand-sound',
+ 'info_dict': {
+ 'id': 'over-the-horizon-the-evolution-of-the-samsung-galaxy-brand-sound',
+ 'title': 'Over the Horizon: The Evolution of the Samsung Galaxy Brand Sound',
+ 'age_limit': 0,
+ 'thumbnail': 'https://img.global.news.samsung.com/global/wp-content/uploads/2015/04/OTH_Main_Title-e1429612467870.jpg',
+ 'description': 'md5:bc3dd738d1f11d9232e94e6629983bf7',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'over_the_horizon_2013',
+ 'ext': 'mp3',
+ 'title': 'Over the Horizon 2013',
+ 'url': 'http://news.samsung.com/global/wp-content/uploads/ringtones/over_the_horizon_2013.mp3'
+ }
+ }],
+ 'playlist_count': 6,
+ 'params': {'skip_download': True}
+ }, {
+ # Version 1.9.3: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.9.3
+ 'url': 'https://www.booksontape.com/collections/audiobooks-with-teacher-guides/',
+ 'info_dict': {
+ 'id': 'audiobooks-with-teacher-guides',
+ 'title': 'Audiobooks with Teacher Guides | Books on Tape',
+ 'age_limit': 0,
+ 'thumbnail': 'https://www.booksontape.com/wp-content/uploads/2016/09/bot-logo-1200x630.jpg',
+ },
+ 'playlist_mincount': 12
+ }, {
+ # Version 1.9.7: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.9.7
+ # But has spaces around href filter
+ 'url': 'https://www.estudiords.com.br/temas/',
+ 'info_dict': {
+ 'id': 'temas',
+ 'title': 'Temas Variados',
+ 'age_limit': 0,
+ 'timestamp': float,
+ 'upload_date': str,
+ 'thumbnail': 'https://www.estudiords.com.br/wp-content/uploads/2021/03/LOGO-TEMAS.png',
+ 'description': 'md5:ab24d6a7ed0312ad2d466e721679f5a0',
+ },
+ 'playlist_mincount': 30
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+ # Common function for the WordPress plugin version only.
+ mb_player_params = self._search_regex(
+ r'function\s*initializeMiniAudioPlayer\(\){[^}]+jQuery([^;]+)\.mb_miniPlayer',
+ webpage, 'mb player params', default=None)
+ if not mb_player_params:
+ return
+ # v1.55 - 1.9.3 has "a[href*='.mp3'] ,a[href*='.m4a']"
+ # v1.9.4+ has "a[href*='.mp3']" only
+ file_exts = re.findall(r'a\[href\s*\*=\s*\'\.([a-zA-Z\d]+)\'', mb_player_params)
+ if not file_exts:
+ return
+
+ candidates = get_elements_text_and_html_by_attribute(
+ 'href', rf'(?:[^\"\']+\.(?:{"|".join(file_exts)}))', webpage, escape_value=False, tag='a')
+
+ for title, html in candidates:
+ attrs = extract_attributes(html)
+ # XXX: not tested - have not found any example of it being used
+ if any(c in (attrs.get('class') or '') for c in re.findall(r'\.not\("\.([^"]+)', mb_player_params)):
+ continue
+ href = attrs['href']
+ yield {
+ 'id': self._generic_id(href),
+ 'title': title or self._generic_title(href),
+ 'url': href,
+ }
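
A small self-contained check of the extension sniffing above; the initializer string is an invented sample of the jQuery selector older plugin versions emit:

import re

mb_player_params = """("a[href*='.mp3'] ,a[href*='.m4a']")"""
file_exts = re.findall(r"a\[href\s*\*=\s*'\.([a-zA-Z\d]+)'", mb_player_params)
assert file_exts == ['mp3', 'm4a']
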
diff --git a/hypervideo_dl/extractor/worldstarhiphop.py b/hypervideo_dl/extractor/worldstarhiphop.py
index 82587b4..c6948a1 100644
--- a/hypervideo_dl/extractor/worldstarhiphop.py
+++ b/hypervideo_dl/extractor/worldstarhiphop.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/wppilot.py b/hypervideo_dl/extractor/wppilot.py
index 3003a0f..5e590e2 100644
--- a/hypervideo_dl/extractor/wppilot.py
+++ b/hypervideo_dl/extractor/wppilot.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
from .common import InfoExtractor
from ..utils import (
try_get,
@@ -22,7 +20,7 @@ class WPPilotBaseIE(InfoExtractor):
def _get_channel_list(self, cache=True):
if cache is True:
- cache_res = self._downloader.cache.load('wppilot', 'channel-list')
+ cache_res = self.cache.load('wppilot', 'channel-list')
if cache_res:
return cache_res, True
webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage')
@@ -37,7 +35,7 @@ class WPPilotBaseIE(InfoExtractor):
channel_list = try_get(qhash_content, lambda x: x['data']['allChannels']['nodes'])
if channel_list is None:
continue
- self._downloader.cache.store('wppilot', 'channel-list', channel_list)
+ self.cache.store('wppilot', 'channel-list', channel_list)
return channel_list, False
raise ExtractorError('Unable to find the channel list')
@@ -103,7 +101,7 @@ class WPPilotIE(WPPilotBaseIE):
channel = self._get_channel(video_id)
video_id = str(channel['id'])
- is_authorized = next((c for c in self._downloader.cookiejar if c.name == 'netviapisessid'), None)
+ is_authorized = next((c for c in self.cookiejar if c.name == 'netviapisessid'), None)
# cookies starting with "g:" are assigned to guests
is_authorized = True if is_authorized is not None and not is_authorized.value.startswith('g:') else False
@@ -140,8 +138,6 @@ class WPPilotIE(WPPilotBaseIE):
random.choice(fmt['url']),
video_id, live=True))
- self._sort_formats(formats)
-
channel['formats'] = formats
return channel
diff --git a/hypervideo_dl/extractor/wsj.py b/hypervideo_dl/extractor/wsj.py
index 67236f3..86e2646 100644
--- a/hypervideo_dl/extractor/wsj.py
+++ b/hypervideo_dl/extractor/wsj.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -85,7 +82,6 @@ class WSJIE(InfoExtractor):
'height': int_or_none(v.get('height')),
'fps': float_or_none(v.get('fps')),
})
- self._sort_formats(formats)
return {
'id': video_id,
@@ -119,5 +115,6 @@ class WSJArticleIE(InfoExtractor):
article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id)
video_id = self._search_regex(
- r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
+ r'(?:id=["\']video|video-|iframe\.html\?guid=|data-src=["\'])([a-fA-F0-9-]{36})',
+ webpage, 'video id')
return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
diff --git a/hypervideo_dl/extractor/wwe.py b/hypervideo_dl/extractor/wwe.py
index bebc77b..9bbd477 100644
--- a/hypervideo_dl/extractor/wwe.py
+++ b/hypervideo_dl/extractor/wwe.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/xbef.py b/hypervideo_dl/extractor/xbef.py
index 4c41e98..ac69528 100644
--- a/hypervideo_dl/extractor/xbef.py
+++ b/hypervideo_dl/extractor/xbef.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
diff --git a/hypervideo_dl/extractor/xboxclips.py b/hypervideo_dl/extractor/xboxclips.py
index 9bac982..235b567 100644
--- a/hypervideo_dl/extractor/xboxclips.py
+++ b/hypervideo_dl/extractor/xboxclips.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
diff --git a/hypervideo_dl/extractor/xfileshare.py b/hypervideo_dl/extractor/xfileshare.py
index cd97c77..08c6d6c 100644
--- a/hypervideo_dl/extractor/xfileshare.py
+++ b/hypervideo_dl/extractor/xfileshare.py
@@ -1,14 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..compat import compat_chr
from ..utils import (
+ ExtractorError,
decode_packed_codes,
determine_ext,
- ExtractorError,
int_or_none,
js_to_json,
urlencode_postdata,
@@ -35,11 +31,11 @@ def aa_decode(aa_code):
aa_char = aa_char.replace('+ ', '')
m = re.match(r'^\d+', aa_char)
if m:
- ret += compat_chr(int(m.group(0), 8))
+ ret += chr(int(m.group(0), 8))
else:
m = re.match(r'^u([\da-f]+)', aa_char)
if m:
- ret += compat_chr(int(m.group(1), 16))
+ ret += chr(int(m.group(1), 16))
return ret
@@ -65,6 +61,7 @@ class XFileShareIE(InfoExtractor):
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
% '|'.join(site for site in list(zip(*_SITES))[0]))
+ _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
_FILE_NOT_FOUND_REGEXES = (
r'>(?:404 - )?File Not Found<',
@@ -72,6 +69,15 @@ class XFileShareIE(InfoExtractor):
)
_TESTS = [{
+ 'url': 'https://uqload.com/dltx1wztngdz',
+ 'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
+ 'info_dict': {
+ 'id': 'dltx1wztngdz',
+ 'ext': 'mp4',
+ 'title': 'Rick Astley Never Gonna Give You mp4',
+ 'thumbnail': r're:https://.*\.jpg'
+ }
+ }, {
'url': 'http://xvideosharing.com/fq65f94nd2ve',
'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
'info_dict': {
@@ -88,15 +94,6 @@ class XFileShareIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
- % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
- webpage)]
-
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
@@ -185,7 +182,6 @@ class XFileShareIE(InfoExtractor):
'url': video_url,
'format_id': 'sd',
})
- self._sort_formats(formats)
thumbnail = self._search_regex(
[
@@ -198,4 +194,5 @@ class XFileShareIE(InfoExtractor):
'title': title,
'thumbnail': thumbnail,
'formats': formats,
+ 'http_headers': {'Referer': url}
}
diff --git a/hypervideo_dl/extractor/xhamster.py b/hypervideo_dl/extractor/xhamster.py
index 9d4ed47..59eecec 100644
--- a/hypervideo_dl/extractor/xhamster.py
+++ b/hypervideo_dl/extractor/xhamster.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import itertools
import re
@@ -23,7 +21,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
- _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+ _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
_VALID_URL = r'''(?x)
https?://
(?:.+?\.)?%s/
@@ -34,7 +32,7 @@ class XHamsterIE(InfoExtractor):
''' % _DOMAINS
_TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
- 'md5': '98b4687efb1ffd331c4197854dc09e8f',
+ 'md5': '34e1ab926db5dc2750fed9e1f34304bb',
'info_dict': {
'id': '1509445',
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@@ -43,6 +41,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1350194821,
'upload_date': '20121014',
'uploader': 'Ruseful2011',
+ 'uploader_id': 'ruseful2011',
'duration': 893,
'age_limit': 18,
},
@@ -72,6 +71,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1454948101,
'upload_date': '20160208',
'uploader': 'parejafree',
+ 'uploader_id': 'parejafree',
'duration': 72,
'age_limit': 18,
},
@@ -117,6 +117,9 @@ class XHamsterIE(InfoExtractor):
}, {
'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
'only_matching': True,
+ }, {
+ 'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -231,7 +234,6 @@ class XHamsterIE(InfoExtractor):
'Referer': standard_url,
},
})
- self._sort_formats(formats)
categories_list = video.get('categories')
if isinstance(categories_list, list):
@@ -246,7 +248,6 @@ class XHamsterIE(InfoExtractor):
categories = None
uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
-
return {
'id': video_id,
'display_id': display_id,
@@ -265,7 +266,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(try_get(
video, lambda x: x['rating']['dislikes'], int)),
'comment_count': int_or_none(video.get('views')),
- 'age_limit': age_limit,
+ 'age_limit': age_limit if age_limit is not None else 18,
'categories': categories,
'formats': formats,
}
@@ -309,8 +310,6 @@ class XHamsterIE(InfoExtractor):
'url': video_url,
})
- self._sort_formats(formats)
-
# Only a few videos have a description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
description = mobj.group(1) if mobj else None
@@ -371,6 +370,7 @@ class XHamsterIE(InfoExtractor):
class XHamsterEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
_TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539',
'info_dict': {
@@ -385,12 +385,6 @@ class XHamsterEmbedIE(InfoExtractor):
}
}
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -425,6 +419,9 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan',
},
'playlist_mincount': 1,
+ }, {
+ 'url': 'https://xhday.com/users/mobhunter',
+ 'only_matching': True,
}]
def _entries(self, user_id):
diff --git a/hypervideo_dl/extractor/xiami.py b/hypervideo_dl/extractor/xiami.py
index 769aab3..71b2956 100644
--- a/hypervideo_dl/extractor/xiami.py
+++ b/hypervideo_dl/extractor/xiami.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import int_or_none
diff --git a/hypervideo_dl/extractor/ximalaya.py b/hypervideo_dl/extractor/ximalaya.py
index 802d1bb..b25be77 100644
--- a/hypervideo_dl/extractor/ximalaya.py
+++ b/hypervideo_dl/extractor/ximalaya.py
@@ -1,11 +1,7 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import itertools
-import re
+import math
from .common import InfoExtractor
+from ..utils import traverse_obj, try_call, InAdvancePagedList
class XimalayaBaseIE(InfoExtractor):
@@ -15,11 +11,10 @@ class XimalayaBaseIE(InfoExtractor):
class XimalayaIE(XimalayaBaseIE):
IE_NAME = 'ximalaya'
IE_DESC = '喜马拉雅FM'
- _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/sound/(?P<id>[0-9]+)'
- _USER_URL_FORMAT = '%s://www.ximalaya.com/zhubo/%i/'
+ _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
_TESTS = [
{
- 'url': 'http://www.ximalaya.com/61425525/sound/47740352/',
+ 'url': 'http://www.ximalaya.com/sound/47740352/',
'info_dict': {
'id': '47740352',
'ext': 'm4a',
@@ -28,19 +23,20 @@ class XimalayaIE(XimalayaBaseIE):
'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
+ 'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': [
{
'name': 'cover_url',
- 'url': r're:^https?://.*\.jpg$',
+ 'url': r're:^https?://.*\.jpg',
},
{
'name': 'cover_url_142',
- 'url': r're:^https?://.*\.jpg$',
+ 'url': r're:^https?://.*\.jpg',
'width': 180,
'height': 180
}
],
- 'categories': ['renwen', '人文'],
+ 'categories': ['人文'],
'duration': 93,
'view_count': int,
'like_count': int,
@@ -56,77 +52,42 @@ class XimalayaIE(XimalayaBaseIE):
'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。",
+ 'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': [
{
'name': 'cover_url',
- 'url': r're:^https?://.*\.jpg$',
+ 'url': r're:^https?://.*\.jpg',
},
{
'name': 'cover_url_142',
- 'url': r're:^https?://.*\.jpg$',
+ 'url': r're:^https?://.*\.jpg',
'width': 180,
'height': 180
}
],
- 'categories': ['renwen', '人文'],
+ 'categories': ['人文'],
'duration': 93,
'view_count': int,
'like_count': int,
}
- },
- {
- 'url': 'https://www.ximalaya.com/11045267/sound/15705996/',
- 'info_dict': {
- 'id': '15705996',
- 'ext': 'm4a',
- 'uploader': '李延隆老师',
- 'uploader_id': 11045267,
- 'uploader_url': 'https://www.ximalaya.com/zhubo/11045267/',
- 'title': 'Lesson 1 Excuse me!',
- 'description': "contains:Listen to the tape then answer\xa0this question. Whose handbag is it?\n"
- "听录音,然后回答问题,这是谁的手袋?",
- 'thumbnails': [
- {
- 'name': 'cover_url',
- 'url': r're:^https?://.*\.jpg$',
- },
- {
- 'name': 'cover_url_142',
- 'url': r're:^https?://.*\.jpg$',
- 'width': 180,
- 'height': 180
- }
- ],
- 'categories': ['train', '外语'],
- 'duration': 40,
- 'view_count': int,
- 'like_count': int,
- }
- },
+ }
]
def _real_extract(self, url):
-
- is_m = 'm.ximalaya' in url
scheme = 'https' if url.startswith('https') else 'http'
audio_id = self._match_id(url)
- webpage = self._download_webpage(url, audio_id,
- note='Download sound page for %s' % audio_id,
- errnote='Unable to get sound page')
-
audio_info_file = '%s://m.ximalaya.com/tracks/%s.json' % (scheme, audio_id)
audio_info = self._download_json(audio_info_file, audio_id,
'Downloading info json %s' % audio_info_file,
'Unable to download info file')
- formats = []
- for bps, k in (('24k', 'play_path_32'), ('64k', 'play_path_64')):
- if audio_info.get(k):
- formats.append({
- 'format_id': bps,
- 'url': audio_info[k],
- })
+ formats = [{
+ 'format_id': f'{bps}k',
+ 'url': audio_info[k],
+ 'abr': bps,
+ 'vcodec': 'none'
+ } for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)]
thumbnails = []
for k in audio_info.keys():
@@ -140,30 +101,18 @@ class XimalayaIE(XimalayaBaseIE):
audio_uploader_id = audio_info.get('uid')
- if is_m:
- audio_description = self._html_search_regex(r'(?s)<section\s+class=["\']content[^>]+>(.+?)</section>',
- webpage, 'audio_description', fatal=False)
- else:
- audio_description = self._html_search_regex(r'(?s)<div\s+class=["\']rich_intro[^>]*>(.+?</article>)',
- webpage, 'audio_description', fatal=False)
-
- if not audio_description:
- audio_description_file = '%s://www.ximalaya.com/sounds/%s/rich_intro' % (scheme, audio_id)
- audio_description = self._download_webpage(audio_description_file, audio_id,
- note='Downloading description file %s' % audio_description_file,
- errnote='Unable to download descrip file',
- fatal=False)
- audio_description = audio_description.strip() if audio_description else None
+ audio_description = try_call(
+ lambda: audio_info['intro'].replace('\r\n\r\n\r\n ', '\n').replace('\r\n', '\n'))
return {
'id': audio_id,
'uploader': audio_info.get('nickname'),
'uploader_id': audio_uploader_id,
- 'uploader_url': self._USER_URL_FORMAT % (scheme, audio_uploader_id) if audio_uploader_id else None,
+ 'uploader_url': f'{scheme}://www.ximalaya.com/zhubo/{audio_uploader_id}/' if audio_uploader_id else None,
'title': audio_info['title'],
'thumbnails': thumbnails,
'description': audio_description,
- 'categories': list(filter(None, (audio_info.get('category_name'), audio_info.get('category_title')))),
+ 'categories': list(filter(None, [audio_info.get('category_name')])),
'duration': audio_info.get('duration'),
'view_count': audio_info.get('play_count'),
'like_count': audio_info.get('favorites_count'),
@@ -174,60 +123,38 @@ class XimalayaIE(XimalayaBaseIE):
class XimalayaAlbumIE(XimalayaBaseIE):
IE_NAME = 'ximalaya:album'
IE_DESC = '喜马拉雅FM 专辑'
- _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?P<uid>[0-9]+)/album/(?P<id>[0-9]+)'
- _TEMPLATE_URL = '%s://www.ximalaya.com/%s/album/%s/'
- _BASE_URL_TEMPL = '%s://www.ximalaya.com%s'
- _LIST_VIDEO_RE = r'<a[^>]+?href="(?P<url>/%s/sound/(?P<id>\d+)/?)"[^>]+?title="(?P<title>[^>]+)">'
+ _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/\d+/album/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.ximalaya.com/61425525/album/5534601/',
'info_dict': {
'title': '唐诗三百首(含赏析)',
'id': '5534601',
},
- 'playlist_count': 312,
- }, {
- 'url': 'http://m.ximalaya.com/61425525/album/5534601',
- 'info_dict': {
- 'title': '唐诗三百首(含赏析)',
- 'id': '5534601',
- },
- 'playlist_count': 312,
- },
- ]
+ 'playlist_mincount': 323,
+ }]
def _real_extract(self, url):
- self.scheme = scheme = 'https' if url.startswith('https') else 'http'
-
- mobj = self._match_valid_url(url)
- uid, playlist_id = mobj.group('uid'), mobj.group('id')
-
- webpage = self._download_webpage(self._TEMPLATE_URL % (scheme, uid, playlist_id), playlist_id,
- note='Download album page for %s' % playlist_id,
- errnote='Unable to get album info')
+ playlist_id = self._match_id(url)
- title = self._html_search_regex(r'detailContent_title[^>]*><h1(?:[^>]+)?>([^<]+)</h1>',
- webpage, 'title', fatal=False)
+ first_page = self._fetch_page(playlist_id, 1)
+ page_count = math.ceil(first_page['trackTotalCount'] / first_page['pageSize'])
- return self.playlist_result(self._entries(webpage, playlist_id, uid), playlist_id, title)
+ entries = InAdvancePagedList(
+ lambda idx: self._get_entries(self._fetch_page(playlist_id, idx + 1) if idx else first_page),
+ page_count, first_page['pageSize'])
- def _entries(self, page, playlist_id, uid):
- html = page
- for page_num in itertools.count(1):
- for entry in self._process_page(html, uid):
- yield entry
+ title = traverse_obj(first_page, ('tracks', 0, 'albumTitle'), expected_type=str)
- next_url = self._search_regex(r'<a\s+href=(["\'])(?P<more>[\S]+)\1[^>]+rel=(["\'])next\3',
- html, 'list_next_url', default=None, group='more')
- if not next_url:
- break
+ return self.playlist_result(entries, playlist_id, title)
- next_full_url = self._BASE_URL_TEMPL % (self.scheme, next_url)
- html = self._download_webpage(next_full_url, playlist_id)
+ def _fetch_page(self, playlist_id, page_idx):
+ return self._download_json(
+ 'https://www.ximalaya.com/revision/album/v1/getTracksList',
+ playlist_id, note=f'Downloading tracks list page {page_idx}',
+ query={'albumId': playlist_id, 'pageNum': page_idx, 'sort': 1})['data']
- def _process_page(self, html, uid):
- find_from = html.index('album_soundlist')
- for mobj in re.finditer(self._LIST_VIDEO_RE % uid, html[find_from:]):
- yield self.url_result(self._BASE_URL_TEMPL % (self.scheme, mobj.group('url')),
- XimalayaIE.ie_key(),
- mobj.group('id'),
- mobj.group('title'))
+ def _get_entries(self, page_data):
+ for e in page_data['tracks']:
+ yield self.url_result(
+ self._proto_relative_url(f'//www.ximalaya.com{e["url"]}'),
+ XimalayaIE, e.get('trackId'), e.get('title'))
diff --git a/hypervideo_dl/extractor/xinpianchang.py b/hypervideo_dl/extractor/xinpianchang.py
index 9832d23..ddc1d0b 100644
--- a/hypervideo_dl/extractor/xinpianchang.py
+++ b/hypervideo_dl/extractor/xinpianchang.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -75,8 +72,6 @@ class XinpianchangIE(InfoExtractor):
'ext': 'mp4',
} for prog in v if prog.get('url') or []])
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': data.get('title'),
diff --git a/hypervideo_dl/extractor/xminus.py b/hypervideo_dl/extractor/xminus.py
index 36e5ead..5f11381 100644
--- a/hypervideo_dl/extractor/xminus.py
+++ b/hypervideo_dl/extractor/xminus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
import time
diff --git a/hypervideo_dl/extractor/xnxx.py b/hypervideo_dl/extractor/xnxx.py
index 27f9916..1452aae 100644
--- a/hypervideo_dl/extractor/xnxx.py
+++ b/hypervideo_dl/extractor/xnxx.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -67,7 +64,6 @@ class XNXXIE(InfoExtractor):
'format_id': format_id,
'quality': -1 if format_id == 'low' else 0,
})
- self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
diff --git a/hypervideo_dl/extractor/xstream.py b/hypervideo_dl/extractor/xstream.py
index 792843d..8dd1cd9 100644
--- a/hypervideo_dl/extractor/xstream.py
+++ b/hypervideo_dl/extractor/xstream.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -85,7 +82,6 @@ class XstreamIE(InfoExtractor):
'url': media_url,
'tbr': tbr,
})
- self._sort_formats(formats)
link = find_xpath_attr(
entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
diff --git a/hypervideo_dl/extractor/xtube.py b/hypervideo_dl/extractor/xtube.py
index abd3191..ce4480c 100644
--- a/hypervideo_dl/extractor/xtube.py
+++ b/hypervideo_dl/extractor/xtube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import itertools
import re
@@ -131,7 +129,6 @@ class XTubeIE(InfoExtractor):
})
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
if not title:
title = self._search_regex(
diff --git a/hypervideo_dl/extractor/xuite.py b/hypervideo_dl/extractor/xuite.py
index 0276c0d..71ddadd 100644
--- a/hypervideo_dl/extractor/xuite.py
+++ b/hypervideo_dl/extractor/xuite.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -119,7 +116,6 @@ class XuiteIE(InfoExtractor):
'format_id': format_id,
'height': int(format_id) if format_id.isnumeric() else None,
})
- self._sort_formats(formats)
timestamp = media_info.get('PUBLISH_DATETIME')
if timestamp:
diff --git a/hypervideo_dl/extractor/xvideos.py b/hypervideo_dl/extractor/xvideos.py
index d5261b6..5c505c8 100644
--- a/hypervideo_dl/extractor/xvideos.py
+++ b/hypervideo_dl/extractor/xvideos.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -151,8 +149,6 @@ class XVideosIE(InfoExtractor):
'quality': -2 if format_id.endswith('low') else None,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
diff --git a/hypervideo_dl/extractor/xxxymovies.py b/hypervideo_dl/extractor/xxxymovies.py
index 0d53601..e3e3a9f 100644
--- a/hypervideo_dl/extractor/xxxymovies.py
+++ b/hypervideo_dl/extractor/xxxymovies.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
diff --git a/hypervideo_dl/extractor/yahoo.py b/hypervideo_dl/extractor/yahoo.py
index 20504de..a69715b 100644
--- a/hypervideo_dl/extractor/yahoo.py
+++ b/hypervideo_dl/extractor/yahoo.py
@@ -1,33 +1,28 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import itertools
-import re
+import urllib.parse
+from .brightcove import BrightcoveNewIE
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse,
-)
+from .youtube import YoutubeIE
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
mimetype2ext,
parse_iso8601,
smuggle_url,
+ traverse_obj,
try_get,
url_or_none,
)
-from .brightcove import BrightcoveNewIE
-from .youtube import YoutubeIE
-
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
+ _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1']
+
_TESTS = [{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'info_dict': {
@@ -246,8 +241,6 @@ class YahooIE(InfoExtractor):
if not formats and msg == 'geo restricted':
self.raise_geo_restricted(metadata_available=True)
- self._sort_formats(formats)
-
thumbnails = []
for thumb in video.get('thumbnails', []):
thumb_url = thumb.get('url')
@@ -317,7 +310,7 @@ class YahooIE(InfoExtractor):
if items.get('markup'):
entries.extend(
- self.url_result(yt_url) for yt_url in YoutubeIE._extract_urls(items['markup']))
+ self.url_result(yt_url) for yt_url in YoutubeIE._extract_embed_urls(url, items['markup']))
return self.playlist_result(
entries, item.get('uuid'),
@@ -336,7 +329,7 @@ class YahooSearchIE(SearchInfoExtractor):
def _search_results(self, query):
for pagenum in itertools.count(0):
- result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
+ result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (urllib.parse.quote_plus(query), pagenum * 30)
info = self._download_json(result_url, query,
note='Downloading results page ' + str(pagenum + 1))
yield from (self.url_result(result['rurl']) for result in info['results'])
@@ -437,7 +430,7 @@ class YahooGyaOIE(InfoExtractor):
page = 1
while True:
playlist = self._download_json(
- f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}', program_id,
+ f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}&serviceId=gy', program_id,
note=f'Downloading JSON metadata page {page}')
if not playlist:
break
@@ -462,34 +455,21 @@ class YahooGyaOIE(InfoExtractor):
class YahooJapanNewsIE(InfoExtractor):
IE_NAME = 'yahoo:japannews'
IE_DESC = 'Yahoo! Japan News'
- _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
+ _VALID_URL = r'https?://news\.yahoo\.co\.jp/(?:articles|feature)/(?P<id>[a-zA-Z0-9]+)'
_GEO_COUNTRIES = ['JP']
_TESTS = [{
- 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
+ 'url': 'https://news.yahoo.co.jp/articles/a70fe3a064f1cfec937e2252c7fc6c1ba3201c0e',
'info_dict': {
- 'id': '1736242',
+ 'id': 'a70fe3a064f1cfec937e2252c7fc6c1ba3201c0e',
'ext': 'mp4',
- 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース',
- 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))',
- 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$',
+ 'title': '【独自】安倍元総理「国葬」中止求め“脅迫メール”…「子ども誘拐」“送信者”を追跡',
+ 'description': 'md5:1c06974575f930f692d8696fbcfdc546',
+ 'thumbnail': r're:https://.+',
},
'params': {
'skip_download': True,
},
}, {
- # geo restricted
- 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04',
- 'only_matching': True,
- }, {
- 'url': 'https://headlines.yahoo.co.jp/videonews/',
- 'only_matching': True,
- }, {
- 'url': 'https://news.yahoo.co.jp',
- 'only_matching': True,
- }, {
- 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/',
- 'only_matching': True,
- }, {
'url': 'https://news.yahoo.co.jp/feature/1356',
'only_matching': True
}]
@@ -497,11 +477,7 @@ class YahooJapanNewsIE(InfoExtractor):
def _extract_formats(self, json_data, content_id):
formats = []
- video_data = try_get(
- json_data,
- lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
- list)
- for vid in video_data or []:
+ for vid in traverse_obj(json_data, ('ResultSet', 'Result', ..., 'VideoUrlSet', 'VideoUrl', ...)) or []:
delivery = vid.get('delivery')
url = url_or_none(vid.get('Url'))
if not delivery or not url:
@@ -514,73 +490,58 @@ class YahooJapanNewsIE(InfoExtractor):
else:
formats.append({
'url': url,
- 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
+ 'format_id': f'http-{vid.get("bitrate")}',
'height': int_or_none(vid.get('height')),
'width': int_or_none(vid.get('width')),
'tbr': int_or_none(vid.get('bitrate')),
})
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- host = mobj.group('host')
- display_id = mobj.group('id') or host
-
- webpage = self._download_webpage(url, display_id)
-
- title = self._html_search_meta(
- ['og:title', 'twitter:title'], webpage, 'title', default=None
- ) or self._html_extract_title(webpage)
-
- if display_id == host:
- # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
- stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage)
- entries = [
- self.url_result(
- smuggle_url(
- 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id,
- {'geo_countries': ['JP']}),
- ie='BrightcoveNew', video_id=plist_id)
- for plist_id in stream_plists]
- return self.playlist_result(entries, playlist_title=title)
-
- # Article page
- description = self._html_search_meta(
- ['og:description', 'description', 'twitter:description'],
- webpage, 'description', default=None)
- thumbnail = self._og_search_thumbnail(
- webpage, default=None) or self._html_search_meta(
- 'twitter:image', webpage, 'thumbnail', default=None)
- space_id = self._search_regex([
- r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
- r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
- r'<!--\s+SpaceID=(\d+)'
- ], webpage, 'spaceid')
-
- content_id = self._search_regex(
- r'<script[^>]+class=["\']yvpub-player["\'][^>]+contentid=(?P<contentid>[^&"\']+)',
- webpage, 'contentid', group='contentid')
-
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ preloaded_state = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'preloaded state', video_id)
+
+ content_id = traverse_obj(
+ preloaded_state, ('articleDetail', 'paragraphs', ..., 'objectItems', ..., 'video', 'vid'),
+ get_all=False, expected_type=int)
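+        # only the first video embedded in the article paragraphs is extracted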
+ if content_id is None:
+ raise ExtractorError('This article does not contain a video', expected=True)
+
+ HOST = 'news.yahoo.co.jp'
+ space_id = traverse_obj(preloaded_state, ('pageData', 'spaceId'), expected_type=str)
json_data = self._download_json(
- 'https://feapi-yvpub.yahooapis.jp/v1/content/%s' % content_id,
- content_id,
- query={
+ f'https://feapi-yvpub.yahooapis.jp/v1/content/{content_id}',
+ video_id, query={
'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
'output': 'json',
- 'space_id': space_id,
- 'domain': host,
- 'ak': hashlib.md5('_'.join((space_id, host)).encode()).hexdigest(),
+ 'domain': HOST,
+ 'ak': hashlib.md5('_'.join((space_id, HOST)).encode()).hexdigest() if space_id else '',
'device_type': '1100',
})
- formats = self._extract_formats(json_data, content_id)
+
+ title = (
+ traverse_obj(preloaded_state,
+ ('articleDetail', 'headline'), ('pageData', 'pageParam', 'title'),
+ expected_type=str)
+ or self._html_search_meta(('og:title', 'twitter:title'), webpage, 'title', default=None)
+ or self._html_extract_title(webpage))
+ description = (
+ traverse_obj(preloaded_state, ('pageData', 'description'), expected_type=str)
+ or self._html_search_meta(
+ ('og:description', 'description', 'twitter:description'),
+ webpage, 'description', default=None))
+ thumbnail = (
+ traverse_obj(preloaded_state, ('pageData', 'ogpImage'), expected_type=str)
+ or self._og_search_thumbnail(webpage, default=None)
+ or self._html_search_meta('twitter:image', webpage, 'thumbnail', default=None))
return {
- 'id': content_id,
+ 'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
- 'formats': formats,
+ 'formats': self._extract_formats(json_data, video_id),
}
diff --git a/hypervideo_dl/extractor/yandexdisk.py b/hypervideo_dl/extractor/yandexdisk.py
index c15f3a4..d5eecbd 100644
--- a/hypervideo_dl/extractor/yandexdisk.py
+++ b/hypervideo_dl/extractor/yandexdisk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
from .common import InfoExtractor
@@ -130,7 +127,6 @@ class YandexDiskIE(InfoExtractor):
'url': format_url,
'width': int_or_none(size.get('width')),
})
- self._sort_formats(formats)
uid = resource.get('uid')
display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
diff --git a/hypervideo_dl/extractor/yandexmusic.py b/hypervideo_dl/extractor/yandexmusic.py
index 8e94f1f..1869091 100644
--- a/hypervideo_dl/extractor/yandexmusic.py
+++ b/hypervideo_dl/extractor/yandexmusic.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import hashlib
import itertools
@@ -118,8 +115,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
download_data = self._download_json(
'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
- track_id, 'Downloading track location url JSON',
- headers={'X-Retpath-Y': url})
+ track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url})
fd_data = self._download_json(
download_data['src'], track_id,
diff --git a/hypervideo_dl/extractor/yandexvideo.py b/hypervideo_dl/extractor/yandexvideo.py
index 7d3966b..535b61f 100644
--- a/hypervideo_dl/extractor/yandexvideo.py
+++ b/hypervideo_dl/extractor/yandexvideo.py
@@ -1,17 +1,15 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
-import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
extract_attributes,
int_or_none,
+ lowercase_escape,
+ parse_qs,
+ traverse_obj,
try_get,
url_or_none,
- lowercase_escape,
)
@@ -26,7 +24,6 @@ class YandexVideoIE(InfoExtractor):
'''
_TESTS = [{
'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374',
- 'md5': 'e02a05bfaf0d9615ef07ae3a10f4faf4',
'info_dict': {
'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374',
'ext': 'mp4',
@@ -41,6 +38,7 @@ class YandexVideoIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda',
'only_matching': True,
@@ -123,8 +121,6 @@ class YandexVideoIE(InfoExtractor):
else:
formats.append({'url': content_url})
- self._sort_formats(formats)
-
timestamp = (int_or_none(content.get('release_date'))
or int_or_none(content.get('release_date_ut'))
or int_or_none(content.get('start_time')))
@@ -150,7 +146,7 @@ class YandexVideoIE(InfoExtractor):
class YandexVideoPreviewIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?yandex\.ru/video/preview(?:/?\?.*?filmId=|/)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?yandex\.\w{2,3}(?:\.(?:am|ge|il|tr))?/video/preview(?:/?\?.*?filmId=|/)(?P<id>\d+)'
_TESTS = [{ # Odnoklassniki
'url': 'https://yandex.ru/video/preview/?filmId=10682852472978372885&text=summer',
'info_dict': {
@@ -177,6 +173,9 @@ class YandexVideoPreviewIE(InfoExtractor):
}, { # Odnoklassniki
'url': 'https://yandex.ru/video/preview/?text=Francis%20Lai%20-%20Le%20Bon%20Et%20Les%20MC)chants&path=wizard&parent-reqid=1643208087979310-1481782809207673478-sas3-0931-2f9-sas-l7-balancer-8080-BAL-9380&wiz_type=vital&filmId=12508152936505397283',
'only_matching': True,
+ }, { # Odnoklassniki
+ 'url': 'https://yandex.com/video/preview/?text=dossier%2051%20film%201978&path=yandex_search&parent-reqid=1664361087754492-8727541069609384458-sas2-0340-sas-l7-balancer-8080-BAL-8045&noreask=1&from_type=vast&filmId=5794987234584444632',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -188,34 +187,35 @@ class YandexVideoPreviewIE(InfoExtractor):
class ZenYandexIE(InfoExtractor):
- _VALID_URL = r'https?://zen\.yandex\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
+ _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru(?:/video)?/(media|watch)/(?:(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-)?(?P<id>[a-z0-9-]+)'
_TESTS = [{
- 'url': 'https://zen.yandex.ru/media/popmech/izverjenie-vulkana-iz-spichek-zreliscnyi-opyt-6002240ff8b1af50bb2da5e3',
+ 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
'info_dict': {
- 'id': '6002240ff8b1af50bb2da5e3',
+ 'id': '60c7c443da18892ebfe85ed7',
'ext': 'mp4',
- 'title': 'Извержение вулкана из спичек: зрелищный опыт',
- 'description': 'md5:053ad3c61b5596d510c9a199dc8ee633',
- 'thumbnail': 're:^https://avatars.mds.yandex.net/',
- 'uploader': 'Популярная механика',
+ 'title': 'ВОТ ЭТО Focus. Деды Морозы на гидроциклах',
+ 'description': 'md5:f3db3d995763b9bbb7b56d4ccdedea89',
+ 'thumbnail': 're:^https://avatars.dzeninfra.ru/',
+ 'uploader': 'AcademeG DailyStream'
},
'params': {
'skip_download': 'm3u8',
+ 'format': 'bestvideo',
},
+ 'skip': 'The page does not exist',
}, {
- 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
+ 'url': 'https://dzen.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
'info_dict': {
'id': '60c7c443da18892ebfe85ed7',
'ext': 'mp4',
'title': 'ВОТ ЭТО Focus. Деды Морозы на гидроциклах',
'description': 'md5:f3db3d995763b9bbb7b56d4ccdedea89',
- 'thumbnail': 're:^https://avatars.mds.yandex.net/',
- 'uploader': 'AcademeG DailyStream'
- },
- 'params': {
- 'skip_download': 'm3u8',
- 'format': 'bestvideo',
+ 'thumbnail': r're:^https://avatars\.dzeninfra\.ru/',
+ 'uploader': 'AcademeG DailyStream',
+ 'upload_date': '20191111',
+ 'timestamp': 1573465585,
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://zen.yandex.ru/video/watch/6002240ff8b1af50bb2da5e3',
'info_dict': {
@@ -223,21 +223,42 @@ class ZenYandexIE(InfoExtractor):
'ext': 'mp4',
'title': 'Извержение вулкана из спичек: зрелищный опыт',
'description': 'md5:053ad3c61b5596d510c9a199dc8ee633',
- 'uploader': 'Популярная механика',
+ 'thumbnail': r're:^https://avatars\.dzeninfra\.ru/',
+ 'uploader': 'TechInsider',
+ 'timestamp': 1611378221,
+ 'upload_date': '20210123',
},
- 'params': {
- 'skip_download': 'm3u8',
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://dzen.ru/video/watch/6002240ff8b1af50bb2da5e3',
+ 'info_dict': {
+ 'id': '6002240ff8b1af50bb2da5e3',
+ 'ext': 'mp4',
+ 'title': 'Извержение вулкана из спичек: зрелищный опыт',
+ 'description': 'md5:053ad3c61b5596d510c9a199dc8ee633',
+ 'thumbnail': 're:^https://avatars.dzeninfra.ru/',
+ 'uploader': 'TechInsider',
+ 'upload_date': '20210123',
+ 'timestamp': 1611378221,
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/novyi-samsung-fold-3-moskvich-barahlit-612f93b7f8d48e7e945792a2?from=channel&rid=2286618386.482.1630817595976.42360',
'only_matching': True,
+ }, {
+ 'url': 'https://dzen.ru/media/id/606fd806cc13cb3c58c05cf5/novyi-samsung-fold-3-moskvich-barahlit-612f93b7f8d48e7e945792a2?from=channel&rid=2286618386.482.1630817595976.42360',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- id = self._match_id(url)
- webpage = self._download_webpage(url, id)
- data_json = self._parse_json(
- self._search_regex(r'data\s*=\s*({["\']_*serverState_*video.+?});', webpage, 'metadata'), id)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
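+        # the page may embed a JS redirect stub ("var it = {retpath: ...}"); follow it before extracting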
+        redirect = self._search_json(r'var it\s*=', webpage, 'redirect', video_id, default={}).get('retpath')
+ if redirect:
+ video_id = self._match_id(redirect)
+ webpage = self._download_webpage(redirect, video_id, note='Redirecting')
+ data_json = self._search_json(
+ r'data\s*=', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)',
webpage, 'server state').replace('State', 'Settings')
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
@@ -252,13 +273,13 @@ class ZenYandexIE(InfoExtractor):
-            formats.extend(self._extract_mpd_formats(s_url, id, mpd_id='dash'))
+            formats.extend(self._extract_mpd_formats(s_url, video_id, mpd_id='dash'))
            elif ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(s_url, id, 'mp4'))
+            formats.extend(self._extract_m3u8_formats(s_url, video_id, 'mp4'))
- self._sort_formats(formats)
return {
- 'id': id,
+ 'id': video_id,
'title': video_json.get('title') or self._og_search_title(webpage),
'formats': formats,
'duration': int_or_none(video_json.get('duration')),
'view_count': int_or_none(video_json.get('views')),
+ 'timestamp': int_or_none(video_json.get('publicationDate')),
'uploader': uploader_name or data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']),
'description': self._og_search_description(webpage) or try_get(data_json, lambda x: x['og']['description']),
'thumbnail': self._og_search_thumbnail(webpage) or try_get(data_json, lambda x: x['og']['imageUrl']),
@@ -266,40 +287,99 @@ class ZenYandexIE(InfoExtractor):
class ZenYandexChannelIE(InfoExtractor):
- _VALID_URL = r'https?://zen\.yandex\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)'
+ _VALID_URL = r'https?://(zen\.yandex|dzen)\.ru/(?!media|video)(?:id/)?(?P<id>[a-z0-9-_]+)'
_TESTS = [{
'url': 'https://zen.yandex.ru/tok_media',
'info_dict': {
'id': 'tok_media',
+ 'title': 'СПЕКТР',
+ 'description': 'md5:a9e5b3c247b7fe29fd21371a428bcf56',
+ },
+ 'playlist_mincount': 169,
+ }, {
+ 'url': 'https://dzen.ru/tok_media',
+ 'info_dict': {
+ 'id': 'tok_media',
+ 'title': 'СПЕКТР',
+ 'description': 'md5:a9e5b3c247b7fe29fd21371a428bcf56',
},
'playlist_mincount': 169,
}, {
'url': 'https://zen.yandex.ru/id/606fd806cc13cb3c58c05cf5',
'info_dict': {
'id': '606fd806cc13cb3c58c05cf5',
+ 'description': 'md5:517b7c97d8ca92e940f5af65448fd928',
+ 'title': 'AcademeG DailyStream',
+ },
+ 'playlist_mincount': 657,
+ }, {
+ # Test that the playlist extractor finishes extracting when the
+    # channel has no more than one page of entries
+ 'url': 'https://zen.yandex.ru/jony_me',
+ 'info_dict': {
+ 'id': 'jony_me',
+ 'description': 'md5:a2c62b4ef5cf3e3efb13d25f61f739e1',
+ 'title': 'JONY ',
+ },
+ 'playlist_count': 20,
+ }, {
+ # Test that the playlist extractor finishes extracting when the
+ # channel has more than one page of entries
+ 'url': 'https://zen.yandex.ru/tatyanareva',
+ 'info_dict': {
+ 'id': 'tatyanareva',
+ 'description': 'md5:296b588d60841c3756c9105f237b70c6',
+ 'title': 'Татьяна Рева',
+ 'entries': 'maxcount:200',
+ },
+ 'playlist_count': 46,
+ }, {
+ 'url': 'https://dzen.ru/id/606fd806cc13cb3c58c05cf5',
+ 'info_dict': {
+ 'id': '606fd806cc13cb3c58c05cf5',
+ 'title': 'AcademeG DailyStream',
+ 'description': 'md5:517b7c97d8ca92e940f5af65448fd928',
},
'playlist_mincount': 657,
}]
- def _entries(self, id, url):
- webpage = self._download_webpage(url, id)
- data_json = self._parse_json(re.findall(r'var\s?data\s?=\s?({.+?})\s?;', webpage)[-1], id)
- for key in data_json.keys():
- if key.startswith('__serverState__'):
- data_json = data_json[key]
- items = list(try_get(data_json, lambda x: x['feed']['items'], dict).values())
- more = try_get(data_json, lambda x: x['links']['more']) or None
+ def _entries(self, item_id, server_state_json, server_settings_json):
+ items = (traverse_obj(server_state_json, ('feed', 'items', ...))
+ or traverse_obj(server_settings_json, ('exportData', 'items', ...)))
+
+ more = (traverse_obj(server_state_json, ('links', 'more'))
+ or traverse_obj(server_settings_json, ('exportData', 'more', 'link')))
+
+ next_page_id = None
for page in itertools.count(1):
- for item in items:
- video_id = item.get('publication_id') or item.get('publicationId')
- video_url = item.get('link')
- yield self.url_result(video_url, ie=ZenYandexIE.ie_key(), video_id=video_id.split(':')[-1])
- if not more:
+ for item in items or []:
+ if item.get('type') != 'gif':
+ continue
+ video_id = traverse_obj(item, 'publication_id', 'publicationId') or ''
+ yield self.url_result(item['link'], ZenYandexIE, video_id.split(':')[-1])
+
+ current_page_id = next_page_id
+ next_page_id = traverse_obj(parse_qs(more), ('next_page_id', -1))
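+            # stop when there is no "more" link, no items were returned, or the next page id is missing or repeats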
+ if not all((more, items, next_page_id, next_page_id != current_page_id)):
break
- data_json = self._download_json(more, id, note='Downloading Page %d' % page)
- items = data_json.get('items', [])
- more = try_get(data_json, lambda x: x['more']['link']) or None
+
+ data = self._download_json(more, item_id, note=f'Downloading Page {page}')
+ items, more = data.get('items'), traverse_obj(data, ('more', 'link'))
def _real_extract(self, url):
- id = self._match_id(url)
- return self.playlist_result(self._entries(id, url), playlist_id=id)
+ item_id = self._match_id(url)
+ webpage = self._download_webpage(url, item_id)
+ redirect = self._search_json(
+ r'var it\s*=', webpage, 'redirect', item_id, default={}).get('retpath')
+ if redirect:
+ item_id = self._match_id(redirect)
+ webpage = self._download_webpage(redirect, item_id, note='Redirecting')
+ data = self._search_json(
+ r'var\s+data\s*=', webpage, 'channel data', item_id, contains_pattern=r'{\"__serverState__.+}')
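+        # the exact key names vary between builds (e.g. __serverState__<version>), so match them by prefix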
+ server_state_json = traverse_obj(data, lambda k, _: k.startswith('__serverState__'), get_all=False)
+ server_settings_json = traverse_obj(data, lambda k, _: k.startswith('__serverSettings__'), get_all=False)
+
+ return self.playlist_result(
+ self._entries(item_id, server_state_json, server_settings_json),
+ item_id, traverse_obj(server_state_json, ('channel', 'source', 'title')),
+ traverse_obj(server_state_json, ('channel', 'source', 'description')))
diff --git a/hypervideo_dl/extractor/yapfiles.py b/hypervideo_dl/extractor/yapfiles.py
index cfb368d..19812ba 100644
--- a/hypervideo_dl/extractor/yapfiles.py
+++ b/hypervideo_dl/extractor/yapfiles.py
@@ -1,14 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
qualities,
- unescapeHTML,
url_or_none,
)
@@ -16,6 +10,7 @@ from ..utils import (
class YapFilesIE(InfoExtractor):
_YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
_VALID_URL = r'https?:%s' % _YAPFILES_URL
+ _EMBED_REGEX = [rf'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_YAPFILES_URL}.*?)\1']
_TESTS = [{
# with hd
'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
@@ -33,12 +28,6 @@ class YapFilesIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
- % YapFilesIE._YAPFILES_URL, webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -90,7 +79,6 @@ class YapFilesIE(InfoExtractor):
'quality': quality_key(format_id),
'height': hd_height if is_hd else None,
})
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/yesjapan.py b/hypervideo_dl/extractor/yesjapan.py
index 681338c..b45fa8f 100644
--- a/hypervideo_dl/extractor/yesjapan.py
+++ b/hypervideo_dl/extractor/yesjapan.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
HEADRequest,
diff --git a/hypervideo_dl/extractor/yinyuetai.py b/hypervideo_dl/extractor/yinyuetai.py
index 1fd8d35..b2e3172 100644
--- a/hypervideo_dl/extractor/yinyuetai.py
+++ b/hypervideo_dl/extractor/yinyuetai.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -44,7 +41,6 @@ class YinYueTaiIE(InfoExtractor):
'ext': 'mp4',
'tbr': format_info.get('bitrate'),
} for format_info in info['videoUrlModels']]
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/yle_areena.py b/hypervideo_dl/extractor/yle_areena.py
new file mode 100644
index 0000000..118dc12
--- /dev/null
+++ b/hypervideo_dl/extractor/yle_areena.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import int_or_none, traverse_obj, url_or_none
+
+
+class YleAreenaIE(InfoExtractor):
+ _VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
+ _TESTS = [{
+ 'url': 'https://areena.yle.fi/1-4371942',
+ 'md5': '932edda0ecf5dfd6423804182d32f8ac',
+ 'info_dict': {
+ 'id': '0_a3tjk92c',
+ 'ext': 'mp4',
+ 'title': 'Pouchit',
+ 'description': 'md5:d487309c3abbe5650265bbd1742d2f82',
+ 'series': 'Modernit miehet',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Episode 2',
+ 'episode_number': 2,
+ 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
+ 'uploader_id': 'ovp@yle.fi',
+ 'duration': 1435,
+ 'view_count': int,
+ 'upload_date': '20181204',
+ 'timestamp': 1543916210,
+ 'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
+ 'age_limit': 7,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
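+        # the app_id/app_key pair appears to be the web player's fixed public credentials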
+ video_data = self._download_json(
+ f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
+ video_id)
+
+ # Example title: 'K1, J2: Pouchit | Modernit miehet'
+ series, season_number, episode_number, episode = self._search_regex(
+ r'K(?P<season_no>[\d]+),\s*J(?P<episode_no>[\d]+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
+ info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
+ default=(None, None, None, None))
+ description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
+
+ subtitles = {}
+ for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
+ if url_or_none(sub.get('uri')):
+ subtitles.setdefault(sub.get('language') or 'und', []).append({
+ 'url': sub['uri'],
+ 'ext': 'srt',
+ 'name': sub.get('kind'),
+ })
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'kaltura:1955031:%s' % traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id')),
+ 'ie_key': KalturaIE.ie_key(),
+ 'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
+ or episode or info.get('title')),
+ 'description': description,
+ 'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
+ or series),
+            'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description or '', 'season number', default=None))
+                              or int_or_none(season_number)),
+ 'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
+                               or int_or_none(episode_number)),
+ 'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
+ 'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/ynet.py b/hypervideo_dl/extractor/ynet.py
index c4ae4d8..a7d7371 100644
--- a/hypervideo_dl/extractor/ynet.py
+++ b/hypervideo_dl/extractor/ynet.py
@@ -1,11 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
import json
+import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
class YnetIE(InfoExtractor):
@@ -34,7 +31,7 @@ class YnetIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
+ content = urllib.parse.unquote_plus(self._og_search_video_url(webpage))
config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
f4m_url = config['clip']['url']
title = self._og_search_title(webpage)
@@ -42,7 +39,6 @@ class YnetIE(InfoExtractor):
if m:
title = m.group('title')
formats = self._extract_f4m_formats(f4m_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/youjizz.py b/hypervideo_dl/extractor/youjizz.py
index 111623f..cd12be5 100644
--- a/hypervideo_dl/extractor/youjizz.py
+++ b/hypervideo_dl/extractor/youjizz.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
diff --git a/hypervideo_dl/extractor/youku.py b/hypervideo_dl/extractor/youku.py
index b505799..624975b 100644
--- a/hypervideo_dl/extractor/youku.py
+++ b/hypervideo_dl/extractor/youku.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
import re
import string
@@ -201,7 +198,6 @@ class YoukuIE(InfoExtractor):
'width': stream.get('width'),
'height': stream.get('height'),
} for stream in data['stream'] if stream.get('channel_type') != 'tail']
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/younow.py b/hypervideo_dl/extractor/younow.py
index 583aea3..18112ba 100644
--- a/hypervideo_dl/extractor/younow.py
+++ b/hypervideo_dl/extractor/younow.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import itertools
from .common import InfoExtractor
@@ -94,7 +91,7 @@ def _extract_moment(item, fatal=True):
uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
uploader_id = try_get(item, lambda x: x['owner']['userId'])
- uploader_url = format_field(uploader, template='https://www.younow.com/%s')
+ uploader_url = format_field(uploader, None, 'https://www.younow.com/%s')
entry = {
'extractor_key': 'YouNowMoment',
diff --git a/hypervideo_dl/extractor/youporn.py b/hypervideo_dl/extractor/youporn.py
index 5feb568..8f1b991 100644
--- a/hypervideo_dl/extractor/youporn.py
+++ b/hypervideo_dl/extractor/youporn.py
@@ -1,11 +1,10 @@
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
+ merge_dicts,
str_to_int,
unified_strdate,
url_or_none,
@@ -14,6 +13,7 @@ from ..utils import (
class YouPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
_TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@@ -65,14 +65,26 @@ class YouPornIE(InfoExtractor):
}, {
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
+ 'info_dict': {
+ 'id': '16290308',
+ 'age_limit': 18,
+ 'categories': [],
+ 'description': 'md5:00ea70f642f431c379763c17c2f396bc',
+ 'display_id': 'tinderspecial-trailer1',
+ 'duration': 298.0,
+ 'ext': 'mp4',
+ 'upload_date': '20201123',
+ 'uploader': 'Ersties',
+ 'tags': [],
+ 'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
+ 'timestamp': 1606089600,
+ 'title': 'Tinder In Real Life',
+ 'view_count': int,
+ }
}]
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
- webpage)
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@@ -110,7 +122,6 @@ class YouPornIE(InfoExtractor):
})
f['height'] = height
formats.append(f)
- self._sort_formats(formats)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
@@ -137,9 +148,10 @@ class YouPornIE(InfoExtractor):
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
- [r'UPLOADED:\s*<span>([^<]+)',
+ (r'UPLOADED:\s*<span>([^<]+)',
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
- r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+ r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
+ r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
webpage, 'upload date', fatal=False))
age_limit = self._rta_search(webpage)
@@ -166,7 +178,8 @@ class YouPornIE(InfoExtractor):
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
'tags')
- return {
+ data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
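+        # merge_dicts keeps non-empty JSON-LD values and falls back to the fields scraped above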
+ return merge_dicts(data, {
'id': video_id,
'display_id': display_id,
'title': title,
@@ -181,4 +194,4 @@ class YouPornIE(InfoExtractor):
'tags': tags,
'age_limit': age_limit,
'formats': formats,
- }
+ })
diff --git a/hypervideo_dl/extractor/yourporn.py b/hypervideo_dl/extractor/yourporn.py
index 9834749..38f42a9 100644
--- a/hypervideo_dl/extractor/yourporn.py
+++ b/hypervideo_dl/extractor/yourporn.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
diff --git a/hypervideo_dl/extractor/yourupload.py b/hypervideo_dl/extractor/yourupload.py
index 9fa7728..def6329 100644
--- a/hypervideo_dl/extractor/yourupload.py
+++ b/hypervideo_dl/extractor/yourupload.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import urljoin
diff --git a/hypervideo_dl/extractor/youtube.py b/hypervideo_dl/extractor/youtube.py
index dec3b14..f7e3c75 100644
--- a/hypervideo_dl/extractor/youtube.py
+++ b/hypervideo_dl/extractor/youtube.py
@@ -1,11 +1,9 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
+import base64
import calendar
+import collections
import copy
import datetime
-import functools
+import enum
import hashlib
import itertools
import json
@@ -14,29 +12,27 @@ import os.path
import random
import re
import sys
+import threading
import time
import traceback
-import threading
+import urllib.error
+import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_chr,
- compat_HTTPError,
- compat_parse_qs,
- compat_str,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urlparse,
-)
+from .openload import PhantomJSwrapper
+from ..compat import functools
from ..jsinterp import JSInterpreter
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
+ LazyList,
+ UserNotLive,
bug_reports_message,
+ classproperty,
clean_html,
datetime_from_str,
dict_get,
- error_to_compat_str,
- ExtractorError,
+ filter_dict,
float_or_none,
format_field,
get_first,
@@ -46,7 +42,6 @@ from ..utils import (
js_to_json,
mimetype2ext,
network_exceptions,
- NO_DEFAULT,
orderedSet,
parse_codecs,
parse_count,
@@ -54,7 +49,6 @@ from ..utils import (
parse_iso8601,
parse_qs,
qualities,
- remove_end,
remove_start,
smuggle_url,
str_or_none,
@@ -72,15 +66,14 @@ from ..utils import (
variadic,
)
-
-# any clients starting with _ cannot be explicity requested by the user
+# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
- 'clientVersion': '2.20211221.00.00',
+ 'clientVersion': '2.20220801.00.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1
@@ -90,7 +83,7 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_EMBEDDED_PLAYER',
- 'clientVersion': '1.20211215.00.01',
+ 'clientVersion': '1.20220731.00.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 56
@@ -101,7 +94,7 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_REMIX',
- 'clientVersion': '1.20211213.00.00',
+ 'clientVersion': '1.20220727.01.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
@@ -111,7 +104,7 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_CREATOR',
- 'clientVersion': '1.20211220.02.00',
+ 'clientVersion': '1.20220726.00.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
@@ -121,7 +114,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
- 'clientVersion': '16.49',
+ 'clientVersion': '17.31.35',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
@@ -132,7 +127,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER',
- 'clientVersion': '16.49',
+ 'clientVersion': '17.31.35',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
@@ -143,7 +140,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_MUSIC',
- 'clientVersion': '4.57',
+ 'clientVersion': '5.16.51',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
@@ -154,7 +153,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_CREATOR',
- 'clientVersion': '21.47',
+ 'clientVersion': '22.30.100',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
@@ -167,8 +168,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
- 'clientVersion': '16.46',
+ 'clientVersion': '17.33.2',
'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@@ -178,8 +180,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MESSAGES_EXTENSION',
- 'clientVersion': '16.46',
+ 'clientVersion': '17.33.2',
'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
@@ -190,7 +193,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MUSIC',
- 'clientVersion': '4.57',
+ 'clientVersion': '5.21',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
@@ -200,7 +205,9 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_CREATOR',
- 'clientVersion': '21.47',
+ 'clientVersion': '22.33.101',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
@@ -213,7 +220,7 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
- 'clientVersion': '2.20211221.01.00',
+ 'clientVersion': '2.20220801.00.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2
@@ -272,14 +279,23 @@ def build_innertube_clients():
build_innertube_clients()
+class BadgeType(enum.Enum):
+ AVAILABILITY_UNLISTED = enum.auto()
+ AVAILABILITY_PRIVATE = enum.auto()
+ AVAILABILITY_PUBLIC = enum.auto()
+ AVAILABILITY_PREMIUM = enum.auto()
+ AVAILABILITY_SUBSCRIPTION = enum.auto()
+ LIVE_NOW = enum.auto()
+
+
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_RESERVED_NAMES = (
r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
- r'browse|oembed|get_video_info|iframe_api|s/player|'
- r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
+ r'browse|oembed|get_video_info|iframe_api|s/player|source|'
+ r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
@@ -292,7 +308,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# invidious-redirect websites
r'(?:www\.)?redirect\.invidious\.io',
r'(?:(?:www|dev)\.)?invidio\.us',
- # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
+ # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
r'(?:www\.)?invidious\.pussthecat\.org',
r'(?:www\.)?invidious\.zee\.li',
r'(?:www\.)?invidious\.ethibox\.fr',
@@ -352,8 +368,62 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
+ # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
+ r'(?:www\.)?piped\.kavin\.rocks',
+ r'(?:www\.)?piped\.tokhmi\.xyz',
+ r'(?:www\.)?piped\.syncpundit\.io',
+ r'(?:www\.)?piped\.mha\.fi',
+ r'(?:www\.)?watch\.whatever\.social',
+ r'(?:www\.)?piped\.garudalinux\.org',
+ r'(?:www\.)?piped\.rivo\.lol',
+ r'(?:www\.)?piped-libre\.kavin\.rocks',
+ r'(?:www\.)?yt\.jae\.fi',
+ r'(?:www\.)?piped\.mint\.lgbt',
+ r'(?:www\.)?il\.ax',
+ r'(?:www\.)?piped\.esmailelbob\.xyz',
+ r'(?:www\.)?piped\.projectsegfau\.lt',
+ r'(?:www\.)?piped\.privacydev\.net',
+ r'(?:www\.)?piped\.palveluntarjoaja\.eu',
+ r'(?:www\.)?piped\.smnz\.de',
+ r'(?:www\.)?piped\.adminforge\.de',
+ r'(?:www\.)?watch\.whatevertinfoil\.de',
+ r'(?:www\.)?piped\.qdi\.fi',
+ r'(?:www\.)?piped\.video',
+ r'(?:www\.)?piped\.aeong\.one',
)
+ # extracted from account/account_menu ep
+ # XXX: These are the supported YouTube UI and API languages,
+ # which is slightly different from languages supported for translation in YouTube studio
+ _SUPPORTED_LANG_CODES = [
+ 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
+ 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
+ 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
+ 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
+ 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
+ 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
+ ]
+
+ _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
+
+ @functools.cached_property
+ def _preferred_lang(self):
+ """
+ Returns a language code supported by YouTube for the user preferred language.
+ Returns None if no preferred language set.
+ """
+ preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
+ if not preferred_lang:
+ return
+ if preferred_lang not in self._SUPPORTED_LANG_CODES:
+ raise ExtractorError(
+ f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
+ expected=True)
+ elif preferred_lang != 'en':
+ self.report_warning(
+ f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
+ return preferred_lang
+
def _initialize_consent(self):
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
@@ -375,23 +445,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
pref = {}
if pref_cookie:
try:
- pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
+ pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
except ValueError:
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
- pref.update({'hl': 'en', 'tz': 'UTC'})
- self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
+ pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
+ self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self):
self._initialize_pref()
self._initialize_consent()
- if (self._LOGIN_REQUIRED
- and self.get_param('cookiefile') is None
- and self.get_param('cookiesfrombrowser') is None):
+ self._check_login_required()
+
+ def _check_login_required(self):
+ if self._LOGIN_REQUIRED and not self._cookies_passed:
self.raise_login_required('Login details are needed to download this content', method='cookies')
- _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
- _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
+ _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
+ _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
return copy.deepcopy(INNERTUBE_CLIENTS[client])
@@ -407,22 +477,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_client_name(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
+
+ def _select_api_hostname(self, req_api_hostname, default_client=None):
+ return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
+ or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_api_key(self, ytcfg=None, default_client='web'):
- return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
+ return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
# Enforce language and tz for extraction
client_context = traverse_obj(context, 'client', expected_type=dict, default={})
- client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
+ client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
return context
_SAPISID = None
@@ -449,7 +523,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return None
# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
sapisidhash = hashlib.sha1(
- f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
+ f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
return f'SAPISIDHASH {time_now}_{sapisidhash}'
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
@@ -462,18 +536,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
real_headers.update({'content-type': 'application/json'})
if headers:
real_headers.update(headers)
+ api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
+ or api_key or self._extract_api_key(default_client=default_client))
return self._download_json(
- 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
+ f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
- query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
+ query={'key': api_key, 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
- data = self._search_regex(
- (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
- self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
- if data:
- return self._parse_json(data, item_id, fatal=fatal)
+ return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@staticmethod
def _extract_session_index(*data):
@@ -489,7 +561,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
if ytcfg:
- token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
if token:
return token
if webpage:
@@ -505,12 +577,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
- delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
+ delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
- lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
+ lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
@@ -526,7 +598,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)
- @property
+ @functools.cached_property
def is_authenticated(self):
return bool(self._generate_sapisidhash_header())
@@ -542,15 +614,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self, *, ytcfg=None, account_syncid=None, session_index=None,
visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
- origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
+ origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
- 'X-YouTube-Client-Name': compat_str(
+ 'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
- 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
+ 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
+ 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
}
if session_index is None:
session_index = self._extract_session_index(ytcfg)
@@ -561,7 +634,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if auth is not None:
headers['Authorization'] = auth
headers['X-Origin'] = origin
- return {h: v for h, v in headers.items() if v is not None}
+ return filter_dict(headers)
+
+ def _download_ytcfg(self, client, video_id):
+ url = {
+ 'web': 'https://www.youtube.com',
+ 'web_music': 'https://music.youtube.com',
+ 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+ }.get(client)
+ if not url:
+ return {}
+ webpage = self._download_webpage(
+ url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
+ return self.extract_ytcfg(video_id, webpage) or {}
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
@@ -592,7 +677,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_continuation_ep_data(cls, continuation_ep: dict):
if isinstance(continuation_ep, dict):
continuation = try_get(
- continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ continuation_ep, lambda x: x['continuationCommand']['token'], str)
if not continuation:
return
ctp = continuation_ep.get('clickTrackingParams')
@@ -604,20 +689,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if next_continuation:
return next_continuation
- contents = []
- for key in ('contents', 'items'):
- contents.extend(try_get(renderer, lambda x: x[key], list) or [])
-
- for content in contents:
- if not isinstance(content, dict):
- continue
- continuation_ep = try_get(
- content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
- lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
- dict)
- continuation = cls._extract_continuation_ep_data(continuation_ep)
- if continuation:
- return continuation
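+ # Note: expected_type doubles as a transform here, so traverse_obj returns the first
+ # non-None result of _extract_continuation_ep_data over the matched endpoints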
+ return traverse_obj(renderer, (
+ ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
+ ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
+ ), get_all=False, expected_type=cls._extract_continuation_ep_data)
@classmethod
def _extract_alerts(cls, data):
@@ -633,16 +708,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
yield alert_type, message
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
- errors = []
- warnings = []
+ errors, warnings = [], []
for alert_type, alert_message in alerts:
if alert_type.lower() == 'error' and fatal:
errors.append([alert_type, alert_message])
- else:
+ elif alert_message not in self._IGNORED_WARNINGS:
warnings.append([alert_type, alert_message])
for alert_type, alert_message in (warnings + errors[:-1]):
- self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
+ self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
if errors:
raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
@@ -650,14 +724,50 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
def _extract_badges(self, renderer: dict):
- badges = set()
- for badge in try_get(renderer, lambda x: x['badges'], list) or []:
- label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
- if label:
- badges.add(label.lower())
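+ # Map InnerTube privacy icons, badge styles and (as an English-only fallback) labels to BadgeType values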
+ privacy_icon_map = {
+ 'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
+ 'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
+ 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
+ }
+
+ badge_style_map = {
+ 'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
+ 'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
+ 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
+ }
+
+ label_map = {
+ 'unlisted': BadgeType.AVAILABILITY_UNLISTED,
+ 'private': BadgeType.AVAILABILITY_PRIVATE,
+ 'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
+ 'live': BadgeType.LIVE_NOW,
+ 'premium': BadgeType.AVAILABILITY_PREMIUM
+ }
+
+ badges = []
+ for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
+ badge_type = (
+ privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+ or badge_style_map.get(traverse_obj(badge, 'style'))
+ )
+ if badge_type:
+ badges.append({'type': badge_type})
+ continue
+
+ # fallback, won't work in some languages
+ label = traverse_obj(badge, 'label', expected_type=str, default='')
+ for match, label_badge_type in label_map.items():
+ if match in label.lower():
+ badges.append({'type': label_badge_type})
+ continue
+
return badges
@staticmethod
+ def _has_badge(badges, badge_type):
+ return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type))
+
+ @staticmethod
def _get_text(data, *path_list, max_runs=None):
for path in path_list or [None]:
if path is None:
@@ -667,7 +777,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
obj = [obj]
for item in obj:
- text = try_get(item, lambda x: x['simpleText'], compat_str)
+ text = try_get(item, lambda x: x['simpleText'], str)
if text:
return text
runs = try_get(item, lambda x: x['runs'], list) or []
@@ -727,8 +837,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
except ValueError:
return None
- def _extract_time_text(self, renderer, *path_list):
- text = self._get_text(renderer, *path_list) or ''
+ def _parse_time_text(self, text):
+ if not text:
+ return
dt = self.extract_relative_time(text)
timestamp = None
if isinstance(dt, datetime.datetime):
@@ -741,81 +852,62 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
(r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
text.lower(), 'time text', default=None)))
- if text and timestamp is None:
- self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
- return timestamp, text
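+ # Only warn when extracting in English; the patterns above cannot parse localized time text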
+ if text and timestamp is None and self._preferred_lang in (None, 'en'):
+ self.report_warning(
+ f'Cannot parse localized time text "{text}"', only_once=True)
+ return timestamp
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
- response = None
- last_error = None
- count = -1
- retries = self.get_param('extractor_retries', 3)
- if check_get_keys is None:
- check_get_keys = []
- while count < retries:
- count += 1
- if last_error:
- self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
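+ # RetryManager replaces the manual retry loop: assigning retry.error logs a warning
+ # and schedules another attempt, raising once the configured retries are exhausted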
+ for retry in self.RetryManager():
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
- video_id=item_id, query=query,
+ video_id=item_id, query=query, note=note,
context=self._extract_context(ytcfg, default_client),
api_key=self._extract_api_key(ytcfg, default_client),
- api_hostname=api_hostname, default_client=default_client,
- note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
+ api_hostname=api_hostname, default_client=default_client)
except ExtractorError as e:
- if isinstance(e.cause, network_exceptions):
- if isinstance(e.cause, compat_HTTPError):
- first_bytes = e.cause.read(512)
- if not is_html(first_bytes):
- yt_error = try_get(
- self._parse_json(
- self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
- lambda x: x['error']['message'], compat_str)
- if yt_error:
- self._report_alerts([('ERROR', yt_error)], fatal=False)
- # Downloading page may result in intermittent 5xx HTTP error
- # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
- # We also want to catch all other network exceptions since errors in later pages can be troublesome
- # See https://github.com/hypervideo/hypervideo/issues/507#issuecomment-880188210
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
- last_error = error_to_compat_str(e.cause or e.msg)
- if count < retries:
- continue
- if fatal:
- raise
- else:
- self.report_warning(error_to_compat_str(e))
- return
+ if not isinstance(e.cause, network_exceptions):
+ return self._error_or_warning(e, fatal=fatal)
+ elif not isinstance(e.cause, urllib.error.HTTPError):
+ retry.error = e
+ continue
- else:
- try:
- self._extract_and_report_alerts(response, only_once=True)
- except ExtractorError as e:
- # YouTube servers may return errors we want to retry on in a 200 OK response
- # See: https://github.com/hypervideo/hypervideo/issues/839
- if 'unknown error' in e.msg.lower():
- last_error = e.msg
- continue
- if fatal:
- raise
- self.report_warning(error_to_compat_str(e))
- return
- if not check_get_keys or dict_get(response, check_get_keys):
- break
- # Youtube sometimes sends incomplete data
- # See: https://github.com/ytdl-org/youtube-dl/issues/28194
- last_error = 'Incomplete data received'
- if count >= retries:
- if fatal:
- raise ExtractorError(last_error)
- else:
- self.report_warning(last_error)
- return
- return response
+ first_bytes = e.cause.read(512)
+ if not is_html(first_bytes):
+ yt_error = try_get(
+ self._parse_json(
+ self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
+ lambda x: x['error']['message'], str)
+ if yt_error:
+ self._report_alerts([('ERROR', yt_error)], fatal=False)
+ # Downloading page may result in intermittent 5xx HTTP error
+ # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+ # We also want to catch all other network exceptions since errors in later pages can be troublesome
+ # See https://github.com/hypervideo/hypervideo/issues/507#issuecomment-880188210
+ if e.cause.code not in (403, 429):
+ retry.error = e
+ continue
+ return self._error_or_warning(e, fatal=fatal)
+
+ try:
+ self._extract_and_report_alerts(response, only_once=True)
+ except ExtractorError as e:
+ # YouTube servers may return errors we want to retry on in a 200 OK response
+ # See: https://github.com/hypervideo/hypervideo/issues/839
+ if 'unknown error' in e.msg.lower():
+ retry.error = e
+ continue
+ return self._error_or_warning(e, fatal=fatal)
+ # YouTube sometimes sends incomplete data
+ # See: https://github.com/ytdl-org/youtube-dl/issues/28194
+ if not traverse_obj(response, *variadic(check_get_keys)):
+ retry.error = ExtractorError('Incomplete data received', expected=True)
+ continue
+
+ return response
@staticmethod
def is_music_url(url):
@@ -823,29 +915,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_video(self, renderer):
video_id = renderer.get('videoId')
- title = self._get_text(renderer, 'title')
+
+ reel_header_renderer = traverse_obj(renderer, (
+ 'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
+ 'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))
+
+ title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
description = self._get_text(renderer, 'descriptionSnippet')
- duration = parse_duration(self._get_text(
- renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
+
+ duration = int_or_none(renderer.get('lengthSeconds'))
+ if duration is None:
+ duration = parse_duration(self._get_text(
+ renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
if duration is None:
+ # XXX: should write a more general parser to support more cases (e.g. shorts in shorts tab)
duration = parse_duration(self._search_regex(
r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
video_id, default=None, group='duration'))
- view_count = self._get_count(renderer, 'viewCountText')
-
- uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
channel_id = traverse_obj(
renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
expected_type=str, get_all=False)
- timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
- scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
+ if not channel_id:
+ channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
+
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
get_all=False, expected_type=str)
badges = self._extract_badges(renderer)
- thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
expected_type=str)) or ''
@@ -853,6 +952,22 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
url = f'https://www.youtube.com/shorts/{video_id}'
+ time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
+ or self._get_text(reel_header_renderer, 'timestampText') or '')
+ scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
+
+ live_status = (
+ 'is_upcoming' if scheduled_timestamp is not None
+ else 'was_live' if 'streamed' in time_text.lower()
+ else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
+ else None)
+
+ # videoInfo is a string like '50K views • 10 years ago'.
+ view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
+ view_count = (0 if 'no views' in view_count_text.lower()
+ else self._get_count({'simpleText': view_count_text}))
+ view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'
+
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
@@ -861,19 +976,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'title': title,
'description': description,
'duration': duration,
- 'view_count': view_count,
- 'uploader': uploader,
'channel_id': channel_id,
- 'thumbnails': thumbnails,
- 'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
- if self._configuration_arg('approximate_date', ie_key='youtubetab')
- else None),
- 'live_status': ('is_upcoming' if scheduled_timestamp is not None
- else 'was_live' if 'streamed' in time_text.lower()
- else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
- else None),
+ 'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
+ or self._get_text(reel_header_renderer, 'channelTitleText')),
+ 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
+ 'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
+ 'timestamp': (self._parse_time_text(time_text)
+ if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
+ else None),
'release_timestamp': scheduled_timestamp,
- 'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
+ 'availability':
+ 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+ else self._availability(
+ is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
+ needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+ needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+ is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
+ view_count_field: view_count,
+ 'live_status': live_status
}
@@ -914,6 +1034,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:\#|$)""" % {
'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}
+ _EMBED_REGEX = [
+ r'''(?x)
+ (?:
+ <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''',
+ # https://wordpress.org/plugins/lazy-load-for-videos/
+ r'''(?xs)
+ <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
+ \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
+ ]
+ _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
+
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
@@ -1060,6 +1201,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'age_limit': 0,
'start_time': 1,
'end_time': 9,
+ 'comment_count': int,
'channel_follower_count': int
}
},
@@ -1104,6 +1246,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
'live_status': 'not_live',
'age_limit': 0,
+ 'comment_count': int,
'channel_follower_count': int
},
'params': {
@@ -1246,6 +1389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'categories': ['Entertainment'],
'duration': 106,
'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'comment_count': int,
'channel_follower_count': int
},
},
@@ -1333,7 +1477,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
'uploader': 'Olympics',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
'like_count': int,
@@ -1382,6 +1526,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': int,
'live_status': 'not_live',
'availability': 'unlisted',
+ 'comment_count': int,
'channel_follower_count': int
},
},
@@ -1439,66 +1584,99 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip': 'This live event has ended.',
},
{
- # Multifeed videos (multiple cameras), URL is for Main Camera
- 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
+ # Multifeed videos (multiple cameras); the URL can be for any camera
+ 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
'info_dict': {
- 'id': 'jvGDaLqkpTg',
- 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
- 'description': 'md5:e03b909557865076822aa169218d6a5d',
+ 'id': 'zaPI8MvL8pg',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
},
'playlist': [{
'info_dict': {
- 'id': 'jvGDaLqkpTg',
- 'ext': 'mp4',
- 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
- 'description': 'md5:e03b909557865076822aa169218d6a5d',
- 'duration': 10643,
- 'upload_date': '20161111',
- 'uploader': 'Team PGP',
- 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
- },
- }, {
- 'info_dict': {
- 'id': '3AKt1R1aDnw',
+ 'id': 'j5yGuxZ8lLU',
'ext': 'mp4',
- 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
- 'description': 'md5:e03b909557865076822aa169218d6a5d',
- 'duration': 10991,
- 'upload_date': '20161111',
- 'uploader': 'Team PGP',
- 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
+ 'uploader': 'WiiLikeToPlay',
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
+ 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
+ 'duration': 10120,
+ 'channel_follower_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'upload_date': '20131105',
+ 'uploader_id': 'WiiRikeToPray',
+ 'categories': ['Gaming'],
+ 'live_status': 'was_live',
+ 'tags': 'count:24',
+ 'release_timestamp': 1383701910,
+ 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
+ 'comment_count': int,
+ 'age_limit': 0,
+ 'like_count': int,
+ 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
+ 'channel': 'WiiLikeToPlay',
+ 'view_count': int,
+ 'release_date': '20131106',
},
}, {
'info_dict': {
- 'id': 'RtAMM00gpVc',
+ 'id': 'zaPI8MvL8pg',
'ext': 'mp4',
- 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
- 'description': 'md5:e03b909557865076822aa169218d6a5d',
- 'duration': 10995,
- 'upload_date': '20161111',
- 'uploader': 'Team PGP',
- 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
+ 'uploader_id': 'WiiRikeToPray',
+ 'availability': 'public',
+ 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
+ 'channel': 'WiiLikeToPlay',
+ 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
+ 'channel_follower_count': int,
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
+ 'duration': 10108,
+ 'age_limit': 0,
+ 'like_count': int,
+ 'tags': 'count:24',
+ 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
+ 'uploader': 'WiiLikeToPlay',
+ 'release_timestamp': 1383701915,
+ 'comment_count': int,
+ 'upload_date': '20131105',
+ 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
+ 'release_date': '20131106',
+ 'playable_in_embed': True,
+ 'live_status': 'was_live',
+ 'categories': ['Gaming'],
+ 'view_count': int,
},
}, {
'info_dict': {
- 'id': '6N2fdlP3C5U',
+ 'id': 'R7r3vfO7Hao',
'ext': 'mp4',
- 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
- 'description': 'md5:e03b909557865076822aa169218d6a5d',
- 'duration': 10990,
- 'upload_date': '20161111',
- 'uploader': 'Team PGP',
- 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
+ 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
+ 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
+ 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
+ 'like_count': int,
+ 'availability': 'public',
+ 'playable_in_embed': True,
+ 'upload_date': '20131105',
+ 'description': 'md5:563ccbc698b39298481ca3c571169519',
+ 'uploader_id': 'WiiRikeToPray',
+ 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
+ 'channel_follower_count': int,
+ 'tags': 'count:24',
+ 'release_date': '20131106',
+ 'uploader': 'WiiLikeToPlay',
+ 'comment_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
+ 'channel': 'WiiLikeToPlay',
+ 'categories': ['Gaming'],
+ 'release_timestamp': 1383701914,
+ 'live_status': 'was_live',
+ 'age_limit': 0,
+ 'duration': 10128,
+ 'view_count': int,
},
}],
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Not multifeed anymore',
+ 'params': {'skip_download': True},
},
{
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
@@ -1610,7 +1788,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
'live_status': 'not_live',
'playable_in_embed': True,
- 'channel_follower_count': int
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'skip_download': True,
@@ -1642,7 +1822,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': int,
'live_status': 'not_live',
'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
- 'channel_follower_count': int
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'skip_download': True,
@@ -1906,7 +2088,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': int,
'duration': 522,
'channel': 'kudvenkat',
- 'channel_follower_count': int
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'skip_download': True,
@@ -2056,7 +2240,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': int,
'live_status': 'not_live',
'playable_in_embed': True,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'chapters': list,
},
'params': {
'format': '17', # 3gp format available on android
@@ -2100,7 +2285,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 248,
'categories': ['Education'],
'age_limit': 0,
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'chapters': list,
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2127,9 +2313,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'availability': 'public',
'channel': 'Leon Nguyen',
'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+ 'comment_count': int,
'channel_follower_count': int
}
}, {
+ # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
+ 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
+ 'info_dict': {
+ 'id': '2NUZ8W2llS4',
+ 'ext': 'mp4',
+ 'title': 'The NP that test your phone performance 🙂',
+ 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
+ 'uploader': 'Leon Nguyen',
+ 'uploader_id': 'VNSXIII',
+ 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
+ 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
+ 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
+ 'duration': 21,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Gaming'],
+ 'tags': 'count:23',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'upload_date': '20220102',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Leon Nguyen',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+ 'comment_count': int,
+ 'channel_follower_count': int
+ },
+ 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
+ }, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)
'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
'info_dict': {
@@ -2186,8 +2402,184 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': int,
'playable_in_embed': True,
'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+ 'concurrent_view_count': int,
},
'params': {'skip_download': True}
+ }, {
+ # Story. Requires specific player params to work.
+ 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
+ 'info_dict': {
+ 'id': 'vv8qTUWmulI',
+ 'ext': 'mp4',
+ 'availability': 'unlisted',
+ 'view_count': int,
+ 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
+ 'upload_date': '20220526',
+ 'categories': ['Education'],
+ 'title': 'Story',
+ 'channel': 'IT\'S HISTORY',
+ 'description': '',
+ 'uploader_id': 'BlastfromthePast',
+ 'duration': 12,
+ 'uploader': 'IT\'S HISTORY',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'tags': [],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
+ 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
+ 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
+ },
+ 'skip': 'stories get removed after some period of time',
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
+ 'info_dict': {
+ 'id': 'tjjjtzRLHvA',
+ 'ext': 'mp4',
+ 'title': 'ハッシュタグ無し };if window.ytcsi',
+ 'upload_date': '20220323',
+ 'like_count': int,
+ 'availability': 'unlisted',
+ 'channel': 'nao20010128nao',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
+ 'age_limit': 0,
+ 'uploader': 'nao20010128nao',
+ 'uploader_id': 'nao20010128nao',
+ 'categories': ['Music'],
+ 'view_count': int,
+ 'description': '',
+ 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
+ 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'duration': 6,
+ 'tags': [],
+ 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
+ }
+ }, {
+ # Prefer primary title+description language metadata by default
+ # Do not prefer translated description if primary is empty
+ 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
+ 'info_dict': {
+ 'id': 'el3E4MbxRqQ',
+ 'ext': 'mp4',
+ 'title': 'dlp test video 2 - primary sv no desc',
+ 'description': '',
+ 'channel': 'cole-dlp-test-acc',
+ 'tags': [],
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'availability': 'unlisted',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
+ 'age_limit': 0,
+ 'duration': 5,
+ 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'live_status': 'not_live',
+ 'upload_date': '20220908',
+ 'categories': ['People & Blogs'],
+ 'uploader': 'cole-dlp-test-acc',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ # Extractor argument: prefer translated title+description
+ 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
+ 'info_dict': {
+ 'id': 'gHKT4uU8Zng',
+ 'ext': 'mp4',
+ 'channel': 'cole-dlp-test-acc',
+ 'tags': [],
+ 'duration': 5,
+ 'live_status': 'not_live',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'upload_date': '20220728',
+ 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'view_count': int,
+ 'categories': ['People & Blogs'],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
+ 'title': 'dlp test video title translated (fr)',
+ 'availability': 'public',
+ 'uploader': 'cole-dlp-test-acc',
+ 'age_limit': 0,
+ 'description': 'dlp test video description translated (fr)',
+ 'playable_in_embed': True,
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ },
+ 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
+ 'expected_warnings': [r'Preferring "fr" translated fields'],
+ }, {
+ 'note': '6 channel audio',
+ 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
+ 'only_matching': True,
+ }, {
+ 'note': 'Multiple HLS formats with same itag',
+ 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
+ 'info_dict': {
+ 'id': 'kX3nB4PpJko',
+ 'ext': 'mp4',
+ 'categories': ['Entertainment'],
+ 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
+ 'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
+ 'live_status': 'not_live',
+ 'duration': 937,
+ 'channel_follower_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
+ 'title': 'Last To Take Hand Off Jet, Keeps It!',
+ 'channel': 'MrBeast',
+ 'playable_in_embed': True,
+ 'view_count': int,
+ 'upload_date': '20221112',
+ 'uploader': 'MrBeast',
+ 'uploader_id': 'MrBeast6000',
+ 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
+ 'like_count': int,
+ 'tags': [],
+ },
+ 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
+ }
+ ]
+
+ _WEBPAGE_TESTS = [
+ # YouTube <object> embed
+ {
+ 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
+ 'md5': '873c81d308b979f0e23ee7e620b312a3',
+ 'info_dict': {
+ 'id': 'msN87y-iEx0',
+ 'ext': 'mp4',
+ 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
+ 'upload_date': '20080526',
+ 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
+ 'uploader': 'Christopher Sykes',
+ 'uploader_id': 'ChristopherJSykes',
+ 'age_limit': 0,
+ 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
+ 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
+ 'playable_in_embed': True,
+ 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
+ 'like_count': int,
+ 'comment_count': int,
+ 'channel': 'Christopher Sykes',
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
+ 'availability': 'public',
+ 'duration': 195,
+ 'view_count': int,
+ 'categories': ['Science & Technology'],
+ 'channel_follower_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
},
]
@@ -2198,17 +2590,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
qs = parse_qs(url)
if qs.get('list', [None])[0]:
return False
- return super(YoutubeIE, cls).suitable(url)
+ return super().suitable(url)
def __init__(self, *args, **kwargs):
- super(YoutubeIE, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
self._code_cache = {}
self._player_cache = {}
- def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
+ def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
lock = threading.Lock()
-
- is_live = True
start_time = time.time()
formats = [f for f in formats if f.get('is_from_start')]
@@ -2223,7 +2613,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict, default=[])
- _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
+ _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
+ is_live = live_status == 'is_live'
start_time = time.time()
def mpd_feed(format_id, delay):
@@ -2244,12 +2635,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return f['manifest_url'], f['manifest_stream_number'], is_live
for f in formats:
- f['is_live'] = True
- f['protocol'] = 'http_dash_segments_generator'
- f['fragments'] = functools.partial(
- self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
+ f['is_live'] = is_live
+ gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
+ live_start_time, mpd_feed, not is_live and f.copy())
+ if is_live:
+ f['fragments'] = gen
+ f['protocol'] = 'http_dash_segments_generator'
+ else:
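+ # Post-live (manifestless) download: the fragment list is final, so materialize it once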
+ f['fragments'] = LazyList(gen({}))
+ del f['is_from_start']
- def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
+ def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
FETCH_SPAN, MAX_DURATION = 5, 432000
mpd_url, stream_number, is_live = None, None, True
@@ -2272,7 +2668,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
@@ -2280,15 +2676,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return False, last_seq
elif old_mpd_url == mpd_url:
return True, last_seq
- try:
- fmts, _ = self._extract_mpd_formats_and_subtitles(
- mpd_url, None, note=False, errnote=False, fatal=False)
- except ExtractorError:
- fmts = None
- if not fmts:
- no_fragment_score += 2
- return False, last_seq
- fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
+ if manifestless_orig_fmt:
+ fmt_info = manifestless_orig_fmt
+ else:
+ try:
+ fmts, _ = self._extract_mpd_formats_and_subtitles(
+ mpd_url, None, note=False, errnote=False, fatal=False)
+ except ExtractorError:
+ fmts = None
+ if not fmts:
+ no_fragment_score += 2
+ return False, last_seq
+ fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
fragments = fmt_info['fragments']
fragment_base_url = fmt_info['fragment_base_url']
assert fragment_base_url
@@ -2296,6 +2695,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
return True, _last_seq
+ self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
while is_live:
fetch_time = time.time()
if no_fragment_score > 30:
@@ -2339,6 +2739,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
yield {
'url': last_segment_url,
+ 'fragment_count': last_seq,
}
if known_idx == last_seq:
no_fragment_score += 5
@@ -2348,12 +2749,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except ExtractorError:
continue
+ if manifestless_orig_fmt:
+ # Stop at the first iteration if running for post-live manifestless;
+ # the fragment count no longer increases once the stream has ended
+ break
+
time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
def _extract_player_url(self, *ytcfgs, webpage=None):
player_url = traverse_obj(
ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
- get_all=False, expected_type=compat_str)
+ get_all=False, expected_type=str)
if not player_url:
return
return urljoin('https://www.youtube.com', player_url)
@@ -2370,7 +2776,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
- return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+ return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
@@ -2397,24 +2803,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_id = self._extract_player_info(player_url)
# Read from filesystem cache
- func_id = 'js_%s_%s' % (
- player_id, self._signature_cache_id(example_sig))
+ func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
assert os.path.basename(func_id) == func_id
- cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
- if cache_spec is not None:
- return lambda s: ''.join(s[i] for i in cache_spec)
+ self.write_debug(f'Extracting signature function {func_id}')
+ cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
- code = self._load_player(video_id, player_url)
+ if not cache_spec:
+ code = self._load_player(video_id, player_url)
if code:
res = self._parse_sig_js(code)
+ test_string = ''.join(map(chr, range(len(example_sig))))
+ cache_spec = [ord(c) for c in res(test_string)]
+ self.cache.store('youtube-sigfuncs', func_id, cache_spec)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
-
- self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
- return res
+ return lambda s: ''.join(s[i] for i in cache_spec)
def _print_sig_code(self, func, example_sig):
if not self.get_param('youtube_print_sig_code'):
@@ -2425,7 +2828,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
starts = '' if start == 0 else str(start)
ends = (':%d' % (end + step)) if end + step >= 0 else ':'
steps = '' if step == 1 else (':%d' % step)
- return 's[%s%s%s]' % (starts, ends, steps)
+ return f's[{starts}{ends}{steps}]'
step = None
# Quelch pyflakes warnings - start will be set when step is set
@@ -2448,12 +2851,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
yield _genslice(start, i, step)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ test_string = ''.join(map(chr, range(len(example_sig))))
cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = ' + '.join(gen_sig_code(cache_spec))
signature_id_tuple = '(%s)' % (
- ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ ', '.join(str(len(p)) for p in example_sig.split('.')))
code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code)
@@ -2482,24 +2885,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_function = jsi.extract_function(funcname)
return lambda s: initial_function([s])
- def _decrypt_signature(self, s, video_id, player_url):
- """Turn the encrypted s field into a working signature"""
+ def _cached(self, func, *cache_id):
+ def inner(*args, **kwargs):
+ if cache_id not in self._player_cache:
+ try:
+ self._player_cache[cache_id] = func(*args, **kwargs)
+ except ExtractorError as e:
+ self._player_cache[cache_id] = e
+ except Exception as e:
+ self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
- if player_url is None:
- raise ExtractorError('Cannot decrypt signature without player_url')
+ ret = self._player_cache[cache_id]
+ if isinstance(ret, Exception):
+ raise ret
+ return ret
+ return inner
- try:
- player_id = (player_url, self._signature_cache_id(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, s
- )
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- self._print_sig_code(func, s)
- return func(s)
- except Exception as e:
- raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
+ def _decrypt_signature(self, s, video_id, player_url):
+ """Turn the encrypted s field into a working signature"""
+ extract_sig = self._cached(
+ self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
+ func = extract_sig(video_id, player_url, s)
+ self._print_sig_code(func, s)
+ return func(s)
def _decrypt_nsig(self, s, video_id, player_url):
"""Turn the encrypted n field into a working signature"""
@@ -2507,48 +2915,87 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Cannot decrypt nsig without player_url')
player_url = urljoin('https://www.youtube.com', player_url)
- sig_id = ('nsig_value', s)
- if sig_id in self._player_cache:
- return self._player_cache[sig_id]
+ try:
+ jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+ except ExtractorError as e:
+ raise ExtractorError('Unable to extract nsig function code', cause=e)
+ if self.get_param('youtube_print_sig_code'):
+ self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
try:
- player_id = ('nsig', player_url)
- if player_id not in self._player_cache:
- self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
- func = self._player_cache[player_id]
- self._player_cache[sig_id] = func(s)
- self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
- return self._player_cache[sig_id]
- except Exception as e:
- raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
+ extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
+ ret = extract_nsig(jsi, func_code)(s)
+ except JSInterpreter.Exception as e:
+ try:
+ jsi = PhantomJSwrapper(self, timeout=5000)
+ except ExtractorError:
+ raise e
+ self.report_warning(
+ f'Native nsig extraction failed: Trying with PhantomJS\n'
+ f' n = {s} ; player = {player_url}', video_id)
+ self.write_debug(e, only_once=True)
+
+ args, func_body = func_code
+ ret = jsi.execute(
+ f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
+ video_id=video_id, note='Executing signature code').strip()
+
+ self.write_debug(f'Decrypted nsig {s} => {ret}')
+ return ret
def _extract_n_function_name(self, jscode):
- nfunc, idx = self._search_regex(
+ funcname, idx = self._search_regex(
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
- return nfunc
+ return funcname
+
return json.loads(js_to_json(self._search_regex(
- rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
- f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
+ f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
- def _extract_n_function(self, video_id, player_url):
+ def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
- func_code = self._downloader.cache.load('youtube-nsig', player_id)
+ func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
+ jscode = func_code or self._load_player(video_id, player_url)
+ jsi = JSInterpreter(jscode)
+
+ if func_code:
+ return jsi, player_id, func_code
+
+ func_name = self._extract_n_function_name(jscode)
+ # For redundancy
+ func_code = self._search_regex(
+ r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
+ # NB: The end of the regex is intentionally kept strict
+ {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
+ jscode, 'nsig function', group=('var', 'code'), default=None)
if func_code:
- jsi = JSInterpreter(func_code)
+ func_code = ([func_code[0]], func_code[1])
else:
- jscode = self._load_player(video_id, player_url)
- funcname = self._extract_n_function_name(jscode)
- jsi = JSInterpreter(jscode)
- func_code = jsi.extract_function_code(funcname)
- self._downloader.cache.store('youtube-nsig', player_id, func_code)
+ self.write_debug('Extracting nsig function with jsinterp')
+ func_code = jsi.extract_function_code(func_name)
- if self.get_param('youtube_print_sig_code'):
- self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
+ self.cache.store('youtube-nsig', player_id, func_code)
+ return jsi, player_id, func_code
+
+ def _extract_n_function_from_code(self, jsi, func_code):
+ func = jsi.extract_function_from_code(*func_code)
+
+ def extract_nsig(s):
+ try:
+ ret = func([s])
+ except JSInterpreter.Exception:
+ raise
+ except Exception as e:
+ raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
- return lambda s: jsi.extract_function_from_code(*func_code)([s])
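+ # The player's n function signals failure by returning a string prefixed with "enhanced_except_"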
+ if ret.startswith('enhanced_except_'):
+ raise JSInterpreter.Exception('Signature function returned an exception')
+ return ret
+
+ return extract_nsig
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
@@ -2575,74 +3022,76 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return sts
def _mark_watched(self, video_id, player_responses):
- playback_url = get_first(
- player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
- expected_type=url_or_none)
- if not playback_url:
- self.report_warning('Unable to mark watched')
- return
- parsed_playback_url = compat_urlparse.urlparse(playback_url)
- qs = compat_urlparse.parse_qs(parsed_playback_url.query)
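+ # is_full is 0 for the playback-stats ping and 1 for the watchtime ("fully watched") ping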
+ for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
+ label = 'fully ' if is_full else ''
+ url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
+ expected_type=url_or_none)
+ if not url:
+ self.report_warning(f'Unable to mark {label}watched')
+ return
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
+
+ # cpn generation algorithm is reverse engineered from base.js.
+ # In fact it works even with dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+ cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+
+ # More consistent results when setting it to just before the end
+ video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ 'cmt': video_length,
+ 'el': 'detailpage', # otherwise defaults to "shorts"
+ })
- # cpn generation algorithm is reverse engineered from base.js.
- # In fact it works even with dummy cpn.
- CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
- cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
+ if is_full:
+ # these seem to mark watchtime "history" in the real world
+ # they're required, so send in a single value
+ qs.update({
+ 'st': 0,
+ 'et': video_length,
+ })
- qs.update({
- 'ver': ['2'],
- 'cpn': [cpn],
- })
- playback_url = compat_urlparse.urlunparse(
- parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+ url = urllib.parse.urlunparse(
+ parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
- self._download_webpage(
- playback_url, video_id, 'Marking watched',
- 'Unable to mark watched', fatal=False)
+ self._download_webpage(
+ url, video_id, f'Marking {label}watched',
+ 'Unable to mark watched', fatal=False)
- @staticmethod
- def _extract_urls(webpage):
- # Embedded YouTube player
- entries = [
- unescapeHTML(mobj.group('url'))
- for mobj in re.finditer(r'''(?x)
- (?:
- <iframe[^>]+?src=|
- data-video-url=|
- <embed[^>]+?src=|
- embedSWF\(?:\s*|
- <object[^>]+data=|
- new\s+SWFObject\(
- )
- (["\'])
- (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
- (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
- \1''', webpage)]
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ # Invidious Instances
+ # https://github.com/hypervideo/hypervideo/issues/195
+ # https://github.com/iv-org/invidious/pull/1730
+ mobj = re.search(
+ r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
+ webpage)
+ if mobj:
+ yield cls.url_result(mobj.group('url'), cls)
+ raise cls.StopExtraction()
+
+ yield from super()._extract_from_webpage(url, webpage)
# lazyYT YouTube embed
- entries.extend(list(map(
- unescapeHTML,
- re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+ for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
+ yield cls.url_result(unescapeHTML(id_), cls, id_)
# Wordpress "YouTube Video Importer" plugin
- matches = re.findall(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
- data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
- entries.extend(m[-1] for m in matches)
-
- return entries
-
- @staticmethod
- def _extract_url(webpage):
- urls = YoutubeIE._extract_urls(webpage)
- return urls[0] if urls else None
+ for m in re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
+ yield cls.url_result(m[-1], cls, m[-1])
@classmethod
def extract_id(cls, url):
- mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- return mobj.group('id')
+ video_id = cls.get_temp_id(url)
+ if not video_id:
+ raise ExtractorError(f'Invalid URL: {url}')
+ return video_id
def _extract_chapters_from_json(self, data, duration):
chapter_list = traverse_obj(
@@ -2667,39 +3116,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
chapter_title = lambda chapter: self._get_text(chapter, 'title')
- return next((
- filter(None, (
- self._extract_chapters(
- traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
- chapter_time, chapter_title, duration)
- for contents in content_list
- ))), [])
-
- def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
- chapters = []
- last_chapter = {'start_time': 0}
- for idx, chapter in enumerate(chapter_list or []):
- title = chapter_title(chapter)
- start_time = chapter_time(chapter)
- if start_time is None:
- continue
- last_chapter['end_time'] = start_time
- if start_time < last_chapter['start_time']:
- if idx == 1:
- chapters.pop()
- self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
- else:
- self.report_warning(f'Invalid start time for chapter "{title}"')
- continue
- last_chapter = {'start_time': start_time, 'title': title}
- chapters.append(last_chapter)
- last_chapter['end_time'] = duration
- return chapters
+ return next(filter(None, (
+ self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
+ chapter_time, chapter_title, duration)
+ for contents in content_list)), [])
- def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
- return self._parse_json(self._search_regex(
- (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
- regex), webpage, name, default='{}'), video_id, fatal=False)
+ def _extract_chapters_from_description(self, description, duration):
+ duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
+ sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
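+ # Try "12:34 Chapter title" lines first, then fall back to "Chapter title 12:34" lines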
+ return self._extract_chapters(
+ re.findall(sep_re % (duration_re, r'.+?'), description or ''),
+ chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
+ duration=duration, strict=False) or self._extract_chapters(
+ re.findall(sep_re % (r'.+?', duration_re), description or ''),
+ chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
+ duration=duration, strict=False)
+
+ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
+ if not duration:
+ return
+ chapter_list = [{
+ 'start_time': chapter_time(chapter),
+ 'title': chapter_title(chapter),
+ } for chapter in chapter_list or []]
+ if not strict:
+ chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+ chapters = [{'start_time': 0}]
+ for idx, chapter in enumerate(chapter_list):
+ if chapter['start_time'] is None:
+ self.report_warning(f'Incomplete chapter {idx}')
+ elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+ chapters.append(chapter)
+ elif chapter not in chapters:
+ self.report_warning(
+ f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
+ return chapters[1:]
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
@@ -2708,16 +3160,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
text = self._get_text(comment_renderer, 'contentText')
- # note: timestamp is an estimate calculated from the current time and time_text
- timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
+ # Timestamp is an estimate calculated from the current time and time_text
+ time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
+ timestamp = self._parse_time_text(time_text)
+
author = self._get_text(comment_renderer, 'authorText')
author_id = try_get(comment_renderer,
- lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
+ lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
- lambda x: x['likeCount']), compat_str)) or 0
+ lambda x: x['likeCount']), str)) or 0
author_thumbnail = try_get(comment_renderer,
- lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
+ lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
is_favorited = 'creatorHeart' in (try_get(
@@ -2796,8 +3250,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
comment_entries_iter = self._comment_entries(
comment_replies_renderer, ytcfg, video_id,
parent=comment.get('id'), tracker=tracker)
- for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
- yield reply_comment
+ yield from itertools.islice(comment_entries_iter, min(
+ max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
# Keeps track of counts across recursive calls
if not tracker:
@@ -2812,8 +3266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# YouTube comments have a max depth of 2
max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
if max_depth:
- self._downloader.deprecation_warning(
- '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
+ self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
+ 'Set max replies in the max-comments extractor argument instead')
if max_depth == 1 and parent:
return
@@ -2821,12 +3275,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
continuation = self._extract_continuation(root_continuation_data)
- message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
- if message and not parent:
- self.report_warning(message, video_id=video_id)
response = None
+ is_forced_continuation = False
is_first_continuation = parent is None
+ if is_first_continuation and not continuation:
+ # Sometimes you can get comments by generating the continuation yourself,
+ # even if YouTube initially reports them being disabled - e.g. stories comments.
+ # Note: if the comment section is actually disabled, YouTube may return a response with
+ # required check_get_keys missing. So we will disable that check initially in this case.
+ continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
+ is_forced_continuation = True
for page_num in itertools.count(0):
if not continuation:
@@ -2843,12 +3302,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
' ' if parent else '', ' replies' if parent else '',
page_num, comment_prog_str)
-
- response = self._extract_response(
- item_id=None, query=continuation,
- ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
- check_get_keys='onResponseReceivedEndpoints')
-
+ try:
+ response = self._extract_response(
+ item_id=None, query=continuation,
+ ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
+ check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
+ except ExtractorError as e:
+ # Ignore incomplete data error for replies if retries didn't work.
+ # This is to allow any other parent comments and comment threads to be downloaded.
+ # See: https://github.com/hypervideo/hypervideo/issues/4669
+ if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
+ self.report_warning(
+ 'Received incomplete data for a comment reply thread and retrying did not help. '
+ 'Ignoring to let other comments be downloaded.')
+ else:
+ raise
+ is_forced_continuation = False
continuation_contents = traverse_obj(
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
@@ -2873,6 +3342,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if continuation:
break
+ message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
+ if message and not parent and tracker['running_total'] == 0:
+ self.report_warning(f'YouTube said: {message}', video_id=video_id, only_once=True)
+ raise self.CommentsDisabled
+
+ @staticmethod
+ def _generate_comment_continuation(video_id):
+ """
+ Generates initial comment section continuation token from given video id
+ """
+ token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
+ return base64.b64encode(token.encode()).decode()
+
def _get_comments(self, ytcfg, video_id, contents, webpage):
"""Entry for comment extraction"""
def _real_comment_extract(contents):
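
# A minimal standalone sketch of _generate_comment_continuation above: the
# token is a protobuf-like byte string with the video id spliced in twice,
# then base64-encoded for use as a `continuation` in the `next` API endpoint.
import base64

def generate_comment_continuation(video_id):
    token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    return base64.b64encode(token.encode()).decode()

print(generate_comment_continuation('dQw4w9WgXcQ'))  # hypothetical video id
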
@@ -2918,7 +3400,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
- def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
+ _STORY_PLAYER_PARAMS = '8AEB'
+
+ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
@@ -2926,7 +3410,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
headers = self.generate_api_headers(
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
- yt_query = {'videoId': video_id}
+ yt_query = {
+ 'videoId': video_id,
+ }
+ if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
+ yt_query['params'] = self._STORY_PLAYER_PARAMS
+
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
@@ -2939,7 +3428,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
requested_clients = []
default = ['android', 'web']
allowed_clients = sorted(
- [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
+ (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
for client in self._configuration_arg('player_client'):
if client in allowed_clients:
@@ -2959,22 +3448,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return orderedSet(requested_clients)
- def _extract_player_ytcfg(self, client, video_id):
- url = {
- 'web_music': 'https://music.youtube.com',
- 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
- }.get(client)
- if not url:
- return {}
- webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())
- return self.extract_ytcfg(video_id, webpage) or {}
-
- def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
+ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None
if webpage:
- initial_pr = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
- video_id, 'initial player response')
+ initial_pr = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
all_clients = set(clients)
clients = clients[::-1]
@@ -3005,8 +3483,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
while clients:
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {}
- if 'configs' not in self._configuration_arg('player_skip'):
- player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
+ if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
+ player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
@@ -3020,7 +3498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
- client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
+ client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e:
if last_error:
self.report_warning(last_error)
@@ -3028,7 +3506,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
if pr:
- prs.append(pr)
+ # YouTube may return a different video player response than expected.
+ # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
+ pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
+ if pr_video_id and pr_video_id != video_id:
+ self.report_warning(
+ f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
+ else:
+ prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
@@ -3045,9 +3530,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.report_warning(last_error)
return prs, player_url
- def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
- itags, stream_ids = {}, []
- itag_qualities, res_qualities = {}, {}
+ def _needs_live_processing(self, live_status, duration):
+ if (live_status == 'is_live' and self.get_param('live_from_start')
+ or live_status == 'post_live' and (duration or 0) > 4 * 3600):
+ return live_status
+
+ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
+ itags, stream_ids = collections.defaultdict(set), []
+ itag_qualities, res_qualities = {}, {0: None}
q = qualities([
# Normally tiny is the smallest video-only format. But
# audio-only formats with unknown quality may get tagged as tiny
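
# A standalone sketch of _needs_live_processing above: post-live videos longer
# than four hours (and live streams with --live-from-start) need the
# download-from-start fragment handling; everything else does not.
def needs_live_processing(live_status, duration, live_from_start=False):
    if (live_status == 'is_live' and live_from_start
            or live_status == 'post_live' and (duration or 0) > 4 * 3600):
        return live_status

print(needs_live_processing('post_live', 5 * 3600))  # post_live
print(needs_live_processing('is_live', None))        # None (no --live-from-start)
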
@@ -3088,45 +3578,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
fmt_url = fmt.get('url')
if not fmt_url:
- sc = compat_parse_qs(fmt.get('signatureCipher'))
+ sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
- if not (sc and fmt_url and encrypted_sig):
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
continue
- if not player_url:
+ try:
+ fmt_url += '&%s=%s' % (
+ traverse_obj(sc, ('sp', -1)) or 'signature',
+ self._decrypt_signature(encrypted_sig, video_id, player_url)
+ )
+ except ExtractorError as e:
+ self.report_warning('Signature extraction failed: Some formats may be missing',
+ video_id=video_id, only_once=True)
+ self.write_debug(e, only_once=True)
continue
- signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
- sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
- fmt_url += '&' + sp + '=' + signature
query = parse_qs(fmt_url)
throttled = False
if query.get('n'):
try:
+ decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
fmt_url = update_url_query(fmt_url, {
- 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
+ 'n': decrypt_nsig(query['n'][0], video_id, player_url)
+ })
except ExtractorError as e:
- self.report_warning(
- f'nsig extraction failed: You may experience throttling for some formats\n'
- f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
+ phantomjs_hint = ''
+ if isinstance(e, JSInterpreter.Exception):
+ phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
+ f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
+ if player_url:
+ self.report_warning(
+ f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
+ f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
+ self.write_debug(e, only_once=True)
+ else:
+ self.report_warning(
+ 'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
+ video_id=video_id, only_once=True)
throttled = True
- if itag:
- itags[itag] = 'https'
- stream_ids.append(stream_id)
-
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = (
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
# Some formats may have much smaller duration than others (possibly damaged during encoding)
- # Eg: 2-nOtRESiUc Ref: https://github.com/hypervideo/hypervideo/issues/2823
+ # E.g. 2-nOtRESiUc Ref: https://github.com/hypervideo/hypervideo/issues/2823
# Make sure to avoid false positives with small duration differences.
- # Eg: __2ABJjxzNo, ySuUZEjARPY
+ # E.g. __2ABJjxzNo, ySuUZEjARPY
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
if is_damaged:
- self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
+ self.report_warning(
+ f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
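
# A minimal sketch of the signatureCipher handling above, with a hypothetical
# cipher value and a stub standing in for self._decrypt_signature (the real
# decryption routine is derived from the JS player code).
import urllib.parse

def apply_signature(signature_cipher, decrypt):
    sc = urllib.parse.parse_qs(signature_cipher)
    fmt_url = sc['url'][0]
    sp = sc.get('sp', ['signature'])[-1]  # parameter name, defaulting to 'signature'
    return '%s&%s=%s' % (fmt_url, sp, decrypt(sc['s'][0]))

cipher = 's=ABCDEF&sp=sig&url=https%3A%2F%2Frr1.example.com%2Fvideoplayback%3Fitag%3D22'  # hypothetical
print(apply_signature(cipher, lambda s: s[::-1]))  # stub decryptor: reverse the scrambled sig
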
@@ -3135,9 +3639,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
+ try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
+ try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
- 'source_preference': -10 if throttled else -1,
+ # Format 22 is likely to be damaged. See https://github.com/hypervideo/hypervideo/issues/3372
+ 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
'fps': int_or_none(fmt.get('fps')) or None,
+ 'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality),
'has_drm': bool(fmt.get('drmFamilies')),
@@ -3168,49 +3676,70 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
if dct.get('ext'):
dct['container'] = dct['ext'] + '_dash'
+
+ if itag:
+ itags[itag].add(('https', dct.get('language')))
+ stream_ids.append(stream_id)
yield dct
- live_from_start = is_live and self.get_param('live_from_start')
- skip_manifests = self._configuration_arg('skip')
- if not self.get_param('youtube_include_hls_manifest', True):
- skip_manifests.append('hls')
- get_dash = 'dash' not in skip_manifests and (
- not is_live or live_from_start or self._configuration_arg('include_live_dash'))
- get_hls = not live_from_start and 'hls' not in skip_manifests
+ needs_live_processing = self._needs_live_processing(live_status, duration)
+ skip_bad_formats = not self._configuration_arg('include_incomplete_formats')
+
+ skip_manifests = set(self._configuration_arg('skip'))
+ if (not self.get_param('youtube_include_hls_manifest', True)
+ or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
+ or needs_live_processing and skip_bad_formats):
+ skip_manifests.add('hls')
+
+ if not self.get_param('youtube_include_dash_manifest', True):
+ skip_manifests.add('dash')
+ if self._configuration_arg('include_live_dash'):
+ self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
+ 'Use include_incomplete_formats extractor argument instead')
+ elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
+ skip_manifests.add('dash')
def process_manifest_format(f, proto, itag):
- if itag in itags:
- if itags[itag] == proto or f'{itag}-{proto}' in itags:
- return False
- itag = f'{itag}-{proto}'
- if itag:
+ key = (proto, f.get('language'))
+ if key in itags[itag]:
+ return False
+ itags[itag].add(key)
+
+ if any(p != proto for p, _ in itags[itag]):
+ f['format_id'] = f'{itag}-{proto}'
+ elif itag:
f['format_id'] = itag
- itags[itag] = proto
- f['quality'] = next((
- q(qdict[val])
- for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
- if val in qdict), -1)
+ f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
+ if f['quality'] == -1 and f.get('height'):
+ f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
return True
+ subtitles = {}
for sd in streaming_data:
- hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
+ hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
if hls_manifest_url:
- for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
+ subtitles = self._merge_subtitles(subs, subtitles)
+ for f in fmts:
if process_manifest_format(f, 'hls', self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None)):
yield f
- dash_manifest_url = get_dash and sd.get('dashManifestUrl')
+ dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
if dash_manifest_url:
- for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
+ formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
+ subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
+ for f in formats:
if process_manifest_format(f, 'dash', f['format_id']):
f['filesize'] = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
- if live_from_start:
+ if needs_live_processing:
f['is_from_start'] = True
yield f
+ yield subtitles
def _extract_storyboard(self, player_responses, duration):
spec = get_first(
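
# A simplified standalone sketch of process_manifest_format's de-duplication
# above: the same itag can surface via https, HLS and DASH, and per audio
# language, so formats are keyed on (protocol, language) and renamed when
# protocols collide. The `itags` defaultdict mirrors the one built earlier.
import collections

itags = collections.defaultdict(set)

def process_manifest_format(f, proto, itag):
    key = (proto, f.get('language'))
    if key in itags[itag]:
        return False  # exact duplicate, drop it
    itags[itag].add(key)
    if any(p != proto for p, _ in itags[itag]):
        f['format_id'] = f'{itag}-{proto}'  # disambiguate across protocols
    elif itag:
        f['format_id'] = itag
    return True

f1, f2 = {'language': 'en'}, {'language': 'en'}
print(process_manifest_format(f1, 'https', '22'), f1['format_id'])  # True 22
print(process_manifest_format(f2, 'hls', '22'), f2['format_id'])    # True 22-hls
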
@@ -3241,6 +3770,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': url,
'width': width,
'height': height,
+ 'fps': frame_count / duration,
+ 'rows': rows,
+ 'columns': cols,
'fragments': [{
'url': url.replace('$M', str(j)),
'duration': min(fragment_duration, duration - (j * fragment_duration)),
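
# A small sketch of the storyboard fragment math above: the board is a grid of
# rows x cols frames per mosaic image, and $M in the URL template is replaced
# with the mosaic index. All concrete values here are illustrative.
import math

url = 'https://i.ytimg.com/sb/VIDEO_ID/storyboard3_L2/M$M.jpg'  # hypothetical template
duration, rows, cols, frame_count = 120.0, 5, 5, 60
fps = frame_count / duration                     # 0.5 frames per second
fragment_count = frame_count / (rows * cols)     # mosaics needed for all frames
fragment_duration = duration / fragment_count
fragments = [{
    'url': url.replace('$M', str(j)),
    'duration': min(fragment_duration, duration - (j * fragment_duration)),
} for j in range(math.ceil(fragment_count))]
print(len(fragments), fragments[-1]['duration'])  # 3 20.0
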
@@ -3250,14 +3782,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
+ query = {'bpctr': '9999999999', 'has_verified': '1'}
+ if smuggled_data.get('is_story'):
+ query['pp'] = self._STORY_PLAYER_PARAMS
webpage = self._download_webpage(
- webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+ webpage_url, video_id, fatal=False, query=query)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
player_responses, player_url = self._extract_player_responses(
self._get_requested_clients(url, smuggled_data),
- video_id, webpage, master_ytcfg)
+ video_id, webpage, master_ytcfg, smuggled_data)
return webpage, master_ytcfg, player_responses, player_url
@@ -3266,11 +3801,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_live = get_first(video_details, 'isLive')
if is_live is None:
is_live = get_first(live_broadcast_details, 'isLiveNow')
-
+ live_content = get_first(video_details, 'isLiveContent')
+ is_upcoming = get_first(video_details, 'isUpcoming')
+ post_live = get_first(video_details, 'isPostLiveDvr')
+ live_status = ('post_live' if post_live
+ else 'is_live' if is_live
+ else 'is_upcoming' if is_upcoming
+ else 'was_live' if live_content
+ else 'not_live' if False in (is_live, live_content)
+ else None)
streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
- formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))
+ *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
- return live_broadcast_details, is_live, streaming_data, formats
+ return live_broadcast_details, live_status, streaming_data, formats, subtitles
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
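
# A standalone sketch of the live_status derivation above, with the player
# response flags as plain booleans (None means the response did not say).
def live_status(is_live, is_upcoming, post_live, live_content):
    return ('post_live' if post_live
            else 'is_live' if is_live
            else 'is_upcoming' if is_upcoming
            else 'was_live' if live_content
            else 'not_live' if False in (is_live, live_content)
            else None)

print(live_status(False, None, None, True))   # was_live
print(live_status(False, None, None, False))  # not_live
print(live_status(None, None, None, None))    # None (unknown)
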
@@ -3300,11 +3843,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
microformats = traverse_obj(
player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict, default=[])
- video_title = (
- get_first(video_details, 'title')
- or self._get_text(microformats, (..., 'title'))
- or search_meta(['og:title', 'twitter:title', 'title']))
- video_description = get_first(video_details, 'shortDescription')
+
+ translated_title = self._get_text(microformats, (..., 'title'))
+ video_title = (self._preferred_lang and translated_title
+ or get_first(video_details, 'title') # primary
+ or translated_title
+ or search_meta(['og:title', 'twitter:title', 'title']))
+ translated_description = self._get_text(microformats, (..., 'description'))
+ original_description = get_first(video_details, 'shortDescription')
+ video_description = (
+ self._preferred_lang and translated_description
+ # If original description is blank, it will be an empty string.
+ # Do not prefer translated description in this case.
+ or original_description if original_description is not None else translated_description)
multifeed_metadata_list = get_first(
player_responses,
@@ -3320,12 +3871,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Unquote should take place before split on comma (,) since textual
# fields may contain comma as well (see
# https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(
- compat_urllib_parse_unquote_plus(feed))
+ feed_data = urllib.parse.parse_qs(
+ urllib.parse.unquote_plus(feed))
def feed_entry(name):
return try_get(
- feed_data, lambda x: x[name][0], compat_str)
+ feed_data, lambda x: x[name][0], str)
feed_id = feed_entry('id')
if not feed_id:
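
# A minimal sketch of the multifeed parsing above: unquote each feed entry
# *before* splitting its query string, since fields may themselves contain
# commas. The feed string below is illustrative.
import urllib.parse

feed = 'id%3Dabcdefghijk%26title%3DCam%201'  # hypothetical percent-encoded feed entry
feed_data = urllib.parse.parse_qs(urllib.parse.unquote_plus(feed))
print(feed_data['id'][0], feed_data['title'][0])  # abcdefghijk Cam 1
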
@@ -3349,13 +3900,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self.playlist_result(
entries, video_id, video_title, video_description)
- duration = int_or_none(
- get_first(video_details, 'lengthSeconds')
- or get_first(microformats, 'lengthSeconds')
- or parse_duration(search_meta('duration'))) or None
+ duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
+ or int_or_none(get_first(microformats, 'lengthSeconds'))
+ or parse_duration(search_meta('duration')) or None)
- live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
- video_id, microformats, video_details, player_responses, player_url, duration)
+ live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
+ self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
+ if live_status == 'post_live':
+ self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
if not formats:
if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
@@ -3402,19 +3954,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
original_thumbnails = thumbnails.copy()
# The best resolution thumbnails sometimes do not appear in the webpage
- # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/hypervideo/hypervideo/issues/340
+ # See: https://github.com/hypervideo/hypervideo/issues/340
# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
thumbnail_names = [
- 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
- 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
- 'mqdefault', 'mq1', 'mq2', 'mq3',
- 'default', '1', '2', '3'
+ # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
+ # in resolution, these are not the custom thumbnail. So de-prioritize them
+ 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
+ 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
]
n_thumbnail_names = len(thumbnail_names)
thumbnails.extend({
'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
video_id=video_id, name=name, ext=ext,
- webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
+ webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
} for name in thumbnail_names for ext in ('webp', 'jpg'))
for thumb in thumbnails:
i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
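
# A quick sketch of the thumbnail URL generation above: every name in the
# priority list is tried in both webp and jpg on i.ytimg.com.
video_id = 'dQw4w9WgXcQ'  # hypothetical
thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default']
urls = ['https://i.ytimg.com/vi{webp}/{video_id}/{name}.{ext}'.format(
            video_id=video_id, name=name, ext=ext, webp='_webp' if ext == 'webp' else '')
        for name in thumbnail_names for ext in ('webp', 'jpg')]
print(urls[0])  # https://i.ytimg.com/vi_webp/dQw4w9WgXcQ/maxresdefault.webp
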
@@ -3429,26 +3981,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or search_meta('channelId'))
owner_profile_url = get_first(microformats, 'ownerProfileUrl')
- live_content = get_first(video_details, 'isLiveContent')
- is_upcoming = get_first(video_details, 'isUpcoming')
- if is_live is None:
- if is_upcoming or live_content is False:
- is_live = False
- if is_upcoming is None and (live_content or is_live):
- is_upcoming = False
live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
if not duration and live_end_time and live_start_time:
duration = live_end_time - live_start_time
- if is_live and self.get_param('live_from_start'):
- self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
+ needs_live_processing = self._needs_live_processing(live_status, duration)
- formats.extend(self._extract_storyboard(player_responses, duration))
+ def is_bad_format(fmt):
+ if needs_live_processing and not fmt.get('is_from_start'):
+ return True
+ elif (live_status == 'is_live' and needs_live_processing != 'is_live'
+ and fmt.get('protocol') == 'http_dash_segments'):
+ return True
+
+ for fmt in filter(is_bad_format, formats):
+ fmt['preference'] = (fmt.get('preference') or -1) - 10
+ fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
- # Source is given priority since formats that throttle are given lower source_preference
- # When throttling issue is fully fixed, remove this
- self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
+ if needs_live_processing:
+ self._prepare_live_from_start_formats(
+ formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
+
+ formats.extend(self._extract_storyboard(player_responses, duration))
info = {
'id': video_id,
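
# A sketch of the post-live deprioritization above: formats that cannot be
# downloaded from the start are kept but pushed down the format ranking.
# join_nonempty is simplified from the project's utils for self-containment.
def join_nonempty(*values, delim=', '):
    return delim.join(str(v) for v in values if v)

formats = [{'format_id': '299'}, {'format_id': '299-fs', 'is_from_start': True}]
for fmt in formats:
    if not fmt.get('is_from_start'):  # "bad" under post-live processing
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
print(formats[0])  # {'format_id': '299', 'preference': -11, 'format_note': '(Last 4 hours)'}
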
@@ -3463,7 +4018,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
'uploader_url': owner_profile_url,
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
'duration': duration,
'view_count': int_or_none(
get_first((video_details, microformats), (..., 'viewCount'))
@@ -3477,14 +4032,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'categories': [category] if category else None,
'tags': keywords,
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
- 'is_live': is_live,
- 'was_live': (False if is_live or is_upcoming or live_content is False
- else None if is_live is None or is_upcoming is None
- else live_content),
- 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
+ 'live_status': live_status,
'release_timestamp': live_start_time,
+ '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
+ 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
}
+ subtitles = {}
pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
if pctr:
def get_lang_code(track):
@@ -3511,7 +4065,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'name': sub_name,
})
- subtitles, automatic_captions = {}, {}
+ # NB: Constructing the full subtitle dictionary is slow
+ get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
+ self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
for lang_code, caption_track in captions.items():
base_url = caption_track.get('baseUrl')
orig_lang = parse_qs(base_url).get('lang', [None])[-1]
@@ -3529,11 +4085,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not trans_code:
continue
orig_trans_code = trans_code
- if caption_track.get('kind') != 'asr':
- if 'translated_subs' in self._configuration_arg('skip'):
+ if caption_track.get('kind') != 'asr' and trans_code != 'und':
+ if not get_translated_subs:
continue
trans_code += f'-{lang_code}'
- trans_name += format_field(lang_name, template=' from %s')
+ trans_name += format_field(lang_name, None, ' from %s')
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
@@ -3542,12 +4098,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Setting tlang=lang returns damaged subtitles.
process_language(automatic_captions, base_url, trans_code, trans_name,
{} if orig_lang == orig_trans_code else {'tlang': trans_code})
- info['automatic_captions'] = automatic_captions
- info['subtitles'] = subtitles
- parsed_url = compat_urllib_parse_urlparse(url)
+ info['automatic_captions'] = automatic_captions
+ info['subtitles'] = subtitles
+
+ parsed_url = urllib.parse.urlparse(url)
for component in [parsed_url.fragment, parsed_url.query]:
- query = compat_parse_qs(component)
+ query = urllib.parse.parse_qs(component)
for k, v in query.items():
for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
d_k += '_time'
@@ -3556,7 +4113,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Youtube Music Auto-generated description
if video_description:
- mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
+ mobj = re.search(
+ r'''(?xs)
+ (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
+ (?P<album>[^\n]+)
+ (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
+ (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
+ (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
+ .+\nAuto-generated\ by\ YouTube\.\s*$
+ ''', video_description)
if mobj:
release_year = mobj.group('release_year')
release_date = mobj.group('release_date')
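
# A quick demonstration of the verbose pattern above against a typical
# Music auto-generated description (the sample text is illustrative).
import re

desc = ('Song Title · Artist Name\n\nAlbum Name\n\n℗ 2020 Label\n\n'
        'Released on: 2020-01-01\n\nAuto-generated by YouTube.')
mobj = re.search(r'''(?xs)
    (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
    (?P<album>[^\n]+)
    (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
    (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
    (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
    .+\nAuto-generated\ by\ YouTube\.\s*$
''', desc)
print(mobj.group('track', 'artist', 'album', 'release_date'))
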
@@ -3574,9 +4139,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_data = None
if webpage:
- initial_data = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_DATA_RE, video_id,
- 'yt initial data')
+ initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
if not initial_data:
query = {'videoId': video_id}
query.update(self._get_checkok_params())
@@ -3586,22 +4149,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
headers=self.generate_api_headers(ytcfg=master_ytcfg),
note='Downloading initial data API JSON')
- try:
- # This will error if there is no livechat
+ info['comment_count'] = traverse_obj(initial_data, (
+ 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
+ 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
+ ), (
+ 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
+ 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
+ ), expected_type=int_or_none, get_all=False)
+
+ try: # This will error if there is no livechat
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ except (KeyError, IndexError, TypeError):
+ pass
+ else:
info.setdefault('subtitles', {})['live_chat'] = [{
- 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
+ # url is needed to set cookies
+ 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
'video_id': video_id,
'ext': 'json',
- 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
+ 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
+ else 'youtube_live_chat_replay'),
}]
- except (KeyError, IndexError, TypeError):
- pass
if initial_data:
info['chapters'] = (
self._extract_chapters_from_json(initial_data, duration)
or self._extract_chapters_from_engagement_panel(initial_data, duration)
+ or self._extract_chapters_from_description(video_description, duration)
or None)
contents = traverse_obj(
@@ -3618,7 +4192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
info['location'] = stl
else:
- mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
+ mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
if mobj:
info.update({
'series': mobj.group(1),
@@ -3629,19 +4203,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
vpir,
lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
list) or []):
- tbr = tlb.get('toggleButtonRenderer') or {}
- for getter, regex in [(
- lambda x: x['defaultText']['accessibility']['accessibilityData'],
- r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
- lambda x: x['accessibility'],
- lambda x: x['accessibilityData']['accessibilityData'],
- ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
- label = (try_get(tbr, getter, dict) or {}).get('label')
- if label:
- mobj = re.match(regex, label)
- if mobj:
- info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
- break
+ tbrs = variadic(
+ traverse_obj(
+ tlb, 'toggleButtonRenderer',
+ ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
+ default=[]))
+ for tbr in tbrs:
+ for getter, regex in [(
+ lambda x: x['defaultText']['accessibility']['accessibilityData'],
+ r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
+ lambda x: x['accessibility'],
+ lambda x: x['accessibilityData']['accessibilityData'],
+ ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
+ label = (try_get(tbr, getter, dict) or {}).get('label')
+ if label:
+ mobj = re.match(regex, label)
+ if mobj:
+ info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
+ break
sbr_tooltip = try_get(
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
if sbr_tooltip:
@@ -3650,6 +4229,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': str_to_int(like_count),
'dislike_count': str_to_int(dislike_count),
})
+ vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
+ if vcr:
+ vc = self._get_count(vcr, 'viewCount')
+ # Upcoming premieres with waiting count are treated as live here
+ if vcr.get('isLive'):
+ info['concurrent_view_count'] = vc
+ elif info.get('view_count') is None:
+ info['view_count'] = vc
+
vsir = get_first(contents, 'videoSecondaryInfoRenderer')
if vsir:
vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
@@ -3695,8 +4283,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
upload_date = (
unified_strdate(get_first(microformats, 'uploadDate'))
or unified_strdate(search_meta('uploadDate')))
- if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
- upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
+ if not upload_date or (
+ live_status in ('not_live', None)
+ and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
+ ):
+ upload_date = strftime_or_none(
+ self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
info['upload_date'] = upload_date
for to, frm in fallbacks.items():
@@ -3708,33 +4300,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if v:
info[d_k] = v
- is_private = get_first(video_details, 'isPrivate', expected_type=bool)
- is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
- is_membersonly = None
- is_premium = None
- if initial_data and is_private is not None:
- is_membersonly = False
- is_premium = False
- contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
- badge_labels = set()
- for content in contents:
- if not isinstance(content, dict):
- continue
- badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
- for badge_label in badge_labels:
- if badge_label.lower() == 'members only':
- is_membersonly = True
- elif badge_label.lower() == 'premium':
- is_premium = True
- elif badge_label.lower() == 'unlisted':
- is_unlisted = True
-
- info['availability'] = self._availability(
- is_private=is_private,
- needs_premium=is_premium,
- needs_subscription=is_membersonly,
- needs_auth=info['age_limit'] >= 18,
- is_unlisted=None if is_private is None else is_unlisted)
+ badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
+
+ is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+ or get_first(video_details, 'isPrivate', expected_type=bool))
+
+ info['availability'] = (
+ 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+ else self._availability(
+ is_private=is_private,
+ needs_premium=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
+ or False if initial_data and is_private is not None else None),
+ needs_subscription=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
+ or False if initial_data and is_private is not None else None),
+ needs_auth=info['age_limit'] >= 18,
+ is_unlisted=None if is_private is None else (
+ self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+ or get_first(microformats, 'isUnlisted', expected_type=bool))))
info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
@@ -3744,15 +4328,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
-
@staticmethod
def passthrough_smuggled_data(func):
- def _smuggle(entries, smuggled_data):
- for entry in entries:
- # TODO: Convert URL to music.youtube instead.
- # Do we need to passthrough any other smuggled_data?
- entry['url'] = smuggle_url(entry['url'], smuggled_data)
- yield entry
+ def _smuggle(info, smuggled_data):
+ if info.get('_type') not in ('url', 'url_transparent'):
+ return info
+ if smuggled_data.get('is_music_url'):
+ parsed_url = urllib.parse.urlparse(info['url'])
+ if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):
+ smuggled_data.pop('is_music_url')
+ info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))
+ if smuggled_data:
+ info['url'] = smuggle_url(info['url'], smuggled_data)
+ return info
@functools.wraps(func)
def wrapper(self, url):
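
# A minimal sketch of the music-URL passthrough above: rewrite a watch URL's
# host to music.youtube.com while keeping the rest of the URL intact.
import urllib.parse

url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'  # hypothetical
parsed = urllib.parse.urlparse(url)
if parsed.netloc in ('www.youtube.com', 'music.youtube.com'):
    url = urllib.parse.urlunparse(parsed._replace(netloc='music.youtube.com'))
print(url)  # https://music.youtube.com/watch?v=dQw4w9WgXcQ
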
@@ -3760,8 +4348,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if self.is_music_url(url):
smuggled_data['is_music_url'] = True
info_dict = func(self, url, smuggled_data)
- if smuggled_data and info_dict.get('entries'):
- info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data)
+ if smuggled_data:
+ _smuggle(info_dict, smuggled_data)
+ if info_dict.get('entries'):
+ info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
return info_dict
return wrapper
@@ -3824,7 +4414,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
# generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get(
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
+ str))
if ep_url:
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
if ie.suitable(ep_url):
@@ -3859,8 +4449,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
# TODO: add support for nested playlists so each shelf is processed
# as separate playlist
# TODO: this includes only first N items
- for entry in self._grid_entries(renderer):
- yield entry
+ yield from self._grid_entries(renderer)
renderer = content.get('horizontalListRenderer')
if renderer:
# TODO
@@ -3869,7 +4458,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _shelf_entries(self, shelf_renderer, skip_channels=False):
ep = try_get(
shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str)
+ str)
shelf_url = urljoin('https://www.youtube.com', ep)
if shelf_url:
# Skipping links to other channels; note that checking for
@@ -3880,8 +4469,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
title = self._get_text(shelf_renderer, 'title')
yield self.url_result(shelf_url, video_title=title)
# Shelf may not contain shelf URL, fallback to extraction from content
- for entry in self._shelf_entries_from_content(shelf_renderer):
- yield entry
+ yield from self._shelf_entries_from_content(shelf_renderer)
def _playlist_entries(self, video_list_renderer):
for content in video_list_renderer['contents']:
@@ -3896,8 +4484,8 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield self._extract_video(renderer)
def _rich_entries(self, rich_grid_renderer):
- renderer = try_get(
- rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
+ renderer = traverse_obj(
+ rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
video_id = renderer.get('videoId')
if not video_id:
return
@@ -3930,7 +4518,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield entry
# playlist attachment
playlist_id = try_get(
- post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
+ post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
if playlist_id:
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id,
@@ -3941,7 +4529,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if not isinstance(run, dict):
continue
ep_url = try_get(
- run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
+ run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
if not ep_url:
continue
if not YoutubeIE.suitable(ep_url):
@@ -3957,10 +4545,12 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return
for content in contents:
renderer = content.get('backstagePostThreadRenderer')
- if not isinstance(renderer, dict):
+ if isinstance(renderer, dict):
+ yield from self._post_thread_entries(renderer)
continue
- for entry in self._post_thread_entries(renderer):
- yield entry
+ renderer = content.get('videoRenderer')
+ if isinstance(renderer, dict):
+ yield self._video_entry(renderer)
r''' # unused
def _rich_grid_entries(self, contents):
@@ -3972,6 +4562,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield entry
'''
+ def _report_history_entries(self, renderer):
+ for url in traverse_obj(renderer, (
+ 'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
+ 'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
+ 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')):
+ yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
+
def _extract_entries(self, parent_renderer, continuation_list):
# continuation_list is modified in-place with continuation_list = [continuation_token]
continuation_list[:] = [None]
@@ -3983,12 +4580,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
expected_type=dict)
if not is_renderer:
- renderer = content.get('richItemRenderer')
- if renderer:
- for entry in self._rich_entries(renderer):
+ if content.get('richItemRenderer'):
+ for entry in self._rich_entries(content['richItemRenderer']):
yield entry
continuation_list[0] = self._extract_continuation(parent_renderer)
+ elif content.get('reportHistorySectionRenderer'): # https://www.youtube.com/reporthistory
+ table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
+ yield from self._report_history_entries(table)
+ continuation_list[0] = self._extract_continuation(table)
continue
+
isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
for isr_content in isr_contents:
if not isinstance(isr_content, dict):
@@ -4030,8 +4631,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
parent_renderer = (
try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
- for entry in extract_entries(parent_renderer):
- yield entry
+ yield from extract_entries(parent_renderer)
continuation = continuation_list[0]
for page_num in itertools.count(1):
@@ -4040,7 +4640,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
headers = self.generate_api_headers(
ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
response = self._extract_response(
- item_id='%s page %s' % (item_id, page_num),
+ item_id=f'{item_id} page {page_num}',
query=continuation, headers=headers, ytcfg=ytcfg,
check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
@@ -4050,27 +4650,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
# See: https://github.com/ytdl-org/youtube-dl/issues/28702
visitor_data = self._extract_visitor_data(response) or visitor_data
- known_continuation_renderers = {
- 'playlistVideoListContinuation': self._playlist_entries,
- 'gridContinuation': self._grid_entries,
- 'itemSectionContinuation': self._post_thread_continuation_entries,
- 'sectionListContinuation': extract_entries, # for feeds
- }
- continuation_contents = try_get(
- response, lambda x: x['continuationContents'], dict) or {}
- continuation_renderer = None
- for key, value in continuation_contents.items():
- if key not in known_continuation_renderers:
- continue
- continuation_renderer = value
- continuation_list = [None]
- for entry in known_continuation_renderers[key](continuation_renderer):
- yield entry
- continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
- break
- if continuation_renderer:
- continue
-
known_renderers = {
'videoRenderer': (self._grid_entries, 'items'), # for membership tab
'gridPlaylistRenderer': (self._grid_entries, 'items'),
@@ -4079,79 +4658,81 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'playlistVideoRenderer': (self._playlist_entries, 'contents'),
'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
'richItemRenderer': (extract_entries, 'contents'), # for hashtag
- 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
+ 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
+ 'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
+ 'playlistVideoListContinuation': (self._playlist_entries, None),
+ 'gridContinuation': (self._grid_entries, None),
+ 'itemSectionContinuation': (self._post_thread_continuation_entries, None),
+ 'sectionListContinuation': (extract_entries, None), # for feeds
}
- on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
- continuation_items = try_get(
- on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
- continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
+
+ continuation_items = traverse_obj(response, (
+ ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
+ 'appendContinuationItemsAction', 'continuationItems'
+ ), 'continuationContents', get_all=False)
+ continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})
+
video_items_renderer = None
- for key, value in continuation_item.items():
+ for key in continuation_item.keys():
if key not in known_renderers:
continue
- video_items_renderer = {known_renderers[key][1]: continuation_items}
+ func, parent_key = known_renderers[key]
+ video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
continuation_list = [None]
- for entry in known_renderers[key][0](video_items_renderer):
- yield entry
+ yield from func(video_items_renderer)
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
+
+ if not video_items_renderer:
break
- if video_items_renderer:
- continue
- break
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
- for tab in tabs:
- renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
- if renderer.get('selected') is True:
- return renderer
- else:
- if fatal:
- raise ExtractorError('Unable to find selected tab')
+ for tab_renderer in tabs:
+ if tab_renderer.get('selected'):
+ return tab_renderer
+ if fatal:
+ raise ExtractorError('Unable to find selected tab')
- @classmethod
- def _extract_uploader(cls, data):
- uploader = {}
- renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
- owner = try_get(
- renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
- if owner:
- uploader['uploader'] = owner.get('text')
- uploader['uploader_id'] = try_get(
- owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
- uploader['uploader_url'] = urljoin(
- 'https://www.youtube.com/',
- try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
- return {k: v for k, v in uploader.items() if v is not None}
+ @staticmethod
+ def _extract_tab_renderers(response):
+ return traverse_obj(
+ response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict)
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
- playlist_id = title = description = channel_url = channel_name = channel_id = None
- tags = []
+ metadata = self._extract_metadata_from_tabs(item_id, data)
selected_tab = self._extract_selected_tab(tabs)
- primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
- renderer = try_get(
- data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
- if renderer:
- channel_name = renderer.get('title')
- channel_url = renderer.get('channelUrl')
- channel_id = renderer.get('externalId')
- else:
- renderer = try_get(
- data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
+ metadata['title'] += format_field(selected_tab, 'title', ' - %s')
+ metadata['title'] += format_field(selected_tab, 'expandedText', ' - %s')
- if renderer:
- title = renderer.get('title')
- description = renderer.get('description', '')
- playlist_id = channel_id
- tags = renderer.get('keywords', '').split()
+ return self.playlist_result(
+ self._entries(
+ selected_tab, metadata['id'], ytcfg,
+ self._extract_account_syncid(ytcfg, data),
+ self._extract_visitor_data(data, ytcfg)),
+ **metadata)
+
+ def _extract_metadata_from_tabs(self, item_id, data):
+ info = {'id': item_id}
+
+ metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
+ if metadata_renderer:
+ info.update({
+ 'uploader': metadata_renderer.get('title'),
+ 'uploader_id': metadata_renderer.get('externalId'),
+ 'uploader_url': metadata_renderer.get('channelUrl'),
+ })
+ if info['uploader_id']:
+ info['id'] = info['uploader_id']
+ else:
+ metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
# We can get the uncropped banner/avatar by replacing the crop params with '=s0'
# See: https://github.com/hypervideo/hypervideo/issues/2237#issuecomment-1013694714
def _get_uncropped(url):
return url_or_none((url or '').split('=')[0] + '=s0')
- avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
+ avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
if avatar_thumbnails:
uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
if uncropped_avatar:
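
# A minimal sketch of the _get_uncropped trick above: these image URLs carry
# crop parameters after '=', so replacing everything from '=' onward with
# '=s0' yields the uncropped original (url_or_none validation omitted here).
def get_uncropped(url):
    return (url or '').split('=')[0] + '=s0'

avatar = 'https://yt3.ggpht.com/abc123=s176-c-k-c0x00ffffff-no-rj'  # hypothetical
print(get_uncropped(avatar))  # https://yt3.ggpht.com/abc123=s0
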
@@ -4162,7 +4743,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
})
channel_banners = self._extract_thumbnails(
- data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
+ data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
for banner in channel_banners:
banner['preference'] = -10
@@ -4175,48 +4756,66 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'preference': -5
})
+ # Deprecated - remove primary_sidebar_renderer when layout discontinued
+ primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
+ playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)
+
primary_thumbnails = self._extract_thumbnails(
primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
+ playlist_thumbnails = self._extract_thumbnails(
+ playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
- if playlist_id is None:
- playlist_id = item_id
-
- playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
- last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
- if title is None:
- title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
- title += format_field(selected_tab, 'title', ' - %s')
- title += format_field(selected_tab, 'expandedText', ' - %s')
-
- metadata = {
- 'playlist_id': playlist_id,
- 'playlist_title': title,
- 'playlist_description': description,
- 'uploader': channel_name,
- 'uploader_id': channel_id,
- 'uploader_url': channel_url,
- 'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
- 'tags': tags,
- 'view_count': self._get_count(playlist_stats, 1),
+ info.update({
+ 'title': (traverse_obj(metadata_renderer, 'title')
+ or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
+ or info['id']),
'availability': self._extract_availability(data),
- 'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
- 'playlist_count': self._get_count(playlist_stats, 0),
'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
- }
- if not channel_id:
- metadata.update(self._extract_uploader(data))
- metadata.update({
- 'channel': metadata['uploader'],
- 'channel_id': metadata['uploader_id'],
- 'channel_url': metadata['uploader_url']})
- return self.playlist_result(
- self._entries(
- selected_tab, playlist_id, ytcfg,
- self._extract_account_syncid(ytcfg, data),
- self._extract_visitor_data(data, ytcfg)),
- **metadata)
+ 'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
+ 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
+ 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
+ })
- def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
+ # Playlist stats is a text runs array containing [video count, view count, last updated].
+ # last updated or (view count and last updated) may be missing.
+ playlist_stats = get_first(
+ (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))
+
+ last_updated_unix = self._parse_time_text(
+ self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
+ or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
+ info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
+
+ info['view_count'] = self._get_count(playlist_stats, 1)
+ if info['view_count'] is None: # 0 is allowed
+ info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
+
+ info['playlist_count'] = self._get_count(playlist_stats, 0)
+ if info['playlist_count'] is None: # 0 is allowed
+ info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
+
+ if not info.get('uploader_id'):
+ owner = traverse_obj(playlist_header_renderer, 'ownerText')
+ if not owner: # Deprecated
+ owner = traverse_obj(
+ self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
+ ('videoOwner', 'videoOwnerRenderer', 'title'))
+ owner_text = self._get_text(owner)
+ browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
+ info.update({
+ 'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
+ 'uploader_id': browse_ep.get('browseId'),
+ 'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
+ })
+
+ info.update({
+ 'channel': info['uploader'],
+ 'channel_id': info['uploader_id'],
+ 'channel_url': info['uploader_url']
+ })
+ return info
+
+ def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
first_id = last_id = response = None
for page_num in itertools.count(1):
videos = list(self._playlist_entries(playlist))
@@ -4225,11 +4824,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
if start >= len(videos):
return
- for video in videos[start:]:
- if video['id'] == first_id:
- self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
- return
- yield video
+ yield from videos[start:]
first_id = first_id or videos[0]['id']
last_id = videos[-1]['id']
watch_endpoint = try_get(
@@ -4253,20 +4848,25 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
title = playlist.get('title') or try_get(
- data, lambda x: x['titleText']['simpleText'], compat_str)
+ data, lambda x: x['titleText']['simpleText'], str)
playlist_id = playlist.get('playlistId') or item_id
# Delegating everything except mix playlists to regular tab-based playlist URL
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
- if playlist_url and playlist_url != url:
+ str))
+
+ # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
+ # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
+ is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)
+
+ if playlist_url and playlist_url != url and not is_known_unviewable:
return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
return self.playlist_result(
- self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
+ self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
playlist_id=playlist_id, playlist_title=title)
def _extract_availability(self, data):
@@ -4275,31 +4875,40 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
Note: Unless YouTube tells us explicitly, we do not assume it is public
@param data: response
"""
- is_private = is_unlisted = None
- renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
- badge_labels = self._extract_badges(renderer)
+ sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
+ playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
+ player_header_privacy = playlist_header_renderer.get('privacy')
+
+ badges = self._extract_badges(sidebar_renderer)
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
- privacy_dropdown_entries = try_get(
- renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
- for renderer_dict in privacy_dropdown_entries:
- is_selected = try_get(
- renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
- if not is_selected:
- continue
- label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
- if label:
- badge_labels.add(label.lower())
- break
+ privacy_setting_icon = get_first(
+ (playlist_header_renderer, sidebar_renderer),
+ ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
+ lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
+ expected_type=str)
- for badge_label in badge_labels:
- if badge_label == 'unlisted':
- is_unlisted = True
- elif badge_label == 'private':
- is_private = True
- elif badge_label == 'public':
- is_unlisted = is_private = False
- return self._availability(is_private, False, False, False, is_unlisted)
+ microformats_is_unlisted = traverse_obj(
+ data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)
+
+ return (
+ 'public' if (
+ self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+ or player_header_privacy == 'PUBLIC'
+ or privacy_setting_icon == 'PRIVACY_PUBLIC')
+ else self._availability(
+ is_private=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+ or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
+ else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
+ is_unlisted=(
+ self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+ or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
+ else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
+ else microformats_is_unlisted if microformats_is_unlisted is not None else None),
+ needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+ needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+ needs_auth=False))
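The chained conditional expressions above are dense. Roughly, the is_private flag resolves with this precedence (a sketch of how the expression parses, not extractor code):

    def is_private_flag(has_private_badge, header_privacy, dropdown_icon):
        # badge or header decide when the header reports any privacy value;
        # otherwise fall back to the dropdown icon; otherwise unknown
        if header_privacy is not None:
            return has_private_badge or header_privacy == 'PRIVATE'
        if dropdown_icon is not None:
            return dropdown_icon == 'PRIVACY_PRIVATE'
        return None  # undetermined; left for _availability to handle

    print(is_private_flag(False, None, 'PRIVACY_PRIVATE'))  # -> True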
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
@@ -4312,94 +4921,75 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
"""
- Get playlist with unavailable videos if the 'show unavailable videos' button exists.
+ Reload playlists with unavailable videos (e.g. private or region-blocked videos)
"""
- browse_id = params = None
- renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
- if not renderer:
+ is_playlist = bool(traverse_obj(
+ data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer')))
+ if not is_playlist:
return
- menu_renderer = try_get(
- renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
- for menu_item in menu_renderer:
- if not isinstance(menu_item, dict):
- continue
- nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
- text = try_get(
- nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
- if not text or text.lower() != 'show unavailable videos':
- continue
- browse_endpoint = try_get(
- nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
- browse_id = browse_endpoint.get('browseId')
- params = browse_endpoint.get('params')
- break
-
headers = self.generate_api_headers(
ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
visitor_data=self._extract_visitor_data(data, ytcfg))
query = {
- 'params': params or 'wgYCCAA=',
- 'browseId': browse_id or 'VL%s' % item_id
+ 'params': 'wgYCCAA=',
+ 'browseId': f'VL{item_id}'
}
return self._extract_response(
item_id=item_id, headers=headers, query=query,
check_get_keys='contents', fatal=False, ytcfg=ytcfg,
- note='Downloading API JSON with unavailable videos')
+ note='Redownloading playlist API JSON with unavailable videos')
+
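The hardcoded query replaces the old menu-button scrape; assuming 'wgYCCAA=' is the pre-encoded "show unavailable videos" flag (as the removed fallback suggests), the request for an example playlist id from the tests looks like:

    item_id = 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6'  # example playlist id
    query = {'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'}
    # -> browseId 'VLPLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6'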
+ @functools.cached_property
+ def skip_webpage(self):
+ return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
def _extract_webpage(self, url, item_id, fatal=True):
- retries = self.get_param('extractor_retries', 3)
- count = -1
- webpage = data = last_error = None
- while count < retries:
- count += 1
- # Sometimes youtube returns a webpage with incomplete ytInitialData
- # See: https://github.com/hypervideo/hypervideo/issues/116
- if last_error:
- self.report_warning('%s. Retrying ...' % last_error)
+ webpage, data = None, None
+ for retry in self.RetryManager(fatal=fatal):
try:
- webpage = self._download_webpage(
- url, item_id,
- note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
+ webpage = self._download_webpage(url, item_id, note='Downloading webpage')
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
- last_error = error_to_compat_str(e.cause or e.msg)
- if count < retries:
- continue
- if fatal:
- raise
- self.report_warning(error_to_compat_str(e))
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
+ retry.error = e
+ continue
+ self._error_or_warning(e, fatal=fatal)
break
- else:
- try:
- self._extract_and_report_alerts(data)
- except ExtractorError as e:
- if fatal:
- raise
- self.report_warning(error_to_compat_str(e))
- break
- if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
- break
+ try:
+ self._extract_and_report_alerts(data)
+ except ExtractorError as e:
+ self._error_or_warning(e, fatal=fatal)
+ break
- last_error = 'Incomplete yt initial data received'
- if count >= retries:
- if fatal:
- raise ExtractorError(last_error)
- self.report_warning(last_error)
- break
+ # Sometimes YouTube returns a webpage with incomplete ytInitialData
+ # See: https://github.com/hypervideo/hypervideo/issues/116
+ if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
+ retry.error = ExtractorError('Incomplete yt initial data received')
+ continue
return webpage, data
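The retry loop now delegates to self.RetryManager. A minimal sketch of the semantics assumed here — attempt, set .error to request another try, raise on exhaustion when fatal (the real helper presumably adds reporting and a configurable retry count):

    class RetryManagerSketch:
        def __init__(self, retries=3, fatal=True):
            self.retries, self.fatal, self.error = retries, fatal, None

        def __iter__(self):
            for _ in range(self.retries + 1):
                self.error = None
                yield self
                if self.error is None:  # attempt succeeded
                    return
            if self.fatal and self.error:
                raise self.error

    for retry in RetryManagerSketch(retries=2, fatal=False):
        retry.error = ValueError('Incomplete yt initial data received')
    # falls through silently after 3 attempts because fatal=False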
+ def _report_playlist_authcheck(self, ytcfg, fatal=True):
+ """Warn or raise if ytcfg (and data) could not be extracted from the initial webpage"""
+ if not ytcfg and self.is_authenticated:
+ msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
+ if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
+ raise ExtractorError(
+ f'{msg}. If you are not downloading private content, or '
+ 'your cookies are only for the first account and channel,'
+ ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
+ expected=True)
+ self.report_warning(msg, only_once=True)
+
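For reference, the skip flag named in the error message is passed on the command line like this (the playlist URL is taken from the tests below):

    hypervideo --extractor-args "youtubetab:skip=authcheck" \
        "https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0"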
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
data = None
- if 'webpage' not in self._configuration_arg('skip'):
+ if not self.skip_webpage:
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
# Reject webpage data if redirected to home page without explicitly requesting
- selected_tab = self._extract_selected_tab(traverse_obj(
- data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[]), fatal=False) or {}
+ selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
if (url != 'https://www.youtube.com/feed/recommended'
and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
@@ -4408,14 +4998,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
raise ExtractorError(msg, expected=True)
self.report_warning(msg, only_once=True)
if not data:
- if not ytcfg and self.is_authenticated:
- msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
- if 'authcheck' not in self._configuration_arg('skip') and fatal:
- raise ExtractorError(
- msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
- ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
- expected=True)
- self.report_warning(msg, only_once=True)
+ self._report_playlist_authcheck(ytcfg, fatal=fatal)
data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
return data, ytcfg
@@ -4453,14 +5036,20 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
('continuationContents', ),
)
- check_get_keys = tuple(set(keys[0] for keys in content_keys))
+ display_id = f'query "{query}"'
+ check_get_keys = tuple({keys[0] for keys in content_keys})
+ ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
+ self._report_playlist_authcheck(ytcfg, fatal=False)
continuation_list = [None]
+ search = None
for page_num in itertools.count(1):
data.update(continuation_list[0] or {})
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
search = self._extract_response(
- item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
- default_client=default_client, check_get_keys=check_get_keys)
+ item_id=f'{display_id} page {page_num}', ep='search', query=data,
+ default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
slr_contents = traverse_obj(search, *content_keys)
yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
if not continuation_list[0]:
@@ -4578,6 +5167,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'availability': 'public',
},
'playlist_count': 1,
}, {
@@ -4595,6 +5185,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+ 'availability': 'public',
},
'playlist_count': 0,
}, {
@@ -4741,6 +5332,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
'channel': 'Christiaan008',
+ 'availability': 'public',
},
'playlist_count': 96,
}, {
@@ -4759,6 +5351,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'view_count': int,
'description': '',
'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+ 'availability': 'public',
},
'playlist_mincount': 1123,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -4782,6 +5375,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': 'Interstellar Movie',
'description': '',
'modified_date': r're:\d{8}',
+ 'availability': 'public',
},
'playlist_mincount': 21,
}, {
@@ -4800,6 +5394,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
'modified_date': r're:\d{8}',
+ 'availability': 'public',
},
'playlist_mincount': 200,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -4819,6 +5414,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/c/blanktv',
'modified_date': r're:\d{8}',
'description': '',
+ 'availability': 'public',
},
'playlist_mincount': 1000,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -4837,6 +5433,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
'channel_url': 'https://www.youtube.com/user/Computerphile',
'channel': 'Computerphile',
+ 'availability': 'public',
+ 'modified_date': '20190712',
},
'playlist_mincount': 11,
}, {
@@ -4874,7 +5472,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
- 'id': 'GgL890LIznQ', # This will keep changing
+ 'id': 'Wq15eF5vCbI', # This will keep changing
'ext': 'mp4',
'title': str,
'uploader': 'Sky News',
@@ -4885,18 +5483,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'categories': ['News & Politics'],
'tags': list,
'like_count': int,
- 'release_timestamp': 1642502819,
+ 'release_timestamp': int,
'channel': 'Sky News',
'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
'age_limit': 0,
'view_count': int,
- 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
+ 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
'playable_in_embed': True,
- 'release_date': '20220118',
+ 'release_date': r're:\d+',
'availability': 'public',
'live_status': 'is_live',
'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'concurrent_view_count': int,
},
'params': {
'skip_download': True,
@@ -4974,7 +5573,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': '#cctv9',
'tags': [],
},
- 'playlist_mincount': 350,
+ 'playlist_mincount': 300, # not consistent but should be over 300
}, {
'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
'only_matching': True,
@@ -4994,7 +5593,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'NoCopyrightSounds',
'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
- 'title': 'NCS Releases',
+ 'title': 'NCS : All Releases 💿',
'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
'modified_date': r're:\d{8}',
@@ -5002,6 +5601,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
'tags': [],
'channel': 'NoCopyrightSounds',
+ 'availability': 'public',
},
'playlist_mincount': 166,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5022,23 +5622,18 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'modified_date': r're:\d{8}',
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
'description': '',
+ 'availability': 'public',
},
- 'expected_warnings': [
- 'The URL does not have a videos tab',
- r'[Uu]navailable videos (are|will be) hidden',
- ],
'playlist_mincount': 101,
}, {
- 'note': 'Topic without a UU playlist',
+ # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
+ # Treat as a general feed
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
'info_dict': {
'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
'tags': [],
},
- 'expected_warnings': [
- 'the playlist redirect gave error',
- ],
'playlist_mincount': 9,
}, {
'note': 'Youtube music Album',
@@ -5063,7 +5658,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'hypervideo unlisted playlist test',
'availability': 'unlisted',
'tags': [],
- 'modified_date': '20211208',
+ 'modified_date': '20220418',
'channel': 'colethedj',
'view_count': int,
'description': '',
@@ -5106,6 +5701,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'skip_download': True,
'extractor_args': {'youtubetab': {'skip': ['webpage']}}
},
+ 'skip': 'Query for sorting no longer works',
}, {
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
@@ -5122,11 +5718,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': 'Royalty Free Music - Topic',
'view_count': int,
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'availability': 'public',
},
- 'expected_warnings': [
- 'does not have a videos tab',
- r'[Uu]navailable videos (are|will be) hidden',
- ],
'playlist_mincount': 101,
'params': {
'skip_download': True,
@@ -5136,130 +5729,429 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'note': 'non-standard redirect to regional channel',
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True
+ }, {
+ 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
+ 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
+ 'info_dict': {
+ 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
+ 'modified_date': '20220407',
+ 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'tags': [],
+ 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'uploader': 'pukkandan',
+ 'availability': 'unlisted',
+ 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'channel': 'pukkandan',
+ 'description': 'Test for collaborative playlist',
+ 'title': 'hypervideo test - collaborative playlist',
+ 'view_count': int,
+ 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
+ },
+ 'playlist_mincount': 2
+ }, {
+ 'note': 'translated tab name',
+ 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
+ 'info_dict': {
+ 'id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'tags': [],
+ 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'description': 'test description',
+ 'title': 'cole-dlp-test-acc - 再生リスト',
+ 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'uploader': 'cole-dlp-test-acc',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel': 'cole-dlp-test-acc',
+ 'channel_follower_count': int,
+ },
+ 'playlist_mincount': 1,
+ 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+ 'expected_warnings': ['Preferring "ja"'],
+ }, {
+ # XXX: this should really check flat playlist entries, but the test suite doesn't support that
+ 'note': 'preferred lang set with playlist with translated video titles',
+ 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+ 'info_dict': {
+ 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+ 'tags': [],
+ 'view_count': int,
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'uploader': 'cole-dlp-test-acc',
+ 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel': 'cole-dlp-test-acc',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'description': 'test',
+ 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'title': 'dlp test playlist',
+ 'availability': 'public',
+ },
+ 'playlist_mincount': 1,
+ 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+ 'expected_warnings': ['Preferring "ja"'],
+ }, {
+ # shorts audio pivot for 2GtVksBMYFM.
+ 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
+ 'info_dict': {
+ 'id': 'sfv_audio_pivot',
+ 'title': 'sfv_audio_pivot',
+ 'tags': [],
+ },
+ 'playlist_mincount': 50,
+
+ }, {
+ # Channel with a real live tab (not to be confused with the streams tab)
+ # Do not treat it as if it should redirect to a live stream
+ 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
+ 'info_dict': {
+ 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
+ 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
+ 'tags': [],
+ },
+ 'playlist_mincount': 20,
+ }, {
+ # Tab name is not the same as tab id
+ 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
+ 'info_dict': {
+ 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
+ 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
+ 'tags': [],
+ },
+ 'playlist_mincount': 8,
+ }, {
+ # The home tab id is literally 'home'. Not to be mistaken for 'featured'
+ 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
+ 'info_dict': {
+ 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
+ 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
+ 'tags': [],
+ },
+ 'playlist_mincount': 8,
+ }, {
+ # Should get three playlists for videos, shorts and streams tabs
+ 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
+ 'info_dict': {
+ 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
+ 'title': 'Polka Ch. 尾丸ポルカ',
+ 'channel_follower_count': int,
+ 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
+ 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
+ 'uploader': 'Polka Ch. 尾丸ポルカ',
+ 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
+ 'channel': 'Polka Ch. 尾丸ポルカ',
+ 'tags': 'count:35',
+ 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
+ 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
+ },
+ 'playlist_count': 3,
+ }, {
+ # Shorts tab of a channel with a handle
+ 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
+ 'info_dict': {
+ 'id': 'UC0intLFzLaudFG-xAvUEO-A',
+ 'title': 'Not Just Bikes - Shorts',
+ 'tags': 'count:12',
+ 'uploader': 'Not Just Bikes',
+ 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
+ 'description': 'md5:7513148b1f02b924783157d84c4ea555',
+ 'channel_follower_count': int,
+ 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
+ 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
+ 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
+ 'channel': 'Not Just Bikes',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # Streams tab
+ 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
+ 'info_dict': {
+ 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+ 'title': '中村悠一 - Live',
+ 'tags': 'count:7',
+ 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+ 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
+ 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+ 'channel': '中村悠一',
+ 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
+ 'channel_follower_count': int,
+ 'uploader': '中村悠一',
+ 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
+ },
+ 'playlist_mincount': 60,
+ }, {
+ # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
+ # See test_youtube_lists
+ 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
+ 'only_matching': True,
+ }, {
+ # No uploads and no UCID given. Should fail with no uploads error
+ # See test_youtube_lists
+ 'url': 'https://www.youtube.com/news',
+ 'only_matching': True
+ }, {
+ # No videos tab but has a shorts tab
+ 'url': 'https://www.youtube.com/c/TKFShorts',
+ 'info_dict': {
+ 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+ 'title': 'Shorts Break - Shorts',
+ 'tags': 'count:32',
+ 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+ 'channel': 'Shorts Break',
+ 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
+ 'uploader': 'Shorts Break',
+ 'channel_follower_count': int,
+ 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+ 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
+ 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # Trending Now Tab. tab id is empty
+ 'url': 'https://www.youtube.com/feed/trending',
+ 'info_dict': {
+ 'id': 'trending',
+ 'title': 'trending - Now',
+ 'tags': [],
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # Trending Gaming Tab. tab id is empty
+ 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
+ 'info_dict': {
+ 'id': 'trending',
+ 'title': 'trending - Gaming',
+ 'tags': [],
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # Shorts URL results in a shorts tab
+ 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
+ 'info_dict': {
+ 'id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'title': 'cole-dlp-test-acc - Shorts',
+ 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel': 'cole-dlp-test-acc',
+ 'channel_follower_count': int,
+ 'description': 'test description',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'tags': [],
+ 'uploader': 'cole-dlp-test-acc',
+ 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+
+ },
+ 'playlist': [{
+ 'info_dict': {
+ '_type': 'url',
+ 'ie_key': 'Youtube',
+ 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
+ 'id': 'sSM9J5YH_60',
+ 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+ 'title': 'SHORT short',
+ 'channel': 'cole-dlp-test-acc',
+ 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+ 'view_count': int,
+ 'thumbnails': list,
+ }
+ }],
+ 'params': {'extract_flat': True},
+ }, {
+ # Live video status should be extracted
+ 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
+ 'info_dict': {
+ 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
+ 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
+ 'tags': []
+ },
+ 'playlist': [{
+ 'info_dict': {
+ '_type': 'url',
+ 'ie_key': 'Youtube',
+ 'url': 'startswith:https://www.youtube.com/watch?v=',
+ 'id': str,
+ 'title': str,
+ 'live_status': 'is_live',
+ 'channel_id': str,
+ 'channel_url': str,
+ 'concurrent_view_count': int,
+ 'channel': str,
+ }
+ }],
+ 'params': {'extract_flat': True},
+ 'playlist_mincount': 1
}]
@classmethod
def suitable(cls, url):
- return False if YoutubeIE.suitable(url) else super(
- YoutubeTabIE, cls).suitable(url)
+ return False if YoutubeIE.suitable(url) else super().suitable(url)
+
+ _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
+
+ def _get_url_mobj(self, url):
+ mobj = self._URL_RE.match(url).groupdict()
+ mobj.update((k, '') for k, v in mobj.items() if v is None)
+ return mobj
+
+ def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
+ tab_name = (tab.get('title') or '').lower()
+ tab_url = urljoin(base_url, traverse_obj(
+ tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
+
+ tab_id = (tab_url and self._get_url_mobj(tab_url)['tab'][1:]
+ or traverse_obj(tab, 'tabIdentifier', expected_type=str))
+ if tab_id:
+ return {
+ 'TAB_ID_SPONSORSHIPS': 'membership',
+ }.get(tab_id, tab_id), tab_name
+
+ # Fall back to the tab name if we cannot get the tab id.
+ # XXX: should we strip non-ASCII characters? e.g. the "let's play" tab on the special gaming channel
+ # Note that with a translated tab name this may result in an empty string, which we don't want.
+ if tab_name:
+ self.write_debug(f'Falling back to selected tab name: {tab_name}')
+ return {
+ 'home': 'featured',
+ 'live': 'streams',
+ }.get(tab_name, tab_name), tab_name
- _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
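A simplified, self-contained sketch of the resolution above, assuming the tab URL (when present) ends in the tab id; the Japanese title is from the translated-tab test earlier:

    def tab_id_and_name(tab):
        name = (tab.get('title') or '').lower()
        url = tab.get('url') or ''
        tab_id = url.rsplit('/', 1)[-1] if url else tab.get('tabIdentifier')
        if tab_id:
            return {'TAB_ID_SPONSORSHIPS': 'membership'}.get(tab_id, tab_id), name
        # fallback: normalize known names when only a (possibly translated) title exists
        return {'home': 'featured', 'live': 'streams'}.get(name, name), name

    print(tab_id_and_name({'title': '再生リスト', 'url': '/channel/UCiu-3thuViMebBjw_5nWYrA/playlists'}))
    # -> ('playlists', '再生リスト'): the URL still resolves the id even though the title is translated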
+ def _has_tab(self, tabs, tab_id):
+ return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
- url = compat_urlparse.urlunparse(
- compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ url = urllib.parse.urlunparse(
+ urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
compat_opts = self.get_param('compat_opts', [])
- def get_mobj(url):
- mobj = self._URL_RE.match(url).groupdict()
- mobj.update((k, '') for k, v in mobj.items() if v is None)
- return mobj
-
- mobj, redirect_warning = get_mobj(url), None
- # Youtube returns incomplete data if tabname is not lower case
- pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
- if is_channel:
- if smuggled_data.get('is_music_url'):
- if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
- item_id = item_id[2:]
- pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
- elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
- mdata = self._extract_tab_endpoint(
- f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
- murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
- get_all=False, expected_type=compat_str)
- if not murl:
- raise ExtractorError('Failed to resolve album to playlist')
- return self.url_result(murl, ie=YoutubeTabIE.ie_key())
- elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
- pre = f'https://www.youtube.com/channel/{item_id}'
-
- original_tab_name = tab
+ mobj = self._get_url_mobj(url)
+ pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
+ if is_channel and smuggled_data.get('is_music_url'):
+ if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
+ return self.url_result(
+ f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
+ elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
+ mdata = self._extract_tab_endpoint(
+ f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
+ murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
+ get_all=False, expected_type=str)
+ if not murl:
+ raise ExtractorError('Failed to resolve album to playlist')
+ return self.url_result(murl, YoutubeTabIE)
+ elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
+ return self.url_result(
+ f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)
+
+ original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
- # Home URLs should redirect to /videos/
- redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
- 'To download only the videos in the home page, add a "/featured" to the URL')
- tab = '/videos'
-
- url = ''.join((pre, tab, post))
- mobj = get_mobj(url)
+ url = f'{pre}/videos{post}'
# Handle both video/playlist URLs
qs = parse_qs(url)
- video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]
-
+ video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
if not video_id and mobj['not_channel'].startswith('watch'):
if not playlist_id:
# If there is neither a video nor a playlist ID, YouTube redirects to the home page, which is undesirable
- raise ExtractorError('Unable to recognize tab page')
+ raise ExtractorError('A video URL was given without video ID', expected=True)
# Common mistake: https://www.youtube.com/watch?list=playlist_id
self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
- url = f'https://www.youtube.com/playlist?list={playlist_id}'
- mobj = get_mobj(url)
+ return self.url_result(
+ f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)
- if video_id and playlist_id:
- if self.get_param('noplaylist'):
- self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
- return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
- ie=YoutubeIE.ie_key(), video_id=video_id)
- self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
+ if not self._yes_playlist(playlist_id, video_id):
+ return self.url_result(
+ f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
- data, ytcfg = self._extract_data(url, item_id)
+ data, ytcfg = self._extract_data(url, display_id)
# YouTube may provide a non-standard redirect to the regional channel
# See: https://github.com/hypervideo/hypervideo/issues/2694
+ # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
redirect_url = traverse_obj(
data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
- redirect_url = ''.join((
- urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
- self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
- return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())
+ redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
+ self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
+ return self.url_result(redirect_url, YoutubeTabIE)
- tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
- if tabs:
+ tabs, extra_tabs = self._extract_tab_renderers(data), []
+ if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
selected_tab = self._extract_selected_tab(tabs)
- selected_tab_name = selected_tab.get('title', '').lower()
- if selected_tab_name == 'home':
- selected_tab_name = 'featured'
- requested_tab_name = mobj['tab'][1:]
- if 'no-youtube-channel-redirect' not in compat_opts:
- if requested_tab_name == 'live':
- # Live tab should have redirected to the video
- raise ExtractorError('The channel is not currently live', expected=True)
- if requested_tab_name not in ('', selected_tab_name):
- redirect_warning = f'The channel does not have a {requested_tab_name} tab'
- if not original_tab_name:
- if item_id[:2] == 'UC':
- # Topic channels don't have /videos. Use the equivalent playlist instead
- pl_id = f'UU{item_id[2:]}'
- pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
- try:
- data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
- except ExtractorError:
- redirect_warning += ' and the playlist redirect gave error'
- else:
- item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
- redirect_warning += f'. Redirecting to playlist {pl_id} instead'
- if selected_tab_name and selected_tab_name != requested_tab_name:
- redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
+ selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
+ self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
+
+ if not original_tab_id and selected_tab_name:
+ self.to_screen('Downloading all uploads of the channel. '
+ 'To download only the videos in a specific tab, pass the tab\'s URL')
+ if self._has_tab(tabs, 'streams'):
+ extra_tabs.append(''.join((pre, '/streams', post)))
+ if self._has_tab(tabs, 'shorts'):
+ extra_tabs.append(''.join((pre, '/shorts', post)))
+ # XXX: Members-only tab should also be extracted
+
+ if not extra_tabs and selected_tab_id != 'videos':
+ # Channel does not have streams, shorts or videos tabs
+ if item_id[:2] != 'UC':
+ raise ExtractorError('This channel has no uploads', expected=True)
+
+ # Topic channels don't have /videos. Use the equivalent playlist instead
+ pl_id = f'UU{item_id[2:]}'
+ pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
+ try:
+ data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
+ except ExtractorError:
+ raise ExtractorError('This channel has no uploads', expected=True)
else:
- raise ExtractorError(redirect_warning, expected=True)
+ item_id, url = pl_id, pl_url
+ self.to_screen(
+ f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
+
+ elif extra_tabs and selected_tab_id != 'videos':
+ # When there are shorts/live tabs but no videos tab
+ url, data = f'{pre}{post}', None
+
+ elif (original_tab_id or 'videos') != selected_tab_id:
+ if original_tab_id == 'live':
+ # Live tab should have redirected to the video
+ # Except when the channel has an actual live tab
+ # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
+ raise UserNotLive(video_id=item_id)
+ elif selected_tab_name:
+ raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
- if redirect_warning:
- self.to_screen(redirect_warning)
- self.write_debug(f'Final URL: {url}')
+ # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
+ url = f'{pre}{post}'
# YouTube sometimes provides a button to reload playlist with unavailable videos.
if 'no-youtube-unavailable-videos' not in compat_opts:
- data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
+ data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
self._extract_and_report_alerts(data, only_once=True)
- tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
- if tabs:
- return self._extract_from_tabs(item_id, ytcfg, data, tabs)
+ tabs, entries = self._extract_tab_renderers(data), []
+ if tabs:
+ entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
+ entries[0].update({
+ 'extractor_key': YoutubeTabIE.ie_key(),
+ 'extractor': YoutubeTabIE.IE_NAME,
+ 'webpage_url': url,
+ })
+ if self.get_param('playlist_items') == '0':
+ entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
+ else: # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
+ entries.extend(map(self._real_extract, extra_tabs))
+
+ if len(entries) == 1:
+ return entries[0]
+ elif entries:
+ metadata = self._extract_metadata_from_tabs(item_id, data)
+ uploads_url = 'the Uploads (UU) playlist URL'
+ if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
+ uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
+ self.to_screen(
+ 'Downloading as multiple playlists, separated by tabs. '
+ f'To download as a single playlist instead, pass {uploads_url}')
+ return self.playlist_result(entries, item_id, **metadata)
+
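The UU rewrite suggested here (also used for topic channels above) is a plain id transform; with the three-tab channel from the tests:

    channel_id = 'UCK9V2B22uJYu3N7eR_BT9QA'  # Polka Ch. test channel above
    print(f'https://www.youtube.com/playlist?list=UU{channel_id[2:]}')
    # -> https://www.youtube.com/playlist?list=UUK9V2B22uJYu3N7eR_BT9QA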
+ # Inline playlist
playlist = traverse_obj(
data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
if playlist:
@@ -5268,10 +6160,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
video_id = traverse_obj(
data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
if video_id:
- if mobj['tab'] != '/live': # live tab is expected to redirect to video
+ if tab != '/live': # live tab is expected to redirect to video
self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
- return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
- ie=YoutubeIE.ie_key(), video_id=video_id)
+ return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
raise ExtractorError('Unable to recognize tab page')
@@ -5304,12 +6195,13 @@ class YoutubePlaylistIE(InfoExtractor):
'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
'view_count': int,
- 'uploader_url': 'https://www.youtube.com/user/Wickydoo',
+ 'uploader_url': 'https://www.youtube.com/c/WickmanVT',
'modified_date': r're:\d{8}',
'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
'channel': 'Wickman',
'tags': [],
- 'channel_url': 'https://www.youtube.com/user/Wickydoo',
+ 'channel_url': 'https://www.youtube.com/c/WickmanVT',
+ 'availability': 'public',
},
'playlist_mincount': 29,
}, {
@@ -5337,11 +6229,12 @@ class YoutubePlaylistIE(InfoExtractor):
'channel': 'milan',
'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
+ 'availability': 'public',
},
- 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
+ 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
}, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'playlist_mincount': 654,
+ 'playlist_mincount': 455,
'info_dict': {
'title': '2018 Chinese New Singles (11/6 updated)',
'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
@@ -5355,6 +6248,7 @@ class YoutubePlaylistIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
'modified_date': r're:\d{8}',
+ 'availability': 'public',
},
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
@@ -5374,7 +6268,7 @@ class YoutubePlaylistIE(InfoExtractor):
qs = parse_qs(url)
if qs.get('v', [None])[0]:
return False
- return super(YoutubePlaylistIE, cls).suitable(url)
+ return super().suitable(url)
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -5414,6 +6308,8 @@ class YoutubeYtBeIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
'availability': 'public',
'duration': 59,
+ 'comment_count': int,
+ 'channel_follower_count': int
},
'params': {
'noplaylist': True,
@@ -5462,9 +6358,7 @@ class YoutubeYtUserIE(InfoExtractor):
def _real_extract(self, url):
user_id = self._match_id(url)
- return self.url_result(
- 'https://www.youtube.com/user/%s/videos' % user_id,
- ie=YoutubeTabIE.ie_key(), video_id=user_id)
+ return self.url_result(f'https://www.youtube.com/user/{user_id}', YoutubeTabIE, user_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
@@ -5486,6 +6380,97 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
ie=YoutubeTabIE.ie_key())
+class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
+ IE_NAME = 'youtube:notif'
+ IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
+ _VALID_URL = r':ytnotif(?:ication)?s?'
+ _LOGIN_REQUIRED = True
+ _TESTS = [{
+ 'url': ':ytnotif',
+ 'only_matching': True,
+ }, {
+ 'url': ':ytnotifications',
+ 'only_matching': True,
+ }]
+
+ def _extract_notification_menu(self, response, continuation_list):
+ notification_list = traverse_obj(
+ response,
+ ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
+ ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
+ expected_type=list) or []
+ continuation_list[0] = None
+ for item in notification_list:
+ entry = self._extract_notification_renderer(item.get('notificationRenderer'))
+ if entry:
+ yield entry
+ continuation = item.get('continuationItemRenderer')
+ if continuation:
+ continuation_list[0] = continuation
+
+ def _extract_notification_renderer(self, notification):
+ video_id = traverse_obj(
+ notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
+ url = f'https://www.youtube.com/watch?v={video_id}'
+ channel_id = None
+ if not video_id:
+ browse_ep = traverse_obj(
+ notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
+ channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
+ post_id = self._search_regex(
+ r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
+ 'post id', default=None)
+ if not channel_id or not post_id:
+ return
+ # The direct /post URL redirects to this in the browser
+ url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'
+
+ channel = traverse_obj(
+ notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
+ expected_type=str)
+ notification_title = self._get_text(notification, 'shortMessage')
+ if notification_title:
+ notification_title = notification_title.replace('\xad', '') # remove soft hyphens
+ # TODO: handle recommended videos
+ title = self._search_regex(
+ rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
+ 'video title', default=None)
+ timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText'))
+ if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
+ else None)
+ return {
+ '_type': 'url',
+ 'url': url,
+ 'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
+ 'video_id': video_id,
+ 'title': title,
+ 'channel_id': channel_id,
+ 'channel': channel,
+ 'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
+ 'timestamp': timestamp,
+ }
+
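A hedged sketch of the title regex above; the notification text is hypothetical, shaped like '<channel> uploaded: <title>':

    import re

    channel = 'colethedj'                                      # hypothetical
    notification_title = 'colethedj uploaded: new test video'  # hypothetical
    m = re.search(rf'{re.escape(channel)}[^:]+: (.+)', notification_title)
    print(m.group(1))  # -> 'new test video'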
+ def _notification_menu_entries(self, ytcfg):
+ continuation_list = [None]
+ response = None
+ for page in itertools.count(1):
+ ctoken = traverse_obj(
+ continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
+ response = self._extract_response(
+ item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
+ ep='notification/get_notification_menu', check_get_keys='actions',
+ headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
+ yield from self._extract_notification_menu(response, continuation_list)
+ if not continuation_list[0]:
+ break
+
+ def _real_extract(self, url):
+ display_id = 'notifications'
+ ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
+ self._report_playlist_authcheck(ytcfg)
+ return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
+
+
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
IE_DESC = 'YouTube search'
IE_NAME = 'youtube:search'
@@ -5540,10 +6525,11 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
'info_dict': {
'id': '#cats',
'title': '#cats',
- 'entries': [{
- 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
- 'title': '#cats',
- }],
+ # The test suite does not support nested playlists
+ # 'entries': [{
+ # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
+ # 'title': '#cats',
+ # }],
},
}, {
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
@@ -5557,7 +6543,7 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
- IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
+ IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
IE_NAME = 'youtube:music:search_url'
_VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
_TESTS = [{
@@ -5601,7 +6587,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
if params:
section = next((k for k, v in self._SECTIONS.items() if v == params), params)
else:
- section = compat_urllib_parse_unquote_plus((url.split('#') + [''])[1]).lower()
+ section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower()
params = self._SECTIONS.get(section)
if not params:
section = None
@@ -5612,14 +6598,17 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
class YoutubeFeedsInfoExtractor(InfoExtractor):
"""
Base class for feed extractors
- Subclasses must define the _FEED_NAME property.
+ Subclasses must re-define the _FEED_NAME property.
"""
_LOGIN_REQUIRED = True
- _TESTS = []
+ _FEED_NAME = 'feeds'
+
+ def _real_initialize(self):
+ YoutubeBaseInfoExtractor._check_login_required(self)
- @property
+ @classproperty
def IE_NAME(self):
- return 'youtube:%s' % self._FEED_NAME
+ return f'youtube:{self._FEED_NAME}'
def _real_extract(self, url):
return self.url_result(
@@ -5680,6 +6669,46 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
}]
+class YoutubeStoriesIE(InfoExtractor):
+ IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
+ IE_NAME = 'youtube:stories'
+ _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
+ _TESTS = [{
+ 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = f'RLTD{self._match_id(url)}'
+ return self.url_result(
+ smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
+ ie=YoutubeTabIE, video_id=playlist_id)
+
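The RLTD id built here is exactly the "known unviewable" shape special-cased in _extract_from_playlist above; with the test URL:

    import re

    m = re.match(r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$', 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg')
    print(f'RLTD{m.group("id")}')  # -> RLTDwFCb4jeqaKWnciAYM-ZVHg (matches RLTD[\w-]{22})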
+
+class YoutubeShortsAudioPivotIE(InfoExtractor):
+ IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
+ IE_NAME = 'youtube:shorts:pivot:audio'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _generate_audio_pivot_params(video_id):
+ """
+ Generates sfv_audio_pivot browse params for this video id
+ """
+ pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id.encode(),) * 3)
+ return urllib.parse.quote(base64.b64encode(pb_params).decode())
+
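Running the param generator on the video id from the sfv_audio_pivot feed test earlier reproduces that test's bp value (with '=' percent-encoded by quote()):

    import base64
    import urllib.parse

    video_id = '2GtVksBMYFM'
    pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id.encode(),) * 3)
    print(urllib.parse.quote(base64.b64encode(pb_params).decode()))
    # -> 8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ%3D%3D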
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ f'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}',
+ ie=YoutubeTabIE)
+
+
class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list
@@ -5729,14 +6758,62 @@ class YoutubeTruncatedURLIE(InfoExtractor):
expected=True)
-class YoutubeClipIE(InfoExtractor):
+class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
IE_NAME = 'youtube:clip'
- IE_DESC = False # Do not list
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # FIXME: Other metadata should be extracted from the clip, not from the base video
+ 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'info_dict': {
+ 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'ext': 'mp4',
+ 'section_start': 29.0,
+ 'section_end': 39.7,
+ 'duration': 10.7,
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'categories': ['Gaming'],
+ 'channel': 'Scott The Woz',
+ 'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
+ 'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
+ 'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'tags': 'count:17',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
+ 'title': 'Mobile Games on Console - Scott The Woz',
+ 'upload_date': '20210920',
+ 'uploader': 'Scott The Woz',
+ 'uploader_id': 'scottthewoz',
+ 'uploader_url': 'http://www.youtube.com/user/scottthewoz',
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel_follower_count': int
+ }
+ }]
def _real_extract(self, url):
- self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
- return self.url_result(url, 'Generic')
+ clip_id = self._match_id(url)
+ _, data = self._extract_webpage(url, clip_id)
+
+ video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
+ if not video_id:
+ raise ExtractorError('Unable to find video ID')
+
+ clip_data = traverse_obj(data, (
+ 'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
+ 'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
+ 'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
+ 'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'https://www.youtube.com/watch?v={video_id}',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': clip_id,
+ 'section_start': int(clip_data['startTimeMs']) / 1000,
+ 'section_end': int(clip_data['endTimeMs']) / 1000,
+ }
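The millisecond-to-second math matches the test metadata above (section_start 29.0, section_end 39.7):

    clip_data = {'startTimeMs': '29000', 'endTimeMs': '39700'}  # values implied by the test
    print(int(clip_data['startTimeMs']) / 1000)  # -> 29.0
    print(int(clip_data['endTimeMs']) / 1000)    # -> 39.7 (duration 10.7)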
class YoutubeTruncatedIDIE(InfoExtractor):
@@ -5752,5 +6829,5 @@ class YoutubeTruncatedIDIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
raise ExtractorError(
- 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+ f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.',
expected=True)
diff --git a/hypervideo_dl/extractor/zapiks.py b/hypervideo_dl/extractor/zapiks.py
index 161b011..88f526b 100644
--- a/hypervideo_dl/extractor/zapiks.py
+++ b/hypervideo_dl/extractor/zapiks.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -15,6 +12,7 @@ from ..utils import (
class ZapiksIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+ _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"']
_TESTS = [
{
'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
@@ -94,7 +92,6 @@ class ZapiksIE(InfoExtractor):
if m:
f['height'] = int(m.group('height'))
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/zaq1.py b/hypervideo_dl/extractor/zaq1.py
deleted file mode 100644
index 889aff5..0000000
--- a/hypervideo_dl/extractor/zaq1.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_timestamp,
-)
-
-
-class Zaq1IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://zaq1.pl/video/xev0e',
- 'md5': '24a5eb3f052e604ae597c4d0d19b351e',
- 'info_dict': {
- 'id': 'xev0e',
- 'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
- 'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147',
- 'ext': 'mp4',
- 'duration': 511,
- 'timestamp': 1490896361,
- 'uploader': 'Anonim',
- 'upload_date': '20170330',
- 'view_count': int,
- }
- }, {
- # malformed JSON-LD
- 'url': 'http://zaq1.pl/video/x81vn',
- 'info_dict': {
- 'id': 'x81vn',
- 'title': 'SEKRETNE ŻYCIE WALTERA MITTY',
- 'ext': 'mp4',
- 'duration': 6234,
- 'timestamp': 1493494860,
- 'uploader': 'Anonim',
- 'upload_date': '20170429',
- 'view_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Failed to parse JSON'],
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._search_regex(
- r'data-video-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'video url', group='url')
-
- info = self._search_json_ld(webpage, video_id, fatal=False)
-
- def extract_data(field, name, fatal=False):
- return self._search_regex(
- r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
- webpage, field, fatal=fatal, group='field')
-
- if not info.get('title'):
- info['title'] = extract_data('file-name', 'title', fatal=True)
-
- if not info.get('duration'):
- info['duration'] = int_or_none(extract_data('duration', 'duration'))
-
- if not info.get('thumbnail'):
- info['thumbnail'] = extract_data('photo-url', 'thumbnail')
-
- if not info.get('timestamp'):
- info['timestamp'] = unified_timestamp(self._html_search_meta(
- 'uploadDate', webpage, 'timestamp'))
-
- if not info.get('interactionCount'):
- info['view_count'] = int_or_none(self._html_search_meta(
- 'interactionCount', webpage, 'view count'))
-
- uploader = self._html_search_regex(
- r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader',
- fatal=False)
-
- width = int_or_none(self._html_search_meta(
- 'width', webpage, fatal=False))
- height = int_or_none(self._html_search_meta(
- 'height', webpage, fatal=False))
-
- info.update({
- 'id': video_id,
- 'formats': [{
- 'url': video_url,
- 'width': width,
- 'height': height,
- 'http_headers': {
- 'Referer': url,
- },
- }],
- 'uploader': uploader,
- })
-
- return info
diff --git a/hypervideo_dl/extractor/zattoo.py b/hypervideo_dl/extractor/zattoo.py
index c02b4ca..22620c0 100644
--- a/hypervideo_dl/extractor/zattoo.py
+++ b/hypervideo_dl/extractor/zattoo.py
@@ -1,14 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from uuid import uuid4
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
-)
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
ExtractorError,
int_or_none,
@@ -51,25 +45,30 @@ class ZattooPlatformBaseIE(InfoExtractor):
self._power_guide_hash = data['session']['power_guide_hash']
def _initialize_pre_login(self):
- webpage = self._download_webpage(
- self._host_url(), None, 'Downloading app token')
- app_token = self._html_search_regex(
- r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
- webpage, 'app token', group='token')
- app_version = self._html_search_regex(
- r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
+ session_token = self._download_json(
+ f'{self._host_url()}/token.json', None, 'Downloading session token')['session_token']
# Will set up appropriate cookies
self._request_webpage(
- '%s/zapi/v2/session/hello' % self._host_url(), None,
+ '%s/zapi/v3/session/hello' % self._host_url(), None,
'Opening session', data=urlencode_postdata({
- 'client_app_token': app_token,
'uuid': compat_str(uuid4()),
'lang': 'en',
- 'app_version': app_version,
+ 'app_version': '1.8.2',
'format': 'json',
+ 'client_app_token': session_token,
}))
+ def _extract_video_id_from_recording(self, recid):
+ playlist = self._download_json(
+ f'{self._host_url()}/zapi/v2/playlist', recid, 'Downloading playlist')
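+ # Map the user-specific recording id to the underlying program id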
+ try:
+ return next(
+ str(item['program_id']) for item in playlist['recordings']
+ if item.get('program_id') and str(item.get('id')) == recid)
+ except (StopIteration, KeyError):
+ raise ExtractorError('Could not extract video id from recording')
+
def _extract_cid(self, video_id, channel_name):
channel_groups = self._download_json(
'%s/zapi/v2/cached/channels/%s' % (self._host_url(),
@@ -118,7 +117,26 @@ class ZattooPlatformBaseIE(InfoExtractor):
return cid, info_dict
- def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
+ def _extract_ondemand_info(self, ondemand_id):
+ """
+ @returns (ondemand_token, ondemand_type, info_dict)
+ """
+ data = self._download_json(
+ '%s/zapi/vod/movies/%s' % (self._host_url(), ondemand_id),
+ ondemand_id, 'Downloading ondemand information')
+ info_dict = {
+ 'id': ondemand_id,
+ 'title': data.get('title'),
+ 'description': data.get('description'),
+ 'duration': int_or_none(data.get('duration')),
+ 'release_year': int_or_none(data.get('year')),
+ 'episode_number': int_or_none(data.get('episode_number')),
+ 'season_number': int_or_none(data.get('season_number')),
+ 'categories': try_get(data, lambda x: x['categories'], list),
+ }
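+ # The term token is later sent as term_token when requesting the watch URLs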
+ return data['terms_catalog'][0]['terms'][0]['token'], data['type'], info_dict
+
+ def _extract_formats(self, cid, video_id, record_id=None, ondemand_id=None, ondemand_termtoken=None, ondemand_type=None, is_live=False):
postdata_common = {
'https_watch_urls': True,
}
@@ -128,11 +146,18 @@ class ZattooPlatformBaseIE(InfoExtractor):
url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
elif record_id:
url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
+ elif ondemand_id:
+ postdata_common.update({
+ 'teasable_id': ondemand_id,
+ 'term_token': ondemand_termtoken,
+ 'teasable_type': ondemand_type
+ })
+ url = '%s/zapi/watch/vod/video' % self._host_url()
else:
- url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id)
-
+ url = '%s/zapi/v3/watch/replay/%s/%s' % (self._host_url(), cid, video_id)
formats = []
- for stream_type in ('dash', 'hls', 'hls5', 'hds'):
+ subtitles = {}
+ for stream_type in ('dash', 'hls7'):
postdata = postdata_common.copy()
postdata['stream_type'] = stream_type
@@ -156,14 +181,16 @@ class ZattooPlatformBaseIE(InfoExtractor):
audio_channel = watch.get('audio_channel')
preference = 1 if audio_channel == 'A' else None
format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel)
- if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
- this_formats = self._extract_mpd_formats(
+ if stream_type.startswith('dash'):
+ this_formats, subs = self._extract_mpd_formats_and_subtitles(
watch_url, video_id, mpd_id=format_id, fatal=False)
- elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
- this_formats = self._extract_m3u8_formats(
+ self._merge_subtitles(subs, target=subtitles)
+ elif stream_type.startswith('hls'):
+ this_formats, subs = self._extract_m3u8_formats_and_subtitles(
watch_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id,
fatal=False)
+ self._merge_subtitles(subs, target=subtitles)
elif stream_type == 'hds':
this_formats = self._extract_f4m_formats(
watch_url, video_id, f4m_id=format_id, fatal=False)
@@ -175,58 +202,48 @@ class ZattooPlatformBaseIE(InfoExtractor):
for this_format in this_formats:
this_format['quality'] = preference
formats.extend(this_formats)
- self._sort_formats(formats)
- return formats
+ return formats, subtitles
- def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
- if is_live:
- cid = self._extract_cid(video_id, channel_name)
- info_dict = {
- 'id': channel_name,
- 'title': channel_name,
- 'is_live': True,
- }
- else:
- cid, info_dict = self._extract_cid_and_video_info(video_id)
- formats = self._extract_formats(
- cid, video_id, record_id=record_id, is_live=is_live)
- info_dict['formats'] = formats
+ def _extract_video(self, video_id, record_id=None):
+ cid, info_dict = self._extract_cid_and_video_info(video_id)
+ info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
return info_dict
+ def _extract_live(self, channel_name):
+ cid = self._extract_cid(channel_name, channel_name)
+ formats, subtitles = self._extract_formats(cid, cid, is_live=True)
+ return {
+ 'id': channel_name,
+ 'title': channel_name,
+ 'is_live': True,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
-class QuicklineBaseIE(ZattooPlatformBaseIE):
- _NETRC_MACHINE = 'quickline'
- _HOST = 'mobiltv.quickline.com'
-
-
-class QuicklineIE(QuicklineBaseIE):
- _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST)
+ def _extract_record(self, record_id):
+ video_id = self._extract_video_id_from_recording(record_id)
+ cid, info_dict = self._extract_cid_and_video_info(video_id)
+ info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
+ return info_dict
- _TEST = {
- 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
- 'only_matching': True,
- }
+ def _extract_ondemand(self, ondemand_id):
+ ondemand_termtoken, ondemand_type, info_dict = self._extract_ondemand_info(ondemand_id)
+ info_dict['formats'], info_dict['subtitles'] = self._extract_formats(
+ None, ondemand_id, ondemand_id=ondemand_id,
+ ondemand_termtoken=ondemand_termtoken, ondemand_type=ondemand_type)
+ return info_dict
def _real_extract(self, url):
- channel_name, video_id = self._match_valid_url(url).groups()
- return self._extract_video(channel_name, video_id)
-
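+ # Each subclass sets _TYPE to select _extract_video/_extract_live/_extract_record/_extract_ondemand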
+ video_id, record_id = self._match_valid_url(url).groups()
+ return getattr(self, f'_extract_{self._TYPE}')(video_id or record_id)
-class QuicklineLiveIE(QuicklineBaseIE):
- _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST)
- _TEST = {
- 'url': 'https://mobiltv.quickline.com/watch/srf1',
- 'only_matching': True,
- }
-
- @classmethod
- def suitable(cls, url):
- return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
-
- def _real_extract(self, url):
- channel_name = video_id = self._match_id(url)
- return self._extract_video(channel_name, video_id, is_live=True)
+def _create_valid_url(host, match, qs, base_re=None):
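+ # Builds a pattern matching either the query-string form (...?{qs}=<id>, group
+ # 'vid2') or, when base_re is given, the path form {base_re}/<id> (group 'vid1')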
+ match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)'
+ return rf'''(?x)https?://(?:www\.)?{re.escape(host)}/(?:
+ [^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
+ {match_base}
+ )'''
class ZattooBaseIE(ZattooPlatformBaseIE):
@@ -234,191 +251,614 @@ class ZattooBaseIE(ZattooPlatformBaseIE):
_HOST = 'zattoo.com'
-def _make_valid_url(tmpl, host):
- return tmpl % re.escape(host)
-
-
class ZattooIE(ZattooBaseIE):
- _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
- _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST)
-
- # Since regular videos are only available for 7 days and recorded videos
- # are only available for a specific user, we cannot have detailed tests.
+ _VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
_TESTS = [{
- 'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+ 'url': 'https://zattoo.com/program/zdf/250170418',
+ 'info_dict': {
+ 'id': '250170418',
+ 'ext': 'mp4',
+ 'title': 'Markus Lanz',
+ 'description': 'md5:e41cb1257de008ca62a73bb876ffa7fc',
+ 'thumbnail': 're:http://images.zattic.com/cms/.+/format_480x360.jpg',
+ 'creator': 'ZDF HD',
+ 'release_year': 2022,
+ 'episode': 'Folge 1655',
+ 'categories': 'count:1',
+ 'tags': 'count:2'
+ },
+ 'params': {'skip_download': 'm3u8'}
+ }, {
+ 'url': 'https://zattoo.com/program/daserste/210177916',
'only_matching': True,
}, {
- 'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
+ 'url': 'https://zattoo.com/guide/german?channel=srf1&program=169860555',
'only_matching': True,
}]
- def _real_extract(self, url):
- channel_name, video_id, record_id = self._match_valid_url(url).groups()
- return self._extract_video(channel_name, video_id, record_id)
-
class ZattooLiveIE(ZattooBaseIE):
- _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'https://zattoo.com/watch/srf1',
+ _VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://zattoo.com/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/live/srf1',
'only_matching': True,
- }
+ }]
@classmethod
def suitable(cls, url):
- return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
+ return False if ZattooIE.suitable(url) else super().suitable(url)
+
+
+class ZattooMoviesIE(ZattooBaseIE):
+ _VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'\w+', 'movie_id', 'vod/movies')
+ _TYPE = 'ondemand'
+ _TESTS = [{
+ 'url': 'https://zattoo.com/vod/movies/7521',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/ondemand?movie_id=7521&term_token=9f00f43183269484edde',
+ 'only_matching': True,
+ }]
- def _real_extract(self, url):
- channel_name = video_id = self._match_id(url)
- return self._extract_video(channel_name, video_id, is_live=True)
+class ZattooRecordingsIE(ZattooBaseIE):
+ _VALID_URL = _create_valid_url('zattoo.com', r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://zattoo.com/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
-class NetPlusIE(ZattooIE):
+
+class NetPlusTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'netplus'
_HOST = 'netplus.tv'
_API_HOST = 'www.%s' % _HOST
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class NetPlusTVIE(NetPlusTVBaseIE):
+ _VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://netplus.tv/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://netplus.tv/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class NetPlusTVLiveIE(NetPlusTVBaseIE):
+ _VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://netplus.tv/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://netplus.tv/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if NetPlusTVIE.suitable(url) else super().suitable(url)
+
+
+class NetPlusTVRecordingsIE(NetPlusTVBaseIE):
+ _VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
_TESTS = [{
- 'url': 'https://www.netplus.tv/watch/abc/123-abc',
+ 'url': 'https://netplus.tv/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://netplus.tv/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
-class MNetTVIE(ZattooIE):
+class MNetTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'mnettv'
_HOST = 'tvplus.m-net.de'
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class MNetTVIE(MNetTVBaseIE):
+ _VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
_TESTS = [{
- 'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
+ 'url': 'https://tvplus.m-net.de/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplus.m-net.de/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class MNetTVLiveIE(MNetTVBaseIE):
+ _VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tvplus.m-net.de/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplus.m-net.de/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MNetTVIE.suitable(url) else super().suitable(url)
+
+
+class MNetTVRecordingsIE(MNetTVBaseIE):
+ _VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://tvplus.m-net.de/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplus.m-net.de/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
-class WalyTVIE(ZattooIE):
+class WalyTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'walytv'
_HOST = 'player.waly.tv'
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class WalyTVIE(WalyTVBaseIE):
+ _VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://player.waly.tv/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.waly.tv/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class WalyTVLiveIE(WalyTVBaseIE):
+ _VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
_TESTS = [{
- 'url': 'https://player.waly.tv/watch/abc/123-abc',
+ 'url': 'https://player.waly.tv/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.waly.tv/live/srf1',
'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if WalyTVIE.suitable(url) else super().suitable(url)
+
-class BBVTVIE(ZattooIE):
+class WalyTVRecordingsIE(WalyTVBaseIE):
+ _VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://player.waly.tv/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.waly.tv/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class BBVTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'bbvtv'
_HOST = 'bbv-tv.net'
_API_HOST = 'www.%s' % _HOST
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class BBVTVIE(BBVTVBaseIE):
+ _VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://bbv-tv.net/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bbv-tv.net/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class BBVTVLiveIE(BBVTVBaseIE):
+ _VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
_TESTS = [{
- 'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
+ 'url': 'https://bbv-tv.net/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bbv-tv.net/live/srf1',
'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if BBVTVIE.suitable(url) else super().suitable(url)
+
-class VTXTVIE(ZattooIE):
+class BBVTVRecordingsIE(BBVTVBaseIE):
+ _VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://bbv-tv.net/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bbv-tv.net/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class VTXTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'vtxtv'
_HOST = 'vtxtv.ch'
_API_HOST = 'www.%s' % _HOST
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class VTXTVIE(VTXTVBaseIE):
+ _VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
_TESTS = [{
- 'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
+ 'url': 'https://vtxtv.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vtxtv.ch/guide/german?channel=srf1&program=169860555',
'only_matching': True,
}]
-class MyVisionTVIE(ZattooIE):
- _NETRC_MACHINE = 'myvisiontv'
- _HOST = 'myvisiontv.ch'
- _API_HOST = 'www.%s' % _HOST
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+class VTXTVLiveIE(VTXTVBaseIE):
+ _VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://vtxtv.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vtxtv.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if VTXTVIE.suitable(url) else super().suitable(url)
+
+class VTXTVRecordingsIE(VTXTVBaseIE):
+ _VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
_TESTS = [{
- 'url': 'https://www.myvisiontv.ch/watch/abc/123-abc',
+ 'url': 'https://vtxtv.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vtxtv.ch/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
-class GlattvisionTVIE(ZattooIE):
+class GlattvisionTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'glattvisiontv'
_HOST = 'iptv.glattvision.ch'
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class GlattvisionTVIE(GlattvisionTVBaseIE):
+ _VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://iptv.glattvision.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://iptv.glattvision.ch/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class GlattvisionTVLiveIE(GlattvisionTVBaseIE):
+ _VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://iptv.glattvision.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://iptv.glattvision.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if GlattvisionTVIE.suitable(url) else super().suitable(url)
+
+
+class GlattvisionTVRecordingsIE(GlattvisionTVBaseIE):
+ _VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
_TESTS = [{
- 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
+ 'url': 'https://iptv.glattvision.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://iptv.glattvision.ch/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
-class SAKTVIE(ZattooIE):
+class SAKTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'saktv'
_HOST = 'saktv.ch'
_API_HOST = 'www.%s' % _HOST
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class SAKTVIE(SAKTVBaseIE):
+ _VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://saktv.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://saktv.ch/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class SAKTVLiveIE(SAKTVBaseIE):
+ _VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://saktv.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://saktv.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SAKTVIE.suitable(url) else super().suitable(url)
+
+
+class SAKTVRecordingsIE(SAKTVBaseIE):
+ _VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
_TESTS = [{
- 'url': 'https://www.saktv.ch/watch/abc/123-abc',
+ 'url': 'https://saktv.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://saktv.ch/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
-class EWETVIE(ZattooIE):
+class EWETVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'ewetv'
_HOST = 'tvonline.ewe.de'
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class EWETVIE(EWETVBaseIE):
+ _VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://tvonline.ewe.de/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.ewe.de/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class EWETVLiveIE(EWETVBaseIE):
+ _VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tvonline.ewe.de/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.ewe.de/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if EWETVIE.suitable(url) else super().suitable(url)
+
+
+class EWETVRecordingsIE(EWETVBaseIE):
+ _VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
_TESTS = [{
- 'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
+ 'url': 'https://tvonline.ewe.de/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.ewe.de/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
-class QuantumTVIE(ZattooIE):
+class QuantumTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'quantumtv'
_HOST = 'quantum-tv.com'
_API_HOST = 'www.%s' % _HOST
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class QuantumTVIE(QuantumTVBaseIE):
+ _VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
_TESTS = [{
- 'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
+ 'url': 'https://quantum-tv.com/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://quantum-tv.com/guide/german?channel=srf1&program=169860555',
'only_matching': True,
}]
-class OsnatelTVIE(ZattooIE):
+class QuantumTVLiveIE(QuantumTVBaseIE):
+ _VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://quantum-tv.com/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://quantum-tv.com/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if QuantumTVIE.suitable(url) else super().suitable(url)
+
+
+class QuantumTVRecordingsIE(QuantumTVBaseIE):
+ _VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://quantum-tv.com/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://quantum-tv.com/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class OsnatelTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'osnateltv'
_HOST = 'tvonline.osnatel.de'
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class OsnatelTVIE(OsnatelTVBaseIE):
+ _VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
+ _TESTS = [{
+ 'url': 'https://tvonline.osnatel.de/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.osnatel.de/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class OsnatelTVLiveIE(OsnatelTVBaseIE):
+ _VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tvonline.osnatel.de/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.osnatel.de/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if OsnatelTVIE.suitable(url) else super().suitable(url)
+
+
+class OsnatelTVRecordingsIE(OsnatelTVBaseIE):
+ _VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
_TESTS = [{
- 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
+ 'url': 'https://tvonline.osnatel.de/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvonline.osnatel.de/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
-class EinsUndEinsTVIE(ZattooIE):
+class EinsUndEinsTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = '1und1tv'
_HOST = '1und1.tv'
_API_HOST = 'www.%s' % _HOST
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class EinsUndEinsTVIE(EinsUndEinsTVBaseIE):
+ _VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
_TESTS = [{
- 'url': 'https://www.1und1.tv/watch/abc/123-abc',
+ 'url': 'https://1und1.tv/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://1und1.tv/guide/german?channel=srf1&program=169860555',
'only_matching': True,
}]
-class SaltTVIE(ZattooIE):
+class EinsUndEinsTVLiveIE(EinsUndEinsTVBaseIE):
+ _VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://1und1.tv/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://1und1.tv/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if EinsUndEinsTVIE.suitable(url) else super().suitable(url)
+
+
+class EinsUndEinsTVRecordingsIE(EinsUndEinsTVBaseIE):
+ _VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://1und1.tv/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://1und1.tv/tc/ptc_recordings_all_recordings?recording=193615420',
+ 'only_matching': True,
+ }]
+
+
+class SaltTVBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'salttv'
_HOST = 'tv.salt.ch'
- _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+class SaltTVIE(SaltTVBaseIE):
+ _VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
+ _TYPE = 'video'
_TESTS = [{
- 'url': 'https://tv.salt.ch/watch/abc/123-abc',
+ 'url': 'https://tv.salt.ch/program/daserste/210177916',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.salt.ch/guide/german?channel=srf1&program=169860555',
+ 'only_matching': True,
+ }]
+
+
+class SaltTVLiveIE(SaltTVBaseIE):
+ _VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
+ _TYPE = 'live'
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/channels/german?channel=srf_zwei',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.salt.ch/live/srf1',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SaltTVIE.suitable(url) else super().suitable(url)
+
+
+class SaltTVRecordingsIE(SaltTVBaseIE):
+ _VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'\d+', 'recording')
+ _TYPE = 'record'
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/recordings?recording=193615508',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.salt.ch/tc/ptc_recordings_all_recordings?recording=193615420',
'only_matching': True,
}]
diff --git a/hypervideo_dl/extractor/zdf.py b/hypervideo_dl/extractor/zdf.py
index 5f4d266..fca426a 100644
--- a/hypervideo_dl/extractor/zdf.py
+++ b/hypervideo_dl/extractor/zdf.py
@@ -1,18 +1,16 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
determine_ext,
+ extract_attributes,
float_or_none,
int_or_none,
join_nonempty,
merge_dicts,
- NO_DEFAULT,
- orderedSet,
parse_codecs,
qualities,
traverse_obj,
@@ -72,6 +70,7 @@ class ZDFBaseIE(InfoExtractor):
f.update({
'url': format_url,
'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
})
new_formats = [f]
formats.extend(merge_dicts(f, {
@@ -111,7 +110,6 @@ class ZDFBaseIE(InfoExtractor):
'class': track.get('class'),
'language': track.get('language'),
})
- self._sort_formats(formats, ('hasaud', 'res', 'quality', 'language_preference'))
duration = float_or_none(try_get(
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
@@ -122,6 +120,7 @@ class ZDFBaseIE(InfoExtractor):
'duration': duration,
'formats': formats,
'subtitles': self._extract_subtitles(ptmd),
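+ # Field precedence for format sorting: bitrate, then resolution, quality, language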
+ '_format_sort_fields': ('tbr', 'res', 'quality', 'language_preference'),
}
def _extract_player(self, webpage, video_id, fatal=True):
@@ -190,7 +189,7 @@ class ZDFIE(ZDFBaseIE):
},
}, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
- 'md5': '3d6f1049e9682178a11c54b91f3dd065',
+ 'md5': '1b93bdec7d02fc0b703c5e7687461628',
'info_dict': {
'ext': 'mp4',
'id': 'video_funk_1770473',
@@ -233,23 +232,34 @@ class ZDFIE(ZDFBaseIE):
'timestamp': 1641355200,
'upload_date': '20220105',
},
+ 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
+ }, {
+ 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
+ 'info_dict': {
+ 'id': '191205_1800_sendung_sok8',
+ 'ext': 'mp4',
+ 'title': 'Das Geld anderer Leute',
+ 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
+ 'duration': 2581.0,
+ 'timestamp': 1654790700,
+ 'upload_date': '20220609',
+ 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
+ },
}]
def _extract_entry(self, url, player, content, video_id):
title = content.get('title') or content['teaserHeadline']
t = content['mainVideoContent']['http://zdf.de/rels/target']
-
- ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
-
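+ # Look under streams.default first, then at the top level, for either the
+ # direct ptmd path or its {playerId} template variant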
+ ptmd_path = traverse_obj(t, (
+ (('streams', 'default'), None),
+ ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template')
+ ), get_all=False)
if not ptmd_path:
- ptmd_path = traverse_obj(
- t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
- 'http://zdf.de/rels/streams/ptmd-template').replace(
- '{playerId}', 'ngplayer_2_4')
+ raise ExtractorError('Could not extract ptmd_path')
info = self._extract_ptmd(
- urljoin(url, ptmd_path), video_id, player['apiToken'], url)
+ urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url)
thumbnails = []
layouts = try_get(
@@ -298,16 +308,16 @@ class ZDFIE(ZDFBaseIE):
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
video_id)
- document = video['document']
-
- title = document['titel']
- content_id = document['basename']
-
formats = []
- format_urls = set()
- for f in document['formitaeten']:
- self._extract_format(content_id, formats, format_urls, f)
- self._sort_formats(formats)
+ formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
+ document = formitaeten and video['document']
+ if formitaeten:
+ title = document['titel']
+ content_id = document['basename']
+
+ format_urls = set()
+ for f in formitaeten or []:
+ self._extract_format(content_id, formats, format_urls, f)
thumbnails = []
teaser_bild = document.get('teaserBild')
@@ -353,9 +363,9 @@ class ZDFChannelIE(ZDFBaseIE):
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
'info_dict': {
'id': 'das-aktuelle-sportstudio',
- 'title': 'das aktuelle sportstudio | ZDF',
+ 'title': 'das aktuelle sportstudio',
},
- 'playlist_mincount': 23,
+ 'playlist_mincount': 18,
}, {
'url': 'https://www.zdf.de/dokumentation/planet-e',
'info_dict': {
@@ -364,6 +374,14 @@ class ZDFChannelIE(ZDFBaseIE):
},
'playlist_mincount': 50,
}, {
+ 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
+ 'info_dict': {
+ 'id': 'aktenzeichen-xy-ungeloest',
+ 'title': 'Aktenzeichen XY... ungelöst',
+ 'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
+ },
+ 'playlist_mincount': 2,
+ }, {
'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True,
}]
@@ -372,60 +390,36 @@ class ZDFChannelIE(ZDFBaseIE):
def suitable(cls, url):
return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
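+ # og:title carries a trailing " - ZDF" or " | ZDFmediathek" suffix; strip it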
+ def _og_search_title(self, webpage, fatal=False):
+ title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
+ return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
+
def _real_extract(self, url):
channel_id = self._match_id(url)
webpage = self._download_webpage(url, channel_id)
- entries = [
- self.url_result(item_url, ie=ZDFIE.ie_key())
- for item_url in orderedSet(re.findall(
- r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
-
- return self.playlist_result(
- entries, channel_id, self._og_search_title(webpage, fatal=False))
-
- r"""
- player = self._extract_player(webpage, channel_id)
-
- channel_id = self._search_regex(
- r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
- 'channel id', group='id')
-
- channel = self._call_api(
- 'https://api.zdf.de/content/documents/%s.json' % channel_id,
- player, url, channel_id)
-
- items = []
- for module in channel['module']:
- for teaser in try_get(module, lambda x: x['teaser'], list) or []:
- t = try_get(
- teaser, lambda x: x['http://zdf.de/rels/target'], dict)
- if not t:
- continue
- items.extend(try_get(
- t,
- lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
- list) or [])
- items.extend(try_get(
- module,
- lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
- list) or [])
-
- entries = []
- entry_urls = set()
- for item in items:
- t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
- if not t:
- continue
- sharing_url = t.get('http://zdf.de/rels/sharing-url')
- if not sharing_url or not isinstance(sharing_url, compat_str):
- continue
- if sharing_url in entry_urls:
- continue
- entry_urls.add(sharing_url)
- entries.append(self.url_result(
- sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
-
- return self.playlist_result(entries, channel_id, channel.get('title'))
- """
+ matches = re.finditer(
+ r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
+ webpage)
+
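+ # With --no-playlist, return only the first video teaser matched on the page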
+ if self._downloader.params.get('noplaylist', False):
+ entry = next(
+ (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
+ None)
+ self.to_screen('Downloading just the main video because of --no-playlist')
+ if entry:
+ return entry
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
+
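+ # Skip teaser links that the page marks as having no playable video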
+ def check_video(m):
+ v_ref = self._search_regex(
+ r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
+ webpage, 'check id', default='')
+ v_ref = extract_attributes(v_ref)
+ return v_ref.get('data-target-video-type') != 'novideo'
+
+ return self.playlist_from_matches(
+ (m.group('url') for m in matches if check_video(m)),
+ channel_id, self._og_search_title(webpage, fatal=False))
diff --git a/hypervideo_dl/extractor/zee5.py b/hypervideo_dl/extractor/zee5.py
index 3e3f11b..a64eb9e 100644
--- a/hypervideo_dl/extractor/zee5.py
+++ b/hypervideo_dl/extractor/zee5.py
@@ -1,7 +1,6 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import json
+import random
+import string
from .common import InfoExtractor
from ..compat import compat_str
@@ -24,25 +23,25 @@ class Zee5IE(InfoExtractor):
https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
(?:
(?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3}
- |movies/[^#/?]+
+ |(?:movies|kids|videos|news|music-videos)/(?!kids-shows)[^#/?]+
)/(?P<display_id>[^#/?]+)/
)
(?P<id>[^#/?]+)/?(?:$|[?#])
'''
_TESTS = [{
- 'url': 'https://www.zee5.com/movies/details/krishna-the-birth/0-0-63098',
+ 'url': 'https://www.zee5.com/movies/details/adavari-matalaku-ardhale-verule/0-0-movie_1143162669',
'info_dict': {
- 'id': '0-0-63098',
+ 'id': '0-0-movie_1143162669',
'ext': 'mp4',
- 'display_id': 'krishna-the-birth',
- 'title': 'Krishna - The Birth',
- 'duration': 4368,
+ 'display_id': 'adavari-matalaku-ardhale-verule',
+ 'title': 'Adavari Matalaku Ardhale Verule',
+ 'duration': 9360,
'description': compat_str,
- 'alt_title': 'Krishna - The Birth',
+ 'alt_title': 'Adavari Matalaku Ardhale Verule',
'uploader': 'Zee Entertainment Enterprises Ltd',
- 'release_date': '20060101',
- 'upload_date': '20060101',
- 'timestamp': 1136073600,
+ 'release_date': '20070427',
+ 'upload_date': '20070427',
+ 'timestamp': 1177632000,
'thumbnail': r're:^https?://.*\.jpg$',
'episode_number': 0,
'episode': 'Episode 0',
@@ -85,9 +84,18 @@ class Zee5IE(InfoExtractor):
}, {
'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/kids/kids-movies/maya-bommalu/0-0-movie_1040370005',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/news/details/jana-sena-chief-pawan-kalyan-shows-slippers-to-ysrcp-leaders/0-0-newsauto_6ettj4242oo0',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
+ 'only_matching': True
}]
- _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
- _DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb'
+ _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
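+ # Random 20-char alphanumeric device id, zero-padded to the 32-char length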
+ _DEVICE_ID = ''.join(random.choices(string.ascii_letters + string.digits, k=20)).ljust(32, '0')
_USER_TOKEN = None
_LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
_NETRC_MACHINE = 'zee5'
@@ -96,14 +104,14 @@ class Zee5IE(InfoExtractor):
def _perform_login(self, username, password):
if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
self.report_login()
- otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username),
+ otp_request_json = self._download_json(f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}',
None, note='Sending OTP')
if otp_request_json['code'] == 0:
self.to_screen(otp_request_json['message'])
else:
raise ExtractorError(otp_request_json['message'], expected=True)
otp_code = self._get_tfa_info('OTP')
- otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID),
+ otp_verify_json = self._download_json(f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web',
None, note='Verifying OTP', fatal=False)
if not otp_verify_json:
raise ExtractorError('Unable to verify OTP.', expected=True)
@@ -138,7 +146,6 @@ class Zee5IE(InfoExtractor):
if not asset_data.get('hls_url'):
self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(asset_data['hls_url'], video_id, 'mp4', fatal=False)
- self._sort_formats(formats)
subtitles = {}
for sub in asset_data.get('subtitle_url', []):
@@ -177,7 +184,7 @@ class Zee5SeriesIE(InfoExtractor):
(?:
zee5:series:|
https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
- (?:tv-shows|web-series|kids|zee5originals)(?:/[^#/?]+){2}/
+ (?:tv-shows|web-series|kids|zee5originals)/(?!kids-movies)(?:[^#/?]+/){2}
)
(?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
'''
@@ -227,13 +234,13 @@ class Zee5SeriesIE(InfoExtractor):
'X-Access-Token': access_token_request['token'],
'Referer': 'https://www.zee5.com/',
}
- show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id)
+ show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN'
page_num = 0
show_json = self._download_json(show_url, video_id=show_id, headers=headers)
for season in show_json.get('seasons') or []:
season_id = try_get(season, lambda x: x['id'], compat_str)
- next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id)
+ next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'
while next_url:
page_num += 1
episodes_json = self._download_json(
diff --git a/hypervideo_dl/extractor/zeenews.py b/hypervideo_dl/extractor/zeenews.py
new file mode 100644
index 0000000..1616dbf
--- /dev/null
+++ b/hypervideo_dl/extractor/zeenews.py
@@ -0,0 +1,57 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError, traverse_obj
+
+
+class ZeeNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://zeenews\.india\.com/[^#?]+/video/(?P<display_id>[^#/?]+)/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://zeenews.india.com/hindi/india/delhi-ncr-haryana/delhi-ncr/video/greater-noida-video-viral-on-social-media-attackers-beat-businessman-and-his-son-oppose-market-closed-atdnh/1402138',
+ 'info_dict': {
+ 'id': '1402138',
+ 'ext': 'mp4',
+ 'title': 'Greater Noida Video: हमलावरों ने दिनदहाड़े दुकान में घुसकर की मारपीट, देखें वीडियो',
+ 'display_id': 'greater-noida-video-viral-on-social-media-attackers-beat-businessman-and-his-son-oppose-market-closed-atdnh',
+ 'upload_date': '20221019',
+ 'thumbnail': r're:^https?://.*\.jpg*',
+ 'timestamp': 1666174501,
+ 'view_count': int,
+ 'duration': 97,
+ 'description': 'ग्रेटर नोएडा जारचा थाना क्षेत्र के प्याबली में दिनदहाड़े दुकान में घुसकर अज्ञात हमलावरों ने हमला कर',
+ }
+ },
+ {
+ 'url': 'https://zeenews.india.com/hindi/india/video/videsh-superfast-queen-elizabeth-iis-funeral-today/1357710',
+ 'info_dict': {
+ 'id': '1357710',
+ 'ext': 'mp4',
+ 'title': 'Videsh Superfast: महारानी के अंतिम संस्कार की तैयारी शुरू',
+ 'display_id': 'videsh-superfast-queen-elizabeth-iis-funeral-today',
+ 'upload_date': '20220919',
+ 'thumbnail': r're:^https?://.*\.jpg*',
+ 'timestamp': 1663556881,
+ 'view_count': int,
+ 'duration': 133,
+ 'description': 'सेगमेंट विदेश सुपराफास्ट में देखिए देश और दुनिया की सभी बड़ी खबरें, वो भी हर खबर फटाफट अंदाज में.',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ content_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ webpage = self._download_webpage(url, content_id)
+ json_ld_list = list(self._yield_json_ld(webpage, display_id))
+
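+ # Use the embedUrl of the first JSON-LD VideoObject as the HLS manifest URL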
+ embed_url = traverse_obj(
+ json_ld_list, (lambda _, v: v['@type'] == 'VideoObject', 'embedUrl'), get_all=False)
+ if not embed_url:
+ raise ExtractorError('No video found', expected=True)
+
+ formats = self._extract_m3u8_formats(embed_url, content_id, 'mp4')
+
+ return {
+ **self._json_ld(json_ld_list, display_id),
+ 'id': content_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/zhihu.py b/hypervideo_dl/extractor/zhihu.py
index 278a943..c24b338 100644
--- a/hypervideo_dl/extractor/zhihu.py
+++ b/hypervideo_dl/extractor/zhihu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import format_field, float_or_none, int_or_none
@@ -48,7 +45,6 @@ class ZhihuIE(InfoExtractor):
'url': play_url,
'width': int_or_none(q.get('width')),
})
- self._sort_formats(formats)
author = zvideo.get('author') or {}
url_token = author.get('url_token')
@@ -61,7 +57,7 @@ class ZhihuIE(InfoExtractor):
'uploader': author.get('name'),
'timestamp': int_or_none(zvideo.get('published_at')),
'uploader_id': author.get('id'),
- 'uploader_url': format_field(url_token, template='https://www.zhihu.com/people/%s'),
+ 'uploader_url': format_field(url_token, None, 'https://www.zhihu.com/people/%s'),
'duration': float_or_none(video.get('duration')),
'view_count': int_or_none(zvideo.get('play_count')),
'like_count': int_or_none(zvideo.get('liked_count')),
diff --git a/hypervideo_dl/extractor/zingmp3.py b/hypervideo_dl/extractor/zingmp3.py
index 419bf30..a818c9f 100644
--- a/hypervideo_dl/extractor/zingmp3.py
+++ b/hypervideo_dl/extractor/zingmp3.py
@@ -1,131 +1,77 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import functools
import hashlib
import hmac
+import json
import urllib.parse
from .common import InfoExtractor
from ..utils import (
+ OnDemandPagedList,
int_or_none,
traverse_obj,
+ urljoin,
)
class ZingMp3BaseIE(InfoExtractor):
- _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/]+/(?P<id>\w+)(?:\.html|\?)'
+ _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/?#]+/(?P<id>\w+)(?:\.html|\?)'
_GEO_COUNTRIES = ['VN']
_DOMAIN = 'https://zingmp3.vn'
- _SLUG_API = {
+ _PER_PAGE = 50
+ _API_SLUGS = {
+ # Audio/video
'bai-hat': '/api/v2/page/get/song',
'embed': '/api/v2/page/get/song',
'video-clip': '/api/v2/page/get/video',
+ 'lyric': '/api/v2/lyric/get/lyric',
+ 'song-streaming': '/api/v2/song/get/streaming',
+ # Playlist
'playlist': '/api/v2/page/get/playlist',
'album': '/api/v2/page/get/playlist',
- 'lyric': '/api/v2/lyric/get/lyric',
- 'song_streaming': '/api/v2/song/get/streaming',
+ # Chart
+ 'zing-chart': '/api/v2/page/get/chart-home',
+ 'zing-chart-tuan': '/api/v2/page/get/week-chart',
+ 'moi-phat-hanh': '/api/v2/page/get/newrelease-chart',
+ 'the-loai-video': '/api/v2/video/get/list',
+ # User
+ 'info-artist': '/api/v2/page/get/artist',
+ 'user-list-song': '/api/v2/song/get/list',
+ 'user-list-video': '/api/v2/video/get/list',
}
- _API_KEY = '88265e23d4284f25963e6eedac8fbfa3'
- _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab'
-
- def _extract_item(self, item, song_id, type_url, fatal):
- item_id = item.get('encodeId') or song_id
- title = item.get('title') or item.get('alias')
-
- if type_url == 'video-clip':
- source = item.get('streaming')
- else:
- api = self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id})
- source = self._download_json(api, video_id=item_id).get('data')
-
- formats = []
- for k, v in (source or {}).items():
- if not v:
- continue
- if k in ('mp4', 'hls'):
- for res, video_url in v.items():
- if not video_url:
- continue
- if k == 'hls':
- formats.extend(self._extract_m3u8_formats(
- video_url, item_id, 'mp4',
- 'm3u8_native', m3u8_id=k, fatal=False))
- elif k == 'mp4':
- formats.append({
- 'format_id': 'mp4-' + res,
- 'url': video_url,
- 'height': int_or_none(self._search_regex(
- r'^(\d+)p', res, 'resolution', default=None)),
- })
- continue
- elif v == 'VIP':
- continue
- formats.append({
- 'ext': 'mp3',
- 'format_id': k,
- 'tbr': int_or_none(k),
- 'url': self._proto_relative_url(v),
- 'vcodec': 'none',
- })
- if not formats:
- if not fatal:
- return
- msg = item.get('msg')
- if msg == 'Sorry, this content is not available in your country.':
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
- self.raise_no_formats(msg, expected=True)
- self._sort_formats(formats)
-
- lyric = item.get('lyric')
- if not lyric:
- api = self.get_api_with_signature(name_api=self._SLUG_API.get("lyric"), param={'id': item_id})
- info_lyric = self._download_json(api, video_id=item_id)
- lyric = traverse_obj(info_lyric, ('data', 'file'))
- subtitles = {
- 'origin': [{
- 'url': lyric,
- }],
- } if lyric else None
-
- album = item.get('album') or {}
-
- return {
- 'id': item_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'),
- 'subtitles': subtitles,
- 'duration': int_or_none(item.get('duration')),
- 'track': title,
- 'artist': traverse_obj(item, 'artistsNames', 'artists_names'),
- 'album': traverse_obj(album, 'name', 'title'),
- 'album_artist': traverse_obj(album, 'artistsNames', 'artists_names'),
+ def _api_url(self, url_type, params):
+ api_slug = self._API_SLUGS[url_type]
+ params.update({'ctime': '1'})
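+ # Sign the request: sha256 of the sorted query params, then HMAC-SHA512
+ # of the API path plus that digest, sent as 'sig' alongside the apiKey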
+ sha256 = hashlib.sha256(
+ ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest()
+ data = {
+ **params,
+ 'apiKey': '88265e23d4284f25963e6eedac8fbfa3',
+ 'sig': hmac.new(
+ b'2aa2d1c561e809b267f3638c4a307aab', f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(),
}
+ return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}'
+
+ def _call_api(self, url_type, params, display_id=None, **kwargs):
+ resp = self._download_json(
+ self._api_url(url_type, params), display_id or params.get('id'),
+ note=f'Downloading {url_type} JSON metadata', **kwargs)
+ return (resp or {}).get('data') or {}
def _real_initialize(self):
- if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'):
- self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}),
- None, note='Updating cookies')
+ if not self._cookies_passed:
+ self._request_webpage(
+ self._api_url('bai-hat', {'id': ''}), None, note='Updating cookies')
- def _real_extract(self, url):
- song_id, type_url = self._match_valid_url(url).group('id', 'type')
- api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id})
- return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url)
-
- def get_api_with_signature(self, name_api, param):
- param.update({'ctime': '1'})
- sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest()
- data = {
- 'apiKey': self._API_KEY,
- 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(),
- **param,
- }
- return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}'
+ def _parse_items(self, items):
+ for url in traverse_obj(items, (..., 'link')) or []:
+ yield self.url_result(urljoin(self._DOMAIN, url))
class ZingMp3IE(ZingMp3BaseIE):
_VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed'
+ IE_NAME = 'zingmp3'
+ IE_DESC = 'zingmp3.vn'
_TESTS = [{
'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
'md5': 'ead7ae13693b3205cbc89536a077daed',
@@ -147,7 +93,7 @@ class ZingMp3IE(ZingMp3BaseIE):
},
}, {
'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
- 'md5': 'c7f23d971ac1a4f675456ed13c9b9612',
+ 'md5': '3c2081e79471a2f4a3edd90b70b185ea',
'info_dict': {
'id': 'ZO8ZF7C7',
'title': 'Sương Hoa Đưa Lối',
@@ -180,11 +126,63 @@ class ZingMp3IE(ZingMp3BaseIE):
'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
'only_matching': True,
}]
- IE_NAME = 'zingmp3'
- IE_DESC = 'zingmp3.vn'
- def _process_data(self, data, song_id, type_url):
- return self._extract_item(data, song_id, type_url, True)
+ def _real_extract(self, url):
+ song_id, url_type = self._match_valid_url(url).group('id', 'type')
+ item = self._call_api(url_type, {'id': song_id})
+
+ item_id = item.get('encodeId') or song_id
+ if url_type == 'video-clip':
+ source = item.get('streaming')
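+ # Direct mp4 renditions come from the separate mobile getvideoinfo endpoint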
+ source['mp4'] = self._download_json(
+ 'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id,
+ query={'requestdata': json.dumps({'id': item_id})},
+ note='Downloading mp4 JSON metadata').get('source')
+ else:
+ source = self._call_api('song-streaming', {'id': item_id})
+
+ formats = []
+ for k, v in (source or {}).items():
+ if not v or v == 'VIP':
+ continue
+ if k not in ('mp4', 'hls'):
+ formats.append({
+ 'ext': 'mp3',
+ 'format_id': k,
+ 'tbr': int_or_none(k),
+ 'url': self._proto_relative_url(v),
+ 'vcodec': 'none',
+ })
+ continue
+ for res, video_url in v.items():
+ if not video_url:
+ continue
+ if k == 'hls':
+ formats.extend(self._extract_m3u8_formats(video_url, item_id, 'mp4', m3u8_id=k, fatal=False))
+ continue
+ formats.append({
+ 'format_id': f'mp4-{res}',
+ 'url': video_url,
+ 'height': int_or_none(res),
+ })
+
+ if not formats and item.get('msg') == 'Sorry, this content is not available in your country.':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+
+ lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file')
+
+ return {
+ 'id': item_id,
+ 'title': traverse_obj(item, 'title', 'alias'),
+ 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'),
+ 'duration': int_or_none(item.get('duration')),
+ 'track': traverse_obj(item, 'title', 'alias'),
+ 'artist': traverse_obj(item, 'artistsNames', 'artists_names'),
+ 'album': traverse_obj(item, ('album', ('name', 'title')), get_all=False),
+ 'album_artist': traverse_obj(item, ('album', ('artistsNames', 'artists_names')), get_all=False),
+ 'formats': formats,
+ 'subtitles': {'origin': [{'url': lyric}]} if lyric else None,
+ }
class ZingMp3AlbumIE(ZingMp3BaseIE):
@@ -192,19 +190,17 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
_TESTS = [{
'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
'info_dict': {
- '_type': 'playlist',
'id': 'ZWZBWDAF',
'title': 'Lâu Đài Tình Ái',
},
- 'playlist_count': 9,
+ 'playlist_mincount': 9,
}, {
'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html',
'info_dict': {
- '_type': 'playlist',
'id': 'ZWZAEZZD',
'title': 'Những Bài Hát Hay Nhất Của Mr. Siro',
},
- 'playlist_count': 49,
+ 'playlist_mincount': 49,
}, {
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
'only_matching': True,
@@ -214,12 +210,176 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
}]
IE_NAME = 'zingmp3:album'
- def _process_data(self, data, song_id, type_url):
- def entries():
- for item in traverse_obj(data, ('song', 'items')) or []:
- entry = self._extract_item(item, song_id, type_url, False)
- if entry:
- yield entry
+ def _real_extract(self, url):
+ song_id, url_type = self._match_valid_url(url).group('id', 'type')
+ data = self._call_api(url_type, {'id': song_id})
+ return self.playlist_result(
+ self._parse_items(traverse_obj(data, ('song', 'items'))),
+ traverse_obj(data, 'id', 'encodeId'), traverse_obj(data, 'name', 'title'))
+
+
+class ZingMp3ChartHomeIE(ZingMp3BaseIE):
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:zing-chart|moi-phat-hanh))/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/zing-chart',
+ 'info_dict': {
+ 'id': 'zing-chart',
+ },
+ 'playlist_mincount': 100,
+ }, {
+ 'url': 'https://zingmp3.vn/moi-phat-hanh',
+ 'info_dict': {
+ 'id': 'moi-phat-hanh',
+ },
+ 'playlist_mincount': 100,
+ }]
+ IE_NAME = 'zingmp3:chart-home'
+
+ def _real_extract(self, url):
+ url_type = self._match_id(url)
+ data = self._call_api(url_type, {'id': url_type})
+ items = traverse_obj(data, ('RTChart', 'items') if url_type == 'zing-chart' else 'items')
+ return self.playlist_result(self._parse_items(items), url_type)
+
- return self.playlist_result(entries(), traverse_obj(data, 'id', 'encodeId'),
- traverse_obj(data, 'name', 'title'))
+class ZingMp3WeekChartIE(ZingMp3BaseIE):
+ _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'zing-chart-tuan'
+ IE_NAME = 'zingmp3:week-chart'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-Viet-Nam/IWZ9Z08I.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08I',
+ 'title': 'zing-chart-vn',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-US-UK/IWZ9Z0BW.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z0BW',
+ 'title': 'zing-chart-us',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-KPop/IWZ9Z0BO.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z0BO',
+ 'title': 'zing-chart-korea',
+ },
+ 'playlist_mincount': 10,
+ }]
+
+ def _real_extract(self, url):
+ song_id, url_type = self._match_valid_url(url).group('id', 'type')
+ data = self._call_api(url_type, {'id': song_id})
+ return self.playlist_result(
+ self._parse_items(data['items']), song_id, f'zing-chart-{data.get("country", "")}')
+
+
+class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE):
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>the-loai-video)/(?P<regions>[^/]+)/(?P<id>[^\.]+)'
+ IE_NAME = 'zingmp3:chart-music-video'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/the-loai-video/Viet-Nam/IWZ9Z08I.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08I',
+ 'title': 'the-loai-video_Viet-Nam',
+ },
+ 'playlist_mincount': 400,
+ }, {
+ 'url': 'https://zingmp3.vn/the-loai-video/Au-My/IWZ9Z08O.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08O',
+ 'title': 'the-loai-video_Au-My',
+ },
+ 'playlist_mincount': 40,
+ }, {
+ 'url': 'https://zingmp3.vn/the-loai-video/Han-Quoc/IWZ9Z08W.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z08W',
+ 'title': 'the-loai-video_Han-Quoc',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'https://zingmp3.vn/the-loai-video/Khong-Loi/IWZ9Z086.html',
+ 'info_dict': {
+ 'id': 'IWZ9Z086',
+ 'title': 'the-loai-video_Khong-Loi',
+ },
+ 'playlist_mincount': 10,
+ }]
+
+ def _fetch_page(self, song_id, url_type, page):
+ return self._parse_items(self._call_api(url_type, {
+ 'id': song_id,
+ 'type': 'genre',
+ 'page': page + 1,
+ 'count': self._PER_PAGE
+ }).get('items'))
+
+ def _real_extract(self, url):
+ song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type')
+ return self.playlist_result(
+ OnDemandPagedList(functools.partial(self._fetch_page, song_id, url_type), self._PER_PAGE),
+ song_id, f'{url_type}_{regions}')
+
+
+class ZingMp3UserIE(ZingMp3BaseIE):
+ _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<user>[^/]+)/(?P<type>bai-hat|single|album|video)/?(?:[?#]|$)'
+ IE_NAME = 'zingmp3:user'
+ _TESTS = [{
+ 'url': 'https://zingmp3.vn/Mr-Siro/bai-hat',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - bai-hat',
+ 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+ },
+ 'playlist_mincount': 91,
+ }, {
+ 'url': 'https://zingmp3.vn/Mr-Siro/album',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - album',
+ 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://zingmp3.vn/Mr-Siro/single',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - single',
+ 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://zingmp3.vn/Mr-Siro/video',
+ 'info_dict': {
+ 'id': 'IWZ98609',
+ 'title': 'Mr. Siro - video',
+ 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+ },
+ 'playlist_mincount': 15,
+ }]
+
+ def _fetch_page(self, user_id, url_type, page):
+ url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video'
+ return self._parse_items(self._call_api(url_type, {
+ 'id': user_id,
+ 'type': 'artist',
+ 'page': page + 1,
+ 'count': self._PER_PAGE
+ }, query={'sort': 'new', 'sectionId': 'aSong'}).get('items'))
+
+ def _real_extract(self, url):
+ user_alias, url_type = self._match_valid_url(url).group('user', 'type')
+ if not url_type:
+ url_type = 'bai-hat'
+
+ user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias})
+ if url_type in ('bai-hat', 'video'):
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, user_info['id'], url_type), self._PER_PAGE)
+ else:
+ entries = self._parse_items(traverse_obj(user_info, (
+ 'sections', lambda _, v: v['link'] == f'/{user_alias}/{url_type}', 'items', ...)))
+ return self.playlist_result(
+ entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography'))
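
Note: every paged extractor above follows the same pattern: a _fetch_page(id, type, page) callback is bound with functools.partial and wrapped in OnDemandPagedList, so only the pages actually requested by the playlist selection are fetched. A minimal sketch of that contract (fetch_page and PER_PAGE are illustrative stand-ins; only OnDemandPagedList and its (pagefunc, pagesize) interface come from hypervideo_dl.utils):

    import functools

    from hypervideo_dl.utils import OnDemandPagedList

    PER_PAGE = 50  # stand-in for ZingMp3BaseIE._PER_PAGE

    def fetch_page(playlist_id, page):
        # The real extractors do a _call_api() round-trip here
        return [f'{playlist_id}-entry-{page * PER_PAGE + i}' for i in range(PER_PAGE)]

    entries = OnDemandPagedList(functools.partial(fetch_page, 'ZWZAEZZD'), PER_PAGE)
    print(entries.getslice(0, 3))  # only page 0 is ever fetched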
diff --git a/hypervideo_dl/extractor/zoom.py b/hypervideo_dl/extractor/zoom.py
index c005488..ef8b715 100644
--- a/hypervideo_dl/extractor/zoom.py
+++ b/hypervideo_dl/extractor/zoom.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -90,8 +86,6 @@ class ZoomIE(InfoExtractor):
'preference': -1
})
- self._sort_formats(formats)
-
return {
'id': play_id,
'title': data.get('topic'),
diff --git a/hypervideo_dl/extractor/zype.py b/hypervideo_dl/extractor/zype.py
index 7663cb3..8cf9945 100644
--- a/hypervideo_dl/extractor/zype.py
+++ b/hypervideo_dl/extractor/zype.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
@@ -18,6 +15,7 @@ class ZypeIE(InfoExtractor):
_ID_RE = r'[\da-fA-F]+'
_COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
_VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
+ _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_COMMON_RE % _ID_RE}.+?)\1']
_TEST = {
'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
'md5': 'eaee31d474c76a955bdaba02a505c595',
@@ -32,14 +30,6 @@ class ZypeIE(InfoExtractor):
},
}
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
- webpage)]
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -107,7 +97,6 @@ class ZypeIE(InfoExtractor):
if text_tracks:
text_tracks = self._parse_json(
text_tracks, video_id, js_to_json, False)
- self._sort_formats(formats)
if text_tracks:
for text_track in text_tracks:
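
Note: the deleted ZypeIE._extract_urls above is part of a wider migration in this release: extractors now declare a class-level _EMBED_REGEX list whose patterns carry a named "url" group, and the common base class discovers embeds generically. Roughly, the mechanism works like this (a sketch with an invented pattern, not the actual InfoExtractor implementation):

    import re

    class FakeExtractor:
        # Same shape as ZypeIE._EMBED_REGEX above, but with an illustrative pattern
        _EMBED_REGEX = [r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.example\.com/embed/[^"\']+)\1']

        @classmethod
        def _extract_embed_urls(cls, webpage):
            for pattern in cls._EMBED_REGEX:
                for mobj in re.finditer(pattern, webpage):
                    yield mobj.group('url')

    page = '<script src="https://player.example.com/embed/abc.js?api_key=K"></script>'
    print(list(FakeExtractor._extract_embed_urls(page)))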
diff --git a/hypervideo_dl/jsinterp.py b/hypervideo_dl/jsinterp.py
index 46834f8..adc5a19 100644
--- a/hypervideo_dl/jsinterp.py
+++ b/hypervideo_dl/jsinterp.py
@@ -1,31 +1,136 @@
-from collections.abc import MutableMapping
+import collections
+import contextlib
+import itertools
import json
+import math
import operator
import re
from .utils import (
+ NO_DEFAULT,
ExtractorError,
+ js_to_json,
remove_quotes,
+ truncate_string,
+ unified_timestamp,
+ write_string,
)
-_OPERATORS = [
- ('|', operator.or_),
- ('^', operator.xor),
- ('&', operator.and_),
- ('>>', operator.rshift),
- ('<<', operator.lshift),
- ('-', operator.sub),
- ('+', operator.add),
- ('%', operator.mod),
- ('/', operator.truediv),
- ('*', operator.mul),
-]
-_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
-_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
-_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+def _js_bit_op(op):
+ def zeroise(x):
+ return 0 if x in (None, JS_Undefined) else x
-_MATCHING_PARENS = dict(zip('({[', ')}]'))
+ def wrapped(a, b):
+ return op(zeroise(a), zeroise(b)) & 0xffffffff
+
+ return wrapped
+
+
+def _js_arith_op(op):
+
+ def wrapped(a, b):
+ if JS_Undefined in (a, b):
+ return float('nan')
+ return op(a or 0, b or 0)
+
+ return wrapped
+
+
+def _js_div(a, b):
+ if JS_Undefined in (a, b) or not (a and b):
+ return float('nan')
+ return (a or 0) / b if b else float('inf')
+
+
+def _js_mod(a, b):
+ if JS_Undefined in (a, b) or not b:
+ return float('nan')
+ return (a or 0) % b
+
+
+def _js_exp(a, b):
+ if not b:
+        return 1  # even 0 ** 0 is 1 in JS
+ elif JS_Undefined in (a, b):
+ return float('nan')
+ return (a or 0) ** b
+
+
+def _js_eq_op(op):
+
+ def wrapped(a, b):
+ if {a, b} <= {None, JS_Undefined}:
+ return op(a, a)
+ return op(a, b)
+
+ return wrapped
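+
+# JS loose equality treats null and undefined as equal to each other but to
+# nothing else: null == undefined is true, yet null == 0 is false. The
+# "{a, b} <= {None, JS_Undefined}" guard above reproduces exactly that.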
+
+
+def _js_comp_op(op):
+
+ def wrapped(a, b):
+ if JS_Undefined in (a, b):
+ return False
+ if isinstance(a, str) or isinstance(b, str):
+ return op(str(a or 0), str(b or 0))
+ return op(a or 0, b or 0)
+
+ return wrapped
+
+
+def _js_ternary(cndn, if_true=True, if_false=False):
+ """Simulate JS's ternary operator (cndn?if_true:if_false)"""
+ if cndn in (False, None, 0, '', JS_Undefined):
+ return if_false
+ with contextlib.suppress(TypeError):
+ if math.isnan(cndn): # NB: NaN cannot be checked by membership
+ return if_false
+ return if_true
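+
+# e.g. _js_ternary(0, 'a', 'b') -> 'b' and _js_ternary(float('nan')) -> False:
+# false, null, 0, '', undefined and NaN are all falsy, as in JavaScript.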
+
+
+# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
+_OPERATORS = { # None => Defined in JSInterpreter._operator
+ '?': None,
+ '??': None,
+ '||': None,
+ '&&': None,
+
+ '|': _js_bit_op(operator.or_),
+ '^': _js_bit_op(operator.xor),
+ '&': _js_bit_op(operator.and_),
+
+ '===': operator.is_,
+ '!==': operator.is_not,
+ '==': _js_eq_op(operator.eq),
+ '!=': _js_eq_op(operator.ne),
+
+ '<=': _js_comp_op(operator.le),
+ '>=': _js_comp_op(operator.ge),
+ '<': _js_comp_op(operator.lt),
+ '>': _js_comp_op(operator.gt),
+
+ '>>': _js_bit_op(operator.rshift),
+ '<<': _js_bit_op(operator.lshift),
+
+ '+': _js_arith_op(operator.add),
+ '-': _js_arith_op(operator.sub),
+
+ '*': _js_arith_op(operator.mul),
+ '%': _js_mod,
+ '/': _js_div,
+ '**': _js_exp,
+}
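+# NB: insertion order doubles as precedence, lowest first. Expression parsing
+# below splits at the first operator found in this order, so '1+2*3' is
+# separated at '+' and the tighter-binding '2*3' becomes the right-hand side.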
+
+_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}
+
+_NAME_RE = r'[a-zA-Z_$][\w$]*'
+_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
+_QUOTES = '\'"/'
+
+
+class JS_Undefined:
+ pass
class JS_Break(ExtractorError):
@@ -38,47 +143,79 @@ class JS_Continue(ExtractorError):
ExtractorError.__init__(self, 'Invalid continue')
-class LocalNameSpace(MutableMapping):
- def __init__(self, *stack):
- self.stack = tuple(stack)
+class JS_Throw(ExtractorError):
+ def __init__(self, e):
+ self.error = e
+ ExtractorError.__init__(self, f'Uncaught exception {e}')
- def __getitem__(self, key):
- for scope in self.stack:
- if key in scope:
- return scope[key]
- raise KeyError(key)
+class LocalNameSpace(collections.ChainMap):
def __setitem__(self, key, value):
- for scope in self.stack:
+ for scope in self.maps:
if key in scope:
scope[key] = value
- break
- else:
- self.stack[0][key] = value
- return value
+ return
+ self.maps[0][key] = value
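+
+    # Assignment follows the scope chain: a name already defined in some scope
+    # is updated in place, while new names land in the innermost map, e.g.
+    # ns = LocalNameSpace({'x': 0}, {'y': 1}); ns['y'] = 2 updates the second
+    # map, but ns['z'] = 3 goes into ns.maps[0].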
def __delitem__(self, key):
raise NotImplementedError('Deleting is not supported')
- def __iter__(self):
- for scope in self.stack:
- yield from scope
-
- def __len__(self, key):
- return len(iter(self))
- def __repr__(self):
- return f'LocalNameSpace{self.stack}'
+class Debugger:
+ import sys
+ ENABLED = False and 'pytest' in sys.modules
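+    # The "False and" guard keeps tracing permanently off; change it to True
+    # to log every interpreted statement (and any raised errors) under pytest.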
+ @staticmethod
+ def write(*args, level=100):
+ write_string(f'[debug] JS: {" " * (100 - level)}'
+ f'{" ".join(truncate_string(str(x), 50, 50) for x in args)}\n')
+
+ @classmethod
+ def wrap_interpreter(cls, f):
+ def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
+ if cls.ENABLED and stmt.strip():
+ cls.write(stmt, level=allow_recursion)
+ try:
+ ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
+ except Exception as e:
+ if cls.ENABLED:
+ if isinstance(e, ExtractorError):
+ e = e.orig_msg
+ cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
+ raise
+ if cls.ENABLED and stmt.strip():
+ cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
+ return ret, should_ret
+ return interpret_statement
+
+
+class JSInterpreter:
+ __named_object_counter = 0
+
+ _RE_FLAGS = {
+ # special knowledge: Python's re flags are bitmask values, current max 128
+ # invent new bitmask values well above that for literal parsing
+ # TODO: new pattern class to execute matches with these flags
+ 'd': 1024, # Generate indices for substring matches
+ 'g': 2048, # Global search
+ 'i': re.I, # Case-insensitive search
+ 'm': re.M, # Multi-line search
+ 's': re.S, # Allows . to match newline characters
+ 'u': re.U, # Treat a pattern as a sequence of unicode code points
+ 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
+ }
+
+ _EXC_NAME = '__hypervideo_dl_exception__'
-class JSInterpreter(object):
def __init__(self, code, objects=None):
- if objects is None:
- objects = {}
- self.code = code
- self._functions = {}
- self._objects = objects
- self.__named_object_counter = 0
+ self.code, self._functions = code, {}
+ self._objects = {} if objects is None else objects
+
+ class Exception(ExtractorError):
+ def __init__(self, msg, expr=None, *args, **kwargs):
+ if expr is not None:
+ msg = f'{msg.rstrip()} in: {truncate_string(expr, 50, 50)}'
+ super().__init__(msg, *args, **kwargs)
def _named_object(self, namespace, obj):
self.__named_object_counter += 1
@@ -86,18 +223,42 @@ class JSInterpreter(object):
namespace[name] = obj
return name
+ @classmethod
+ def _regex_flags(cls, expr):
+ flags = 0
+ if not expr:
+ return flags, expr
+        for idx, ch in enumerate(expr):
+            if ch not in cls._RE_FLAGS:
+                break
+            flags |= cls._RE_FLAGS[ch]
+        else:
+            idx += 1
+        return flags, expr[idx:]
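+
+    # e.g. _regex_flags('i.exec(s)') -> (re.I, '.exec(s)'): the trailing member
+    # access survives flag parsing so the interpreter can keep going.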
+
@staticmethod
def _separate(expr, delim=',', max_split=None):
+ OP_CHARS = '+-*/%&|^=<>!,;{}:['
if not expr:
return
counters = {k: 0 for k in _MATCHING_PARENS.values()}
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
+ in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
for idx, char in enumerate(expr):
- if char in _MATCHING_PARENS:
+ if not in_quote and char in _MATCHING_PARENS:
counters[_MATCHING_PARENS[char]] += 1
- elif char in counters:
- counters[char] -= 1
- if char != delim[pos] or any(counters.values()):
+ elif not in_quote and char in counters:
+ # Something's wrong if we get negative, but ignore it anyway
+ if counters[char]:
+ counters[char] -= 1
+ elif not escaping:
+ if char in _QUOTES and in_quote in (char, None):
+ if in_quote or after_op or char != '/':
+ in_quote = None if in_quote and not in_regex_char_group else char
+ elif in_quote == '/' and char in '[]':
+ in_regex_char_group = char == '['
+ escaping = not escaping and in_quote and char == '\\'
+ after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)
+
+ if char != delim[pos] or any(counters.values()) or in_quote:
pos = 0
continue
elif pos != delim_len:
@@ -110,150 +271,241 @@ class JSInterpreter(object):
break
yield expr[start:]
- @staticmethod
- def _separate_at_paren(expr, delim):
- separated = list(JSInterpreter._separate(expr, delim, 1))
+ @classmethod
+ def _separate_at_paren(cls, expr, delim=None):
+ if delim is None:
+ delim = expr and _MATCHING_PARENS[expr[0]]
+ separated = list(cls._separate(expr, delim, 1))
if len(separated) < 2:
- raise ExtractorError(f'No terminating paren {delim} in {expr}')
+ raise cls.Exception(f'No terminating paren {delim}', expr)
return separated[0][1:].strip(), separated[1].strip()
+ def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
+ if op in ('||', '&&'):
+ if (op == '&&') ^ _js_ternary(left_val):
+ return left_val # short circuiting
+ elif op == '??':
+ if left_val not in (None, JS_Undefined):
+ return left_val
+ elif op == '?':
+ right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
+
+ right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
+ if not _OPERATORS.get(op):
+ return right_val
+
+ try:
+ return _OPERATORS[op](left_val, right_val)
+ except Exception as e:
+ raise self.Exception(f'Failed to evaluate {left_val!r} {op} {right_val!r}', expr, cause=e)
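+
+    # Short-circuiting mirrors JavaScript: '&&'/'||' skip evaluating the
+    # right-hand side once the left side decides the result, and '??' only
+    # evaluates it when the left side is null/undefined.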
+
+ def _index(self, obj, idx, allow_undefined=False):
+ if idx == 'length':
+ return len(obj)
+ try:
+ return obj[int(idx)] if isinstance(obj, list) else obj[idx]
+ except Exception as e:
+ if allow_undefined:
+ return JS_Undefined
+ raise self.Exception(f'Cannot get index {idx}', repr(obj), cause=e)
+
+ def _dump(self, obj, namespace):
+ try:
+ return json.dumps(obj)
+ except TypeError:
+ return self._named_object(namespace, obj)
+
+ @Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0:
- raise ExtractorError('Recursion limit reached')
+ raise self.Exception('Recursion limit reached')
+ allow_recursion -= 1
+
+ should_return = False
+ sub_statements = list(self._separate(stmt, ';')) or ['']
+ expr = stmt = sub_statements.pop().strip()
- sub_statements = list(self._separate(stmt, ';'))
- stmt = (sub_statements or ['']).pop()
for sub_stmt in sub_statements:
- ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
- if should_abort:
- return ret
+ ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
+ if should_return:
+ return ret, should_return
- should_abort = False
- stmt = stmt.lstrip()
- stmt_m = re.match(r'var\s', stmt)
- if stmt_m:
- expr = stmt[len(stmt_m.group(0)):]
- else:
- return_m = re.match(r'return(?:\s+|$)', stmt)
- if return_m:
- expr = stmt[len(return_m.group(0)):]
- should_abort = True
- else:
- # Try interpreting it as an expression
- expr = stmt
+ m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
+ if m:
+ expr = stmt[len(m.group(0)):].strip()
+ if m.group('throw'):
+ raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
+ should_return = not m.group('var')
+ if not expr:
+ return None, should_return
- v = self.interpret_expression(expr, local_vars, allow_recursion)
- return v, should_abort
+ if expr[0] in _QUOTES:
+ inner, outer = self._separate(expr, expr[0], 1)
+ if expr[0] == '/':
+ flags, outer = self._regex_flags(outer)
+ inner = re.compile(inner[1:], flags=flags)
+ else:
+ inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
+ if not outer:
+ return inner, should_return
+ expr = self._named_object(local_vars, inner) + outer
+
+ if expr.startswith('new '):
+ obj = expr[4:]
+ if obj.startswith('Date('):
+ left, right = self._separate_at_paren(obj[4:])
+ expr = unified_timestamp(
+ self.interpret_expression(left, local_vars, allow_recursion), False)
+ if not expr:
+ raise self.Exception(f'Failed to parse date {left!r}', expr)
+ expr = self._dump(int(expr * 1000), local_vars) + right
+ else:
+ raise self.Exception(f'Unsupported object {obj}', expr)
- def interpret_expression(self, expr, local_vars, allow_recursion):
- expr = expr.strip()
- if expr == '': # Empty expression
- return None
+ if expr.startswith('void '):
+ left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
+ return None, should_return
if expr.startswith('{'):
- inner, outer = self._separate_at_paren(expr, '}')
- inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1)
+ inner, outer = self._separate_at_paren(expr)
+ # try for object expression (Map)
+ sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
+ if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
+ def dict_item(key, val):
+ val = self.interpret_expression(val, local_vars, allow_recursion)
+ if re.match(_NAME_RE, key):
+ return key, val
+ return self.interpret_expression(key, local_vars, allow_recursion), val
+
+ return dict(dict_item(k, v) for k, v in sub_expressions), should_return
+
+ inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
if not outer or should_abort:
- return inner
+ return inner, should_abort or should_return
else:
- expr = json.dumps(inner) + outer
+ expr = self._dump(inner, local_vars) + outer
if expr.startswith('('):
- inner, outer = self._separate_at_paren(expr, ')')
- inner = self.interpret_expression(inner, local_vars, allow_recursion)
- if not outer:
- return inner
+ inner, outer = self._separate_at_paren(expr)
+ inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
+ if not outer or should_abort:
+ return inner, should_abort or should_return
else:
- expr = json.dumps(inner) + outer
+ expr = self._dump(inner, local_vars) + outer
if expr.startswith('['):
- inner, outer = self._separate_at_paren(expr, ']')
+ inner, outer = self._separate_at_paren(expr)
name = self._named_object(local_vars, [
self.interpret_expression(item, local_vars, allow_recursion)
for item in self._separate(inner)])
expr = name + outer
- m = re.match(r'try\s*', expr)
- if m:
- if expr[m.end()] == '{':
- try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
- else:
- try_expr, expr = expr[m.end() - 1:], ''
- ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1)
+ m = re.match(r'''(?x)
+ (?P<try>try)\s*\{|
+ (?P<switch>switch)\s*\(|
+ (?P<for>for)\s*\(
+ ''', expr)
+ md = m.groupdict() if m else {}
+ if md.get('try'):
+ try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ err = None
+ try:
+ ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+ except Exception as e:
+ # XXX: This works for now, but makes debugging future issues very hard
+ err = e
+
+ pending = (None, False)
+ m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
+ if m:
+ sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ if err:
+ catch_vars = {}
+ if m.group('err'):
+ catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
+ catch_vars = local_vars.new_child(catch_vars)
+ err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
+
+ m = re.match(r'finally\s*\{', expr)
+ if m:
+ sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+
+ ret, should_abort = pending
if should_abort:
- return ret
- return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+ return ret, True
- m = re.match(r'catch\s*\(', expr)
- if m:
- # We ignore the catch block
- _, expr = self._separate_at_paren(expr, '}')
- return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+ if err:
+ raise err
- m = re.match(r'for\s*\(', expr)
- if m:
- constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+ elif md.get('for'):
+ constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
if remaining.startswith('{'):
- body, expr = self._separate_at_paren(remaining, '}')
+ body, expr = self._separate_at_paren(remaining)
else:
- m = re.match(r'switch\s*\(', remaining) # FIXME
- if m:
- switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')')
+ switch_m = re.match(r'switch\s*\(', remaining) # FIXME
+ if switch_m:
+ switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
body, expr = self._separate_at_paren(remaining, '}')
body = 'switch(%s){%s}' % (switch_val, body)
else:
body, expr = remaining, ''
start, cndn, increment = self._separate(constructor, ';')
- if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]:
- raise ExtractorError(
- f'Premature return in the initialization of a for loop in {constructor!r}')
+ self.interpret_expression(start, local_vars, allow_recursion)
while True:
- if not self.interpret_expression(cndn, local_vars, allow_recursion):
+ if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
break
try:
- ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1)
+ ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
if should_abort:
- return ret
+ return ret, True
except JS_Break:
break
except JS_Continue:
pass
- if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]:
- raise ExtractorError(
- f'Premature return in the initialization of a for loop in {constructor!r}')
- return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+ self.interpret_expression(increment, local_vars, allow_recursion)
- m = re.match(r'switch\s*\(', expr)
- if m:
- switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+ elif md.get('switch'):
+ switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
body, expr = self._separate_at_paren(remaining, '}')
items = body.replace('default:', 'case default:').split('case ')[1:]
for default in (False, True):
matched = False
for item in items:
- case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
+ case, stmt = (i.strip() for i in self._separate(item, ':', 1))
if default:
matched = matched or case == 'default'
elif not matched:
- matched = case != 'default' and switch_val == self.interpret_expression(case, local_vars, allow_recursion)
+ matched = (case != 'default'
+ and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
if not matched:
continue
try:
- ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1)
+ ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion)
if should_abort:
-                            return ret
+                            return ret, True
except JS_Break:
break
if matched:
break
- return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+
+ if md:
+ ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+ return ret, should_abort or should_return
# Comma separated statements
sub_expressions = list(self._separate(expr))
- expr = sub_expressions.pop().strip() if sub_expressions else ''
- for sub_expr in sub_expressions:
- self.interpret_expression(sub_expr, local_vars, allow_recursion)
+ if len(sub_expressions) > 1:
+ for sub_expr in sub_expressions:
+ ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+ return ret, False
for m in re.finditer(rf'''(?x)
(?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
@@ -265,107 +517,123 @@ class JSInterpreter(object):
local_vars[var] += 1 if sign[0] == '+' else -1
if m.group('pre_sign'):
ret = local_vars[var]
- expr = expr[:start] + json.dumps(ret) + expr[end:]
-
- for op, opfunc in _ASSIGN_OPERATORS:
- m = re.match(r'''(?x)
- (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
- \s*%s
- (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
- if not m:
- continue
- right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
-
- if m.groupdict().get('index'):
- lvar = local_vars[m.group('out')]
- idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
- if not isinstance(idx, int):
- raise ExtractorError(f'List indices must be integers: {idx}')
- cur = lvar[idx]
- val = opfunc(cur, right_val)
- lvar[idx] = val
- return val
- else:
- cur = local_vars.get(m.group('out'))
- val = opfunc(cur, right_val)
- local_vars[m.group('out')] = val
- return val
+ expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
- if expr.isdigit():
- return int(expr)
-
- if expr == 'break':
+ if not expr:
+ return None, should_return
+
+ m = re.match(fr'''(?x)
+ (?P<assign>
+ (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
+ (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
+ =(?!=)(?P<expr>.*)$
+ )|(?P<return>
+ (?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
+ )|(?P<indexing>
+ (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
+ )|(?P<attribute>
+ (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+ )|(?P<function>
+ (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
+ )''', expr)
+ if m and m.group('assign'):
+ left_val = local_vars.get(m.group('out'))
+
+ if not m.group('index'):
+ local_vars[m.group('out')] = self._operator(
+ m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
+ return local_vars[m.group('out')], should_return
+ elif left_val in (None, JS_Undefined):
+ raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
+
+ idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
+ if not isinstance(idx, (int, float)):
+ raise self.Exception(f'List index {idx} must be integer', expr)
+ idx = int(idx)
+ left_val[idx] = self._operator(
+ m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
+ return left_val[idx], should_return
+
+ elif expr.isdigit():
+ return int(expr), should_return
+
+ elif expr == 'break':
raise JS_Break()
elif expr == 'continue':
raise JS_Continue()
+ elif expr == 'undefined':
+ return JS_Undefined, should_return
+ elif expr == 'NaN':
+ return float('NaN'), should_return
- var_m = re.match(
- r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
- expr)
- if var_m:
- return local_vars[var_m.group('name')]
+ elif m and m.group('return'):
+ return local_vars.get(m.group('name'), JS_Undefined), should_return
- try:
- return json.loads(expr)
- except ValueError:
- pass
+ with contextlib.suppress(ValueError):
+ return json.loads(js_to_json(expr, strict=True)), should_return
- m = re.match(
- r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
- if m:
+ if m and m.group('indexing'):
val = local_vars[m.group('in')]
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
- return val[idx]
+ return self._index(val, idx), should_return
- for op, opfunc in _OPERATORS:
+ for op in _OPERATORS:
separated = list(self._separate(expr, op))
- if len(separated) < 2:
+ right_expr = separated.pop()
+ while True:
+ if op in '?<>*-' and len(separated) > 1 and not separated[-1].strip():
+ separated.pop()
+ elif not (separated and op == '?' and right_expr.startswith('.')):
+ break
+ right_expr = f'{op}{right_expr}'
+ if op != '-':
+ right_expr = f'{separated.pop()}{op}{right_expr}'
+ if not separated:
continue
- right_val = separated.pop()
- left_val = op.join(separated)
- left_val, should_abort = self.interpret_statement(
- left_val, local_vars, allow_recursion - 1)
- if should_abort:
- raise ExtractorError(f'Premature left-side return of {op} in {expr!r}')
- right_val, should_abort = self.interpret_statement(
- right_val, local_vars, allow_recursion - 1)
- if should_abort:
- raise ExtractorError(f'Premature right-side return of {op} in {expr!r}')
- return opfunc(left_val or 0, right_val)
+ left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
+ return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
- m = re.match(
- r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
- expr)
- if m:
- variable = m.group('var')
- member = remove_quotes(m.group('member') or m.group('member2'))
+ if m and m.group('attribute'):
+ variable, member, nullish = m.group('var', 'member', 'nullish')
+ if not member:
+ member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
arg_str = expr[m.end():]
if arg_str.startswith('('):
- arg_str, remaining = self._separate_at_paren(arg_str, ')')
+ arg_str, remaining = self._separate_at_paren(arg_str)
else:
arg_str, remaining = None, arg_str
def assertion(cndn, msg):
""" assert, but without risk of getting optimized out """
if not cndn:
- raise ExtractorError(f'{member} {msg}: {expr}')
+ raise self.Exception(f'{member} {msg}', expr)
def eval_method():
- nonlocal member
- if variable == 'String':
- obj = str
- elif variable in local_vars:
- obj = local_vars[variable]
- else:
+ if (variable, member) == ('console', 'debug'):
+ if Debugger.ENABLED:
+ Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion))
+ return
+
+ types = {
+ 'String': str,
+ 'Math': float,
+ }
+ obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
+ if obj is NO_DEFAULT:
if variable not in self._objects:
- self._objects[variable] = self.extract_object(variable)
- obj = self._objects[variable]
+ try:
+ self._objects[variable] = self.extract_object(variable)
+ except self.Exception:
+ if not nullish:
+ raise
+ obj = self._objects.get(variable, JS_Undefined)
+
+ if nullish and obj is JS_Undefined:
+ return JS_Undefined
+ # Member access
if arg_str is None:
- # Member access
- if member == 'length':
- return len(obj)
- return obj[member]
+ return self._index(obj, member, nullish)
# Function call
argvals = [
@@ -376,12 +644,17 @@ class JSInterpreter(object):
if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments')
return ''.join(map(chr, argvals))
- raise ExtractorError(f'Unsupported string method {member}')
+ raise self.Exception(f'Unsupported String method {member}', expr)
+ elif obj == float:
+ if member == 'pow':
+ assertion(len(argvals) == 2, 'takes two arguments')
+ return argvals[0] ** argvals[1]
+ raise self.Exception(f'Unsupported Math method {member}', expr)
if member == 'split':
assertion(argvals, 'takes one or more arguments')
- assertion(argvals == [''], 'with arguments is not implemented')
- return list(obj)
+ assertion(len(argvals) == 1, 'with limit argument is not implemented')
+ return obj.split(argvals[0]) if argvals[0] else list(obj)
elif member == 'join':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(len(argvals) == 1, 'takes exactly one argument')
@@ -427,7 +700,7 @@ class JSInterpreter(object):
assertion(argvals, 'takes one or more arguments')
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
f, this = (argvals + [''])[:2]
- return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)]
+ return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
elif member == 'indexOf':
assertion(argvals, 'takes one or more arguments')
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
@@ -436,32 +709,43 @@ class JSInterpreter(object):
return obj.index(idx, start)
except ValueError:
return -1
+ elif member == 'charCodeAt':
+ assertion(isinstance(obj, str), 'must be applied on a string')
+ assertion(len(argvals) == 1, 'takes exactly one argument')
+ idx = argvals[0] if isinstance(argvals[0], int) else 0
+ if idx >= len(obj):
+ return None
+ return ord(obj[idx])
- if isinstance(obj, list):
- member = int(member)
- return obj[member](argvals)
+ idx = int(member) if isinstance(obj, list) else member
+ return obj[idx](argvals, allow_recursion=allow_recursion)
if remaining:
- return self.interpret_expression(
+ ret, should_abort = self.interpret_statement(
self._named_object(local_vars, eval_method()) + remaining,
local_vars, allow_recursion)
+ return ret, should_return or should_abort
else:
- return eval_method()
+ return eval_method(), should_return
- m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
- if m:
- fname = m.group('func')
- argvals = tuple([
- int(v) if v.isdigit() else local_vars[v]
- for v in self._separate(m.group('args'))])
+ elif m and m.group('function'):
+ fname = m.group('fname')
+ argvals = [self.interpret_expression(v, local_vars, allow_recursion)
+ for v in self._separate(m.group('args'))]
if fname in local_vars:
- return local_vars[fname](argvals)
+ return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
elif fname not in self._functions:
self._functions[fname] = self.extract_function(fname)
- return self._functions[fname](argvals)
+ return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return
+
+ raise self.Exception(
+ f'Unsupported JS expression {truncate_string(expr, 20, 20) if expr != stmt else ""}', stmt)
- if expr:
- raise ExtractorError('Unsupported JS expression %r' % expr)
+ def interpret_expression(self, expr, local_vars, allow_recursion):
+ ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion)
+ if should_return:
+ raise self.Exception('Cannot return from an expression', expr)
+ return ret
def extract_object(self, objname):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
@@ -473,12 +757,14 @@ class JSInterpreter(object):
}\s*;
''' % (re.escape(objname), _FUNC_NAME_RE),
self.code)
+ if not obj_m:
+ raise self.Exception(f'Could not find object {objname}')
fields = obj_m.group('fields')
# Currently, it only supports function definitions
fields_m = re.finditer(
r'''(?x)
- (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
- ''' % _FUNC_NAME_RE,
+ (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
+ ''' % (_FUNC_NAME_RE, _NAME_RE),
fields)
for f in fields_m:
argnames = f.group('args').split(',')
@@ -489,16 +775,19 @@ class JSInterpreter(object):
def extract_function_code(self, funcname):
""" @returns argnames, code """
func_m = re.search(
- r'''(?x)
- (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+ r'''(?xs)
+ (?:
+ function\s+%(name)s|
+ [{;,]\s*%(name)s\s*=\s*function|
+ (?:var|const|let)\s+%(name)s\s*=\s*function
+ )\s*
\((?P<args>[^)]*)\)\s*
- (?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % (
- re.escape(funcname), re.escape(funcname), re.escape(funcname)),
+ (?P<code>{.+})''' % {'name': re.escape(funcname)},
self.code)
-        code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
         if func_m is None:
-            raise ExtractorError('Could not find JS function %r' % funcname)
-        return func_m.group('args').split(','), code
+            raise self.Exception(f'Could not find JS function "{funcname}"')
+        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
+        return [x.strip() for x in func_m.group('args').split(',')], code
def extract_function(self, funcname):
return self.extract_function_from_code(*self.extract_function_code(funcname))
@@ -510,12 +799,10 @@ class JSInterpreter(object):
if mobj is None:
break
start, body_start = mobj.span()
- body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
- name = self._named_object(
- local_vars,
- self.extract_function_from_code(
- [str.strip(x) for x in mobj.group('args').split(',')],
- body, local_vars, *global_stack))
+ body, remaining = self._separate_at_paren(code[body_start - 1:])
+ name = self._named_object(local_vars, self.extract_function_from_code(
+ [x.strip() for x in mobj.group('args').split(',')],
+ body, local_vars, *global_stack))
code = code[:start] + name + remaining
return self.build_function(argnames, code, local_vars, *global_stack)
@@ -524,17 +811,13 @@ class JSInterpreter(object):
def build_function(self, argnames, code, *global_stack):
global_stack = list(global_stack) or [{}]
- local_vars = global_stack.pop(0)
-
- def resf(args, **kwargs):
- local_vars.update({
- **dict(zip(argnames, args)),
- **kwargs
- })
- var_stack = LocalNameSpace(local_vars, *global_stack)
- for stmt in self._separate(code.replace('\n', ''), ';'):
- ret, should_abort = self.interpret_statement(stmt, var_stack)
- if should_abort:
- break
- return ret
+ argnames = tuple(argnames)
+
+ def resf(args, kwargs={}, allow_recursion=100):
+ global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None))
+ global_stack[0].update(kwargs)
+ var_stack = LocalNameSpace(*global_stack)
+ ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
+ if should_abort:
+ return ret
return resf
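
Note: with this rewrite, interpret_statement uniformly returns (value, should_return) pairs and the operator table above is JS-faithful. A quick smoke test of the public entry point (a sketch; call_function predates this patch and is unchanged by it):

    from hypervideo_dl.jsinterp import JSInterpreter

    jsi = JSInterpreter('function f(x){ var y = x ?? 7; return y ** 2 }')
    print(jsi.call_function('f', 3))     # 9
    print(jsi.call_function('f', None))  # 49, because null ?? 7 -> 7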
diff --git a/hypervideo_dl/minicurses.py b/hypervideo_dl/minicurses.py
index f9f99e3..7db02cb 100644
--- a/hypervideo_dl/minicurses.py
+++ b/hypervideo_dl/minicurses.py
@@ -1,7 +1,7 @@
import functools
from threading import Lock
from .utils import supports_terminal_sequences, write_string
CONTROL_SEQUENCES = {
'DOWN': '\n',
@@ -34,7 +34,7 @@ def format_text(text, f):
'''
@param f String representation of formatting to apply in the form:
[style] [light] font_color [on [light] bg_color]
- Eg: "red", "bold green on light blue"
+ E.g. "red", "bold green on light blue"
'''
f = f.upper()
tokens = f.strip().split()
@@ -69,6 +69,7 @@ def format_text(text, f):
raise SyntaxError(f'Invalid format {" ".join(tokens)!r} in {f!r}')
if fg_color or bg_color:
+ text = text.replace(CONTROL_SEQUENCES['RESET'], f'{fg_color}{bg_color}')
return f'{fg_color}{bg_color}{text}{CONTROL_SEQUENCES["RESET"]}'
else:
return text
@@ -178,4 +179,4 @@ class MultilinePrinter(MultilinePrinterBase):
*text, CONTROL_SEQUENCES['ERASE_LINE'],
f'{CONTROL_SEQUENCES["UP"]}{CONTROL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
else:
- self.write(*text, ' ' * self._lastlength)
+ self.write('\r', ' ' * self._lastlength, '\r')
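
Note: the functional change to format_text above is that a RESET sequence already embedded in the text is rewritten to the outer colors, so nested formatting no longer cancels the outer style early. For example (escape codes shown symbolically):

    from hypervideo_dl.minicurses import format_text

    inner = format_text('WARN', 'red')              # '<red>WARN<reset>'
    outer = format_text(f'[{inner}]', 'bold blue')  # inner '<reset>' becomes
    # '<bold blue>', so the closing ']' keeps the outer style until the final reset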
diff --git a/hypervideo_dl/options.py b/hypervideo_dl/options.py
index b91193a..bf8684c 100644
--- a/hypervideo_dl/options.py
+++ b/hypervideo_dl/options.py
@@ -1,54 +1,49 @@
-from __future__ import unicode_literals
-
-import os.path
+import collections
+import contextlib
import optparse
+import os.path
import re
+import shlex
+import shutil
+import string
import sys
-from .compat import (
- compat_expanduser,
- compat_get_terminal_size,
- compat_getenv,
- compat_kwargs,
- compat_shlex_split,
-)
-from .utils import (
- Config,
- expand_path,
- get_executable_path,
- OUTTMPL_TYPES,
- POSTPROCESS_WHEN,
- remove_end,
- write_string,
-)
+from .compat import compat_expanduser
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
-from .version import __version__
-
from .downloader.external import list_external_downloaders
from .postprocessor import (
FFmpegExtractAudioPP,
+ FFmpegMergerPP,
FFmpegSubtitlesConvertorPP,
FFmpegThumbnailsConvertorPP,
FFmpegVideoRemuxerPP,
SponsorBlockPP,
)
from .postprocessor.modify_chapters import DEFAULT_SPONSORBLOCK_CHAPTER_TITLE
+from .utils import (
+ OUTTMPL_TYPES,
+ POSTPROCESS_WHEN,
+ Config,
+ deprecation_warning,
+ expand_path,
+ format_field,
+ get_executable_path,
+ join_nonempty,
+ orderedSet_from_options,
+ remove_end,
+ write_string,
+)
+from .version import __version__
def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
- parser = create_parser()
- root = Config(parser)
-
+ root = Config(create_parser())
if ignore_config_files == 'if_override':
ignore_config_files = overrideArguments is not None
- if overrideArguments:
- root.append_config(overrideArguments, label='Override')
- else:
- root.append_config(sys.argv[1:], label='Command-line')
def _readUserConf(package_name, default=[]):
# .config
- xdg_config_home = compat_getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
+ xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
userConfFile = os.path.join(xdg_config_home, package_name, 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, '%s.conf' % package_name)
@@ -57,7 +52,7 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
return userConf, userConfFile
# appdata
- appdata_dir = compat_getenv('appdata')
+ appdata_dir = os.getenv('appdata')
if appdata_dir:
userConfFile = os.path.join(appdata_dir, package_name, 'config')
userConf = Config.read_file(userConfFile, default=None)
@@ -80,10 +75,10 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
def add_config(label, path, user=False):
""" Adds config and returns whether to continue """
- if root.parse_args()[0].ignoreconfig:
+ if root.parse_known_args()[0].ignoreconfig:
return False
# Multiple package names can be given here
- # Eg: ('hypervideo', 'youtube-dlc', 'youtube-dl') will look for
+ # E.g. ('hypervideo', 'youtube-dlc', 'youtube-dl') will look for
# the configuration file of any of these three packages
for package in ('hypervideo',):
if user:
@@ -99,55 +94,138 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
def load_configs():
yield not ignore_config_files
yield add_config('Portable', get_executable_path())
- yield add_config('Home', expand_path(root.parse_args()[0].paths.get('home', '')).strip())
+ yield add_config('Home', expand_path(root.parse_known_args()[0].paths.get('home', '')).strip())
yield add_config('User', None, user=True)
yield add_config('System', '/etc')
- if all(load_configs()):
- # If ignoreconfig is found inside the system configuration file,
- # the user configuration is removed
- if root.parse_args()[0].ignoreconfig:
- user_conf = next((i for i, conf in enumerate(root.configs) if conf.label == 'User'), None)
- if user_conf is not None:
- root.configs.pop(user_conf)
+ opts = optparse.Values({'verbose': True, 'print_help': False})
+ try:
+ try:
+ if overrideArguments:
+ root.append_config(overrideArguments, label='Override')
+ else:
+ root.append_config(sys.argv[1:], label='Command-line')
+ loaded_all_configs = all(load_configs())
+ except ValueError as err:
+ raise root.parser.error(err)
+
+ if loaded_all_configs:
+ # If ignoreconfig is found inside the system configuration file,
+ # the user configuration is removed
+ if root.parse_known_args()[0].ignoreconfig:
+ user_conf = next((i for i, conf in enumerate(root.configs) if conf.label == 'User'), None)
+ if user_conf is not None:
+ root.configs.pop(user_conf)
+
+ try:
+ root.configs[0].load_configs() # Resolve any aliases using --config-location
+ except ValueError as err:
+ raise root.parser.error(err)
+
+ opts, args = root.parse_args()
+ except optparse.OptParseError:
+ with contextlib.suppress(optparse.OptParseError):
+ opts, _ = root.parse_known_args(strict=False)
+ raise
+ except (SystemExit, KeyboardInterrupt):
+ opts.verbose = False
+ raise
+ finally:
+ verbose = opts.verbose and f'\n{root}'.replace('\n| ', '\n[debug] ')[1:]
+ if verbose:
+ write_string(f'{verbose}\n')
+ if opts.print_help:
+ if verbose:
+ write_string('\n')
+ root.parser.print_help()
+ if opts.print_help:
+ sys.exit()
+ return root.parser, opts, args
+
- opts, args = root.parse_args()
- if opts.verbose:
- write_string(f'\n{root}'.replace('\n| ', '\n[debug] ')[1:] + '\n')
- return parser, opts, args
+class _YoutubeDLHelpFormatter(optparse.IndentedHelpFormatter):
+ def __init__(self):
+ # No need to wrap help messages if we're on a wide console
+ max_width = shutil.get_terminal_size().columns or 80
+ # The % is chosen to get a pretty output in README.md
+ super().__init__(width=max_width, max_help_position=int(0.45 * max_width))
+
+ @staticmethod
+ def format_option_strings(option):
+ """ ('-o', '--option') -> -o, --format METAVAR """
+ opts = join_nonempty(
+ option._short_opts and option._short_opts[0],
+ option._long_opts and option._long_opts[0],
+ delim=', ')
+ if option.takes_value():
+ opts += f' {option.metavar}'
+ return opts
class _YoutubeDLOptionParser(optparse.OptionParser):
# optparse is deprecated since python 3.2. So assume a stable interface even for private methods
+ ALIAS_DEST = '_triggered_aliases'
+ ALIAS_TRIGGER_LIMIT = 100
+
+ def __init__(self):
+ super().__init__(
+ prog='hypervideo',
+ version=__version__,
+ usage='%prog [OPTIONS] URL [URL...]',
+ epilog='See full documentation at https://github.com/hypervideo/hypervideo#readme',
+ formatter=_YoutubeDLHelpFormatter(),
+ conflict_handler='resolve',
+ )
+ self.set_default(self.ALIAS_DEST, collections.defaultdict(int))
+
+ _UNKNOWN_OPTION = (optparse.BadOptionError, optparse.AmbiguousOptionError)
+ _BAD_OPTION = optparse.OptionValueError
+
+ def parse_known_args(self, args=None, values=None, strict=True):
+ """Same as parse_args, but ignore unknown switches. Similar to argparse.parse_known_args"""
+ self.rargs, self.largs = self._get_args(args), []
+ self.values = values or self.get_default_values()
+ while self.rargs:
+ arg = self.rargs[0]
+ try:
+ if arg == '--':
+ del self.rargs[0]
+ break
+ elif arg.startswith('--'):
+ self._process_long_opt(self.rargs, self.values)
+ elif arg.startswith('-') and arg != '-':
+ self._process_short_opts(self.rargs, self.values)
+ elif self.allow_interspersed_args:
+ self.largs.append(self.rargs.pop(0))
+ else:
+ break
+ except optparse.OptParseError as err:
+ if isinstance(err, self._UNKNOWN_OPTION):
+ self.largs.append(err.opt_str)
+ elif strict:
+ if isinstance(err, self._BAD_OPTION):
+ self.error(str(err))
+ raise
+ return self.check_values(self.values, self.largs)
+
+ def error(self, msg):
+ msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
+ raise optparse.OptParseError(f'{self.get_usage()}\n{msg}' if self.usage else msg)
+
+ def _get_args(self, args):
+ return sys.argv[1:] if args is None else list(args)
def _match_long_opt(self, opt):
- """Improve ambigious argument resolution by comparing option objects instead of argument strings"""
+ """Improve ambiguous argument resolution by comparing option objects instead of argument strings"""
try:
return super()._match_long_opt(opt)
except optparse.AmbiguousOptionError as e:
- if len(set(self._long_opt[p] for p in e.possibilities)) == 1:
+ if len({self._long_opt[p] for p in e.possibilities}) == 1:
return e.possibilities[0]
raise
def create_parser():
- def _format_option_string(option):
- ''' ('-o', '--option') -> -o, --format METAVAR'''
-
- opts = []
-
- if option._short_opts:
- opts.append(option._short_opts[0])
- if option._long_opts:
- opts.append(option._long_opts[0])
- if len(opts) > 1:
- opts.insert(1, ', ')
-
- if option.takes_value():
- opts.append(' %s' % option.metavar)
-
- return ''.join(opts)
-
def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip):
# append can be True, False or -1 (prepend)
current = list(getattr(parser.values, option.dest)) if append else []
@@ -157,30 +235,16 @@ def create_parser():
current + value if append is True else value + current)
def _set_from_options_callback(
- option, opt_str, value, parser, delim=',', allowed_values=None, aliases={},
+ option, opt_str, value, parser, allowed_values, delim=',', aliases={},
process=lambda x: x.lower().strip()):
- current = set(getattr(parser.values, option.dest))
- values = [process(value)] if delim is None else list(map(process, value.split(delim)[::-1]))
- while values:
- actual_val = val = values.pop()
- if not val:
- raise optparse.OptionValueError(f'Invalid {option.metavar} for {opt_str}: {value}')
- if val == 'all':
- current.update(allowed_values)
- elif val == '-all':
- current = set()
- elif val in aliases:
- values.extend(aliases[val])
- else:
- if val[0] == '-':
- val = val[1:]
- current.discard(val)
- else:
- current.update([val])
- if allowed_values is not None and val not in allowed_values:
- raise optparse.OptionValueError(f'wrong {option.metavar} for {opt_str}: {actual_val}')
+ values = [process(value)] if delim is None else map(process, value.split(delim))
+ try:
+ requested = orderedSet_from_options(values, collections.ChainMap(aliases, {'all': allowed_values}),
+ start=getattr(parser.values, option.dest))
+ except ValueError as e:
+ raise optparse.OptionValueError(f'wrong {option.metavar} for {opt_str}: {e.args[0]}')
- setattr(parser.values, option.dest, current)
+ setattr(parser.values, option.dest, set(requested))
def _dict_from_options_callback(
option, opt_str, value, parser,
@@ -190,9 +254,9 @@ def create_parser():
out_dict = dict(getattr(parser.values, option.dest))
multiple_args = not isinstance(value, str)
if multiple_keys:
- allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
+ allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
mobj = re.match(
- r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter),
+ fr'(?i)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
value[0] if multiple_args else value)
if mobj is not None:
keys, val = mobj.group('keys').split(','), mobj.group('val')
@@ -202,7 +266,7 @@ def create_parser():
keys, val = [default_key], value
else:
raise optparse.OptionValueError(
- 'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value))
+ f'wrong {opt_str} formatting; it should be {option.metavar}, not "{value}"')
try:
keys = map(process_key, keys) if process_key else keys
val = process(val) if process else val
@@ -212,30 +276,45 @@ def create_parser():
out_dict[key] = out_dict.get(key, []) + [val] if append else val
setattr(parser.values, option.dest, out_dict)
- # No need to wrap help messages if we're on a wide console
- columns = compat_get_terminal_size().columns
- max_width = columns if columns else 80
- # 47% is chosen because that is how README.md is currently formatted
- # and moving help text even further to the right is undesirable.
- # This can be reduced in the future to get a prettier output
- max_help_position = int(0.47 * max_width)
+ parser = _YoutubeDLOptionParser()
+ alias_group = optparse.OptionGroup(parser, 'Aliases')
+ Formatter = string.Formatter()
- fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
- fmt.format_option_strings = _format_option_string
+ def _create_alias(option, opt_str, value, parser):
+ aliases, opts = value
+ try:
+ nargs = len({i if f == '' else f
+ for i, (_, f, _, _) in enumerate(Formatter.parse(opts)) if f is not None})
+ opts.format(*map(str, range(nargs))) # validate
+ except Exception as err:
+ raise optparse.OptionValueError(f'wrong {opt_str} OPTIONS formatting; {err}')
+ if alias_group not in parser.option_groups:
+ parser.add_option_group(alias_group)
- kw = {
- 'version': __version__,
- 'formatter': fmt,
- 'usage': '%prog [OPTIONS] URL [URL...]',
- 'conflict_handler': 'resolve',
- }
+ aliases = (x if x.startswith('-') else f'--{x}' for x in map(str.strip, aliases.split(',')))
+ try:
+ args = [f'ARG{i}' for i in range(nargs)]
+ alias_group.add_option(
+ *aliases, nargs=nargs, dest=parser.ALIAS_DEST, type='str' if nargs else None,
+ metavar=' '.join(args), help=opts.format(*args), action='callback',
+ callback=_alias_callback, callback_kwargs={'opts': opts, 'nargs': nargs})
+ except Exception as err:
+ raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}')
- parser = _YoutubeDLOptionParser(**compat_kwargs(kw))
+ def _alias_callback(option, opt_str, value, parser, opts, nargs):
+ counter = getattr(parser.values, option.dest)
+ counter[opt_str] += 1
+ if counter[opt_str] > parser.ALIAS_TRIGGER_LIMIT:
+ raise optparse.OptionValueError(f'Alias {opt_str} exceeded invocation limit')
+ if nargs == 1:
+ value = [value]
+ assert (nargs == 0 and value is None) or len(value) == nargs
+ parser.rargs[:0] = shlex.split(
+ opts if value is None else opts.format(*map(shlex.quote, value)))
general = optparse.OptionGroup(parser, 'General Options')
general.add_option(
- '-h', '--help',
- action='help',
+ '-h', '--help', dest='print_help', action='store_true',
help='Print this help text and exit')
general.add_option(
'--version',
@@ -266,13 +345,28 @@ def create_parser():
action='store_true', dest='list_extractor_descriptions', default=False,
help='Output descriptions of all supported extractors and exit')
general.add_option(
+ '--use-extractors', '--ies',
+ action='callback', dest='allowed_extractors', metavar='NAMES', type='str',
+ default=[], callback=_list_from_options_callback,
+ help=(
+ 'Extractor names to use separated by commas. '
+ 'You can also use regexes, "all", "default" and "end" (end URL matching); '
+ 'e.g. --ies "holodex.*,end,youtube". '
+ 'Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. '
+ 'Use --list-extractors for a list of extractor names. (Alias: --ies)'))
+ general.add_option(
'--force-generic-extractor',
action='store_true', dest='force_generic_extractor', default=False,
- help='Force extraction to use the generic extractor')
+ help=optparse.SUPPRESS_HELP)
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for the search term "large apple". Use the value "auto" to let hypervideo guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching')
+ help=(
+ 'Use this prefix for unqualified URLs. '
+ 'E.g. "gvsearch2:python" downloads two videos from google videos for the search term "python". '
+ 'Use the value "auto" to let hypervideo guess ("auto_warning" to emit a warning when guessing). '
+ '"error" just throws an error. The default value "fixup_error" repairs broken URLs, '
+ 'but emits an error if this is not possible instead of searching'))
general.add_option(
'--ignore-config', '--no-config',
action='store_true', dest='ignoreconfig',
@@ -290,8 +384,8 @@ def create_parser():
'--config-locations',
dest='config_locations', metavar='PATH', action='append',
help=(
- 'Location of the main configuration file; either the path to the config or its containing directory. '
- 'Can be used multiple times and inside other configuration files'))
+ 'Location of the main configuration file; either the path to the config or its containing directory '
+ '("-" for stdin). Can be used multiple times and inside other configuration files'))
general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist', default=False,
@@ -327,9 +421,9 @@ def create_parser():
action='store_false', dest='mark_watched',
help='Do not mark videos watched (default)')
general.add_option(
- '--no-colors',
+ '--no-colors', '--no-colours',
action='store_true', dest='no_color', default=False,
- help='Do not emit color codes in output')
+ help='Do not emit color codes in output (Alias: --no-colours)')
general.add_option(
'--compat-options',
metavar='OPTS', dest='compat_opts', default=set(), type='str',
@@ -338,26 +432,37 @@ def create_parser():
'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
- 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata',
- 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
+ 'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley',
+ 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
+ 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
}, 'aliases': {
- 'youtube-dl': ['-multistreams', 'all'],
- 'youtube-dlc': ['-no-youtube-channel-redirect', '-no-live-chat', 'all'],
+ 'youtube-dl': ['all', '-multistreams'],
+ 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
}
}, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
'configurations by reverting some of the changes made in hypervideo. '
'See "Differences in default behavior" for details'))
+ general.add_option(
+ '--alias', metavar='ALIASES OPTIONS', dest='_', type='str', nargs=2,
+ action='callback', callback=_create_alias,
+ help=(
+ 'Create aliases for an option string. Unless an alias starts with a dash "-", it is prefixed with "--". '
+ 'Arguments are parsed according to the Python string formatting mini-language. '
+ 'E.g. --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}" creates options '
+ '"--get-audio" and "-X" that takes an argument (ARG0) and expands to '
+ '"-S=aext:ARG0,abr -x --audio-format ARG0". All defined aliases are listed in the --help output. '
+ 'Alias options can trigger more aliases; so be careful to avoid defining recursive options. '
+ f'As a safety measure, each alias may be triggered a maximum of {_YoutubeDLOptionParser.ALIAS_TRIGGER_LIMIT} times. '
+ 'This option can be used multiple times'))
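# Editor's sketch (not part of the patch): given the alias defined in the help
# text above,
#     --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}"
# a later `-X m4a` behaves as if the user had passed
#     -S=aext:m4a,abr -x --audio-format m4a
# i.e. "{0}" is filled with the alias's first argument, str.format-style.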
network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(
'--proxy', dest='proxy',
default=None, metavar='URL',
help=(
- 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
- 'SOCKS proxy, specify a proper scheme. For example '
- 'socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") '
- 'for direct connection'))
+ 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme, '
+ 'e.g. socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'))
network.add_option(
'--socket-timeout',
dest='socket_timeout', type=float, default=None, metavar='SECONDS',
@@ -410,15 +515,19 @@ def create_parser():
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
- help='Playlist video to start at (default is %default)')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
- help='Playlist video to end at (default is last)')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
- '--playlist-items',
+ '-I', '--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13')
+ help=(
+ 'Comma separated playlist_index of the videos to download. '
+ 'You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. '
+ 'Use negative indices to count from the right and negative STEP to download in reverse order. '
+ 'E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15'))
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
@@ -430,18 +539,17 @@ def create_parser():
selection.add_option(
'--min-filesize',
metavar='SIZE', dest='min_filesize', default=None,
- help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
+ help='Abort download if filesize is smaller than SIZE, e.g. 50k or 44.6M')
selection.add_option(
'--max-filesize',
metavar='SIZE', dest='max_filesize', default=None,
- help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
+ help='Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M')
selection.add_option(
'--date',
metavar='DATE', dest='date', default=None,
help=(
- 'Download only videos uploaded on this date. '
- 'The date can be "YYYYMMDD" or in the format '
- '"(now|today)[+-][0-9](day|week|month|year)(s)?"'))
+ 'Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format '
+ '[now|today|yesterday][-N[day|week|month|year]]. E.g. --date today-2weeks'))
selection.add_option(
'--datebefore',
metavar='DATE', dest='datebefore', default=None,
@@ -466,16 +574,17 @@ def create_parser():
'--match-filters',
metavar='FILTER', dest='match_filter', action='append',
help=(
- 'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
- 'number or a string using the operators defined in "Filtering formats". '
+ 'Generic video filter. Any "OUTPUT TEMPLATE" field can be compared with a '
+ 'number or a string using the operators defined in "Filtering Formats". '
'You can also simply specify a field to match if the field is present, '
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
- 'the filter matches if atleast one of the conditions are met. Eg: --match-filter '
+ 'the filter matches if at least one of the conditions is met. E.g. --match-filter '
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
'matches only videos that are not live OR those that have a like count more than 100 '
'(or the like field is not available) and also have a description '
- 'that contains the phrase "cats & dogs" (ignoring case)'))
+ 'that contains the phrase "cats & dogs" (caseless). '
+ 'Use "--match-filter -" to interactively ask whether to download each video'))
selection.add_option(
'--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None,
@@ -515,11 +624,11 @@ def create_parser():
selection.add_option(
'--break-per-input',
action='store_true', dest='break_per_url', default=False,
- help='Make --break-on-existing and --break-on-reject act only on the current input URL')
+ help='--break-on-existing, --break-on-reject, --max-downloads, and autonumber reset per input URL')
selection.add_option(
'--no-break-per-input',
action='store_false', dest='break_per_url',
- help='--break-on-existing and --break-on-reject terminates the entire download queue')
+ help='--break-on-existing and similar options terminate the entire download queue')
selection.add_option(
'--skip-playlist-after-errors', metavar='N',
dest='skip_playlist_after_errors', default=None, type=int,
@@ -574,6 +683,19 @@ def create_parser():
'--ap-list-mso',
action='store_true', dest='ap_list_mso', default=False,
help='List all supported multiple-system operators')
+ authentication.add_option(
+ '--client-certificate',
+ dest='client_certificate', metavar='CERTFILE',
+ help='Path to client certificate file in PEM format. May include the private key')
+ authentication.add_option(
+ '--client-certificate-key',
+ dest='client_certificate_key', metavar='KEYFILE',
+ help='Path to private key file for client certificate')
+ authentication.add_option(
+ '--client-certificate-password',
+ dest='client_certificate_password', metavar='PASSWORD',
+ help='Password for client certificate private key, if encrypted. '
+ 'If not provided, and the key is encrypted, hypervideo will ask interactively')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option(
@@ -590,13 +712,11 @@ def create_parser():
action='store_true', dest='format_sort_force', metavar='FORMAT', default=False,
help=(
'Force user specified sort order to have precedence over all fields, '
- 'see "Sorting Formats" for more details'))
+ 'see "Sorting Formats" for more details (Alias: --S-force)'))
video_format.add_option(
'--no-format-sort-force',
action='store_false', dest='format_sort_force', metavar='FORMAT', default=False,
- help=(
- 'Some fields have precedence over the user specified sort order (default), '
- 'see "Sorting Formats" for more details'))
+ help='Some fields have precedence over the user specified sort order (default)')
video_format.add_option(
'--video-multistreams',
action='store_true', dest='allow_multiple_video_streams', default=None,
@@ -630,7 +750,7 @@ def create_parser():
video_format.add_option(
'--check-formats',
action='store_const', const='selected', dest='check_formats', default=None,
- help='Check that the selected formats are actually downloadable')
+ help='Make sure formats are selected only from those that are actually downloadable')
video_format.add_option(
'--check-all-formats',
action='store_true', dest='check_formats',
@@ -655,9 +775,9 @@ def create_parser():
'--merge-output-format',
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
help=(
- 'If a merge is required (e.g. bestvideo+bestaudio), '
- 'output to given container format. One of mkv, mp4, ogg, webm, flv. '
- 'Ignored if no merge is required'))
+ 'Containers that may be used when merging formats, separated by "/", e.g. "mp4/mkv". '
+ 'Ignored if no merge is required. '
+ f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
video_format.add_option(
'--allow-unplayable-formats',
action='store_true', dest='allow_unplayable_formats', default=False,
@@ -695,14 +815,14 @@ def create_parser():
subtitles.add_option(
'--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
- help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
+ help='Subtitle format; accepts formats preference, e.g. "srt" or "ass/srt/best"')
subtitles.add_option(
'--sub-langs', '--srt-langs',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
default=[], callback=_list_from_options_callback,
help=(
- 'Languages of the subtitles to download (can be regex) or "all" separated by commas. (Eg: --sub-langs "en.*,ja") '
- 'You can prefix the language code with a "-" to exempt it from the requested languages. (Eg: --sub-langs all,-live_chat) '
+ 'Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja". '
+ 'You can prefix the language code with a "-" to exclude it from the requested languages, e.g. --sub-langs all,-live_chat. '
'Use --list-subs for a list of available language tags'))
downloader = optparse.OptionGroup(parser, 'Download Options')
@@ -713,11 +833,11 @@ def create_parser():
downloader.add_option(
'-r', '--limit-rate', '--rate-limit',
dest='ratelimit', metavar='RATE',
- help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+ help='Maximum download rate in bytes per second, e.g. 50K or 4.2M')
downloader.add_option(
'--throttled-rate',
dest='throttledratelimit', metavar='RATE',
- help='Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted (e.g. 100K)')
+ help='Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted, e.g. 100K')
downloader.add_option(
'-R', '--retries',
dest='retries', metavar='RETRIES', default=10,
@@ -731,13 +851,26 @@ def create_parser():
dest='fragment_retries', metavar='RETRIES', default=10,
help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
downloader.add_option(
+ '--retry-sleep',
+ dest='retry_sleep', metavar='[TYPE:]EXPR', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'http|fragment|file_access|extractor',
+ 'default_key': 'http',
+ }, help=(
+ 'Time to sleep between retries in seconds (optionally) prefixed by the type of retry '
+ '(http (default), fragment, file_access, extractor) to apply the sleep to. '
+ 'EXPR can be a number, linear=START[:END[:STEP=1]] or exp=START[:END[:BASE=2]]. '
+ 'This option can be used multiple times to set the sleep for the different retry types, '
+ 'e.g. --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20'))
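# Editor's reading of EXPR (an assumption based on the help text above, where
# n is the 0-based retry count):
#     linear=START[:END[:STEP]]  ->  min(START + STEP*n, END)
#     exp=START[:END[:BASE]]     ->  min(START * BASE**n, END)
# so "linear=1::2" sleeps 1, 3, 5, ... seconds and "fragment:exp=1:20"
# sleeps 1, 2, 4, 8, 16, 20, 20, ... seconds between fragment retries.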
+ downloader.add_option(
'--skip-unavailable-fragments', '--no-abort-on-unavailable-fragment',
action='store_true', dest='skip_unavailable_fragments', default=True,
- help='Skip unavailable fragments for DASH, hlsnative and ISM (default) (Alias: --no-abort-on-unavailable-fragment)')
+ help='Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) (Alias: --no-abort-on-unavailable-fragment)')
downloader.add_option(
'--abort-on-unavailable-fragment', '--no-skip-unavailable-fragments',
action='store_false', dest='skip_unavailable_fragments',
- help='Abort downloading if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)')
+ help='Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)')
downloader.add_option(
'--keep-fragments',
action='store_true', dest='keep_fragments', default=False,
@@ -749,7 +882,7 @@ def create_parser():
downloader.add_option(
'--buffer-size',
dest='buffersize', metavar='SIZE', default='1024',
- help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
+ help='Size of download buffer, e.g. 1024 or 16K (default is %default)')
downloader.add_option(
'--resize-buffer',
action='store_false', dest='noresizebuffer',
@@ -762,7 +895,7 @@ def create_parser():
'--http-chunk-size',
dest='http_chunk_size', metavar='SIZE', default=None,
help=(
- 'Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
+ 'Size of a chunk for chunk-based HTTP downloading, e.g. 10485760 or 10M (default is disabled). '
'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)'))
downloader.add_option(
'--test',
@@ -770,17 +903,25 @@ def create_parser():
help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--playlist-reverse',
- action='store_true',
- help='Download playlist videos in reverse order')
+ action='store_true', dest='playlist_reverse',
+ help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--no-playlist-reverse',
action='store_false', dest='playlist_reverse',
- help='Download playlist videos in default order (default)')
+ help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--playlist-random',
- action='store_true',
+ action='store_true', dest='playlist_random',
help='Download playlist videos in random order')
downloader.add_option(
+ '--lazy-playlist',
+ action='store_true', dest='lazy_playlist',
+ help='Process entries in the playlist as they are received. This disables n_entries, --playlist-random and --playlist-reverse')
+ downloader.add_option(
+ '--no-lazy-playlist',
+ action='store_false', dest='lazy_playlist',
+ help='Process videos in the playlist only after the entire playlist is parsed (default)')
+ downloader.add_option(
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
help='Set file xattribute ytdl.filesize with expected file size')
@@ -807,6 +948,14 @@ def create_parser():
'Do not use the mpegts container for HLS videos. '
'This is default when not downloading live streams'))
downloader.add_option(
+ '--download-sections',
+ metavar='REGEX', dest='download_ranges', action='append',
+ help=(
+ 'Download only chapters whose title matches the given regular expression. '
+ 'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
+ 'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
+ 'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
+ downloader.add_option(
'--downloader', '--external-downloader',
dest='external_downloader', metavar='[PROTO:]NAME', default={}, type='str',
action='callback', callback=_dict_from_options_callback,
@@ -817,11 +966,11 @@ def create_parser():
}, help=(
'Name or path of the external downloader to use (optionally) prefixed by '
'the protocols (http, ftp, m3u8, dash, rtsp, rtmp, mms) to use it for. '
- 'Currently supports native, %s (Recommended: aria2c). '
+ f'Currently supports native, {", ".join(sorted(list_external_downloaders()))}. '
'You can use this option multiple times to set different downloaders for different protocols. '
- 'For example, --downloader aria2c --downloader "dash,m3u8:native" will use '
+ 'E.g. --downloader aria2c --downloader "dash,m3u8:native" will use '
'aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads '
- '(Alias: --external-downloader)' % ', '.join(list_external_downloaders())))
+ '(Alias: --external-downloader)'))
downloader.add_option(
'--downloader-args', '--external-downloader-args',
metavar='NAME:ARGS', dest='external_downloader_args', default={}, type='str',
@@ -829,7 +978,7 @@ def create_parser():
callback_kwargs={
'allowed_keys': r'ffmpeg_[io]\d*|%s' % '|'.join(map(re.escape, list_external_downloaders())),
'default_key': 'default',
- 'process': compat_shlex_split
+ 'process': shlex.split
}, help=(
'Give these arguments to the external downloader. '
'Specify the downloader name and the arguments separated by a colon ":". '
@@ -936,7 +1085,8 @@ def create_parser():
}, help=(
'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". '
'Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). '
- 'Implies --quiet and --simulate (unless --no-simulate is used). This option can be used multiple times'))
+ 'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. '
+ 'This option can be used multiple times'))
verbosity.add_option(
'--print-to-file',
metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', default={}, type='str', nargs=2,
@@ -1028,7 +1178,7 @@ def create_parser():
'Template for progress outputs, optionally prefixed with one of "download:" (default), '
'"download-title:" (the console title), "postprocess:", or "postprocess-title:". '
'The video\'s fields are accessible under the "info" key and '
- 'the progress attributes are accessible under "progress" key. E.g.: '
+ 'the progress attributes are accessible under "progress" key. E.g. '
# TODO: Document the fields inside "progress"
'--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"'))
verbosity.add_option(
@@ -1044,6 +1194,10 @@ def create_parser():
action='store_true', dest='write_pages', default=False,
help='Write downloaded intermediary pages to files in the current directory to debug problems')
verbosity.add_option(
+ '--load-pages',
+ action='store_true', dest='load_pages', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
'--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
@@ -1054,7 +1208,7 @@ def create_parser():
verbosity.add_option(
'-C', '--call-home',
dest='call_home', action='store_true', default=False,
- # help='[Broken] Contact the hypervideo server for debugging')
+ # help='Contact the hypervideo server for debugging')
help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--no-call-home',
@@ -1102,7 +1256,7 @@ def create_parser():
filesystem.add_option(
'--output-na-placeholder',
dest='outtmpl_na_placeholder', metavar='TEXT', default='NA',
- help=('Placeholder value for unavailable meta fields in output filename template (default: "%default")'))
+ help=('Placeholder for unavailable fields in "OUTPUT TEMPLATE" (default: "%default")'))
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER', type=int,
@@ -1237,14 +1391,15 @@ def create_parser():
help='Do not read/dump cookies from/to file (default)')
filesystem.add_option(
'--cookies-from-browser',
- dest='cookiesfrombrowser', metavar='BROWSER[+KEYRING][:PROFILE]',
+ dest='cookiesfrombrowser', metavar='BROWSER[+KEYRING][:PROFILE][::CONTAINER]',
help=(
- 'The name of the browser and (optionally) the name/path of '
- 'the profile to load cookies from, separated by a ":". '
+ 'The name of the browser to load cookies from. '
f'Currently supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}. '
- 'By default, the most recently accessed profile is used. '
- 'The keyring used for decrypting Chromium cookies on Linux can be '
- '(optionally) specified after the browser name separated by a "+". '
+ 'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, '
+ 'the name/path of the PROFILE to load cookies from, '
+ 'and the CONTAINER name (if Firefox) ("none" for no container) '
+ 'can be given with their respective separators. '
+ 'By default, all containers of the most recently accessed profile are used. '
f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}'))
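# Editor's illustration of the metavar syntax above (hypothetical values):
#     --cookies-from-browser firefox                          # most recent profile
#     --cookies-from-browser "chrome+gnomekeyring:Profile 1"
#     --cookies-from-browser "firefox:default::Personal"      # one Firefox container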
filesystem.add_option(
'--no-cookies-from-browser',
@@ -1252,7 +1407,9 @@ def create_parser():
help='Do not load cookies from browser (default)')
filesystem.add_option(
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
- help='Location in the filesystem where youtube-dl can store some downloaded information (such as client ids and signatures) permanently. By default $XDG_CACHE_HOME/hypervideo or ~/.cache/hypervideo')
+ help=(
+ 'Location in the filesystem where hypervideo can store some downloaded information '
+ '(such as client ids and signatures) permanently. By default ${XDG_CACHE_HOME}/hypervideo'))
filesystem.add_option(
'--no-cache-dir', action='store_false', dest='cachedir',
help='Disable filesystem caching')
@@ -1308,26 +1465,27 @@ def create_parser():
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help=(
- 'Specify audio format to convert the audio to when -x is used. Currently supported formats are: '
- 'best (default) or one of %s' % ', '.join(FFmpegExtractAudioPP.SUPPORTED_EXTS)))
+ 'Format to convert the audio to when -x is used. '
+ f'(currently supported: best (default), {", ".join(sorted(FFmpegExtractAudioPP.SUPPORTED_EXTS))}). '
+ 'You can specify multiple rules using a syntax similar to --remux-video'))
postproc.add_option(
'--audio-quality', metavar='QUALITY',
dest='audioquality', default='5',
- help='Specify ffmpeg audio quality to use when converting the audio with -x. Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)')
+ help=(
+ 'Specify ffmpeg audio quality to use when converting the audio with -x. '
+ 'Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)'))
postproc.add_option(
'--remux-video',
metavar='FORMAT', dest='remuxvideo', default=None,
help=(
- 'Remux the video into another container if necessary (currently supported: %s). '
- 'If target container does not support the video/audio codec, remuxing will fail. '
- 'You can specify multiple rules; Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 '
- 'and anything else to mkv.' % ', '.join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)))
+ 'Remux the video into another container if necessary '
+ f'(currently supported: {", ".join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)}). '
+ 'If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; '
+ 'e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv'))
postproc.add_option(
'--recode-video',
metavar='FORMAT', dest='recodevideo', default=None,
- help=(
- 'Re-encode the video into another format if re-encoding is necessary. '
- 'The syntax and supported formats are the same as --remux-video'))
+ help='Re-encode the video into another format if necessary. The syntax and supported formats are the same as --remux-video')
postproc.add_option(
'--postprocessor-args', '--ppa',
metavar='NAME:ARGS', dest='postprocessor_args', default={}, type='str',
@@ -1335,7 +1493,7 @@ def create_parser():
callback_kwargs={
'allowed_keys': r'\w+(?:\+\w+)?',
'default_key': 'default-compat',
- 'process': compat_shlex_split,
+ 'process': shlex.split,
'multiple_keys': False
}, help=(
'Give these arguments to the postprocessors. '
@@ -1348,7 +1506,7 @@ def create_parser():
'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable '
'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, '
'"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument '
- 'before the specified input/output file. Eg: --ppa "Merger+ffmpeg_i1:-v quiet". '
+ 'before the specified input/output file, e.g. --ppa "Merger+ffmpeg_i1:-v quiet". '
'You can use this option multiple times to give different arguments to different '
'postprocessors. (Alias: --ppa)'))
postproc.add_option(
@@ -1424,7 +1582,7 @@ def create_parser():
dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3,
help='Replace text in a metadata field using the given regex. This option can be used multiple times')
postproc.add_option(
- '--xattrs',
+ '--xattrs', '--xattr',
action='store_true', dest='xattrs', default=False,
help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
@@ -1491,13 +1649,14 @@ def create_parser():
metavar='FORMAT', dest='convertsubtitles', default=None,
help=(
'Convert the subtitles to another format (currently supported: %s) '
- '(Alias: --convert-subtitles)' % ', '.join(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)))
+ '(Alias: --convert-subtitles)' % ', '.join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))))
postproc.add_option(
'--convert-thumbnails',
metavar='FORMAT', dest='convertthumbnails', default=None,
help=(
'Convert the thumbnails to another format '
- '(currently supported: %s) ' % ', '.join(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)))
+ f'(currently supported: {", ".join(sorted(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))}). '
+ 'You can specify multiple rules using a syntax similar to --remux-video'))
postproc.add_option(
'--split-chapters', '--split-tracks',
dest='split_chapters', action='store_true', default=False,
@@ -1514,9 +1673,7 @@ def create_parser():
metavar='REGEX', dest='remove_chapters', action='append',
help=(
'Remove chapters whose title matches the given regular expression. '
- 'Time ranges prefixed by a "*" can also be used in place of chapters to remove the specified range. '
- 'Eg: --remove-chapters "*10:15-15:00" --remove-chapters "intro". '
- 'This option can be used multiple times'))
+ 'The syntax is the same as --download-sections. This option can be used multiple times'))
postproc.add_option(
'--no-remove-chapters', dest='remove_chapters', action='store_const', const=None,
help='Do not remove any chapters from the file (default)')
@@ -1524,9 +1681,8 @@ def create_parser():
'--force-keyframes-at-cuts',
action='store_true', dest='force_keyframes_at_cuts', default=False,
help=(
- 'Force keyframes around the chapters before removing/splitting them. '
- 'Requires a re-encode and thus is very slow, but the resulting video '
- 'may have fewer artifacts around the cuts'))
+ 'Force keyframes at cuts when downloading/splitting/removing sections. '
+ 'This is slow due to needing a re-encode, but the resulting video may have fewer artifacts around the cuts'))
postproc.add_option(
'--no-force-keyframes-at-cuts',
action='store_false', dest='force_keyframes_at_cuts',
@@ -1564,14 +1720,14 @@ def create_parser():
'aliases': {'default': ['all']}
}, help=(
'SponsorBlock categories to create chapters for, separated by commas. '
- f'Available categories are all, default(=all), {", ".join(SponsorBlockPP.CATEGORIES.keys())}. '
- 'You can prefix the category with a "-" to exempt it. See [1] for description of the categories. '
- 'Eg: --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories'))
+ f'Available categories are {", ".join(SponsorBlockPP.CATEGORIES.keys())}, all and default (=all). '
+ 'You can prefix the category with a "-" to exclude it. See [1] for description of the categories. '
+ 'E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories'))
sponsorblock.add_option(
'--sponsorblock-remove', metavar='CATS',
dest='sponsorblock_remove', default=set(), action='callback', type='str',
callback=_set_from_options_callback, callback_kwargs={
- 'allowed_values': set(SponsorBlockPP.CATEGORIES.keys()) - set(SponsorBlockPP.POI_CATEGORIES.keys()),
+ 'allowed_values': set(SponsorBlockPP.CATEGORIES.keys()) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys()),
# Note: From https://wiki.sponsor.ajay.app/w/Types:
# The filler category is very aggressive.
# It is strongly recommended to not use this in a client by default.
@@ -1581,14 +1737,14 @@ def create_parser():
'If a category is present in both mark and remove, remove takes precedence. '
'The syntax and available categories are the same as for --sponsorblock-mark '
'except that "default" refers to "all,-filler" '
- f'and {", ".join(SponsorBlockPP.POI_CATEGORIES.keys())} is not available'))
+ f'and {", ".join(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())} are not available'))
sponsorblock.add_option(
'--sponsorblock-chapter-title', metavar='TEMPLATE',
default=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, dest='sponsorblock_chapter_title',
help=(
- 'The title template for SponsorBlock chapters created by --sponsorblock-mark. '
- 'The same syntax as the output template is used, but the only available fields are '
- 'start_time, end_time, category, categories, name, category_names. Defaults to "%default"'))
+ 'An output template for the title of the SponsorBlock chapters created by --sponsorblock-mark. '
+ 'The only available fields are start_time, end_time, category, categories, name, category_names. '
+ 'Defaults to "%default"'))
sponsorblock.add_option(
'--no-sponsorblock', default=False,
action='store_true', dest='no_sponsorblock',
@@ -1656,14 +1812,14 @@ def create_parser():
val.replace(r'\,', ',').strip() for val in re.split(r'(?<!\\),', vals)])
extractor.add_option(
'--extractor-args',
- metavar='KEY:ARGS', dest='extractor_args', default={}, type='str',
+ metavar='IE_KEY:ARGS', dest='extractor_args', default={}, type='str',
action='callback', callback=_dict_from_options_callback,
callback_kwargs={
'multiple_keys': False,
'process': lambda val: dict(
_extractor_arg_parser(*arg.split('=', 1)) for arg in val.split(';'))
}, help=(
- 'Pass these arguments to the extractor. See "EXTRACTOR ARGUMENTS" for details. '
+ 'Pass ARGS arguments to the IE_KEY extractor. See "EXTRACTOR ARGUMENTS" for details. '
'You can use this option multiple times to give arguments for different extractors'))
extractor.add_option(
'--youtube-include-dash-manifest', '--no-youtube-skip-dash-manifest',
@@ -1703,7 +1859,6 @@ def create_parser():
def _hide_login_info(opts):
- write_string(
- 'DeprecationWarning: "hypervideo_dl.options._hide_login_info" is deprecated and may be removed in a future version. '
- 'Use "hypervideo_dl.utils.Config.hide_login_info" instead\n')
+ deprecation_warning(f'"{__name__}._hide_login_info" is deprecated and may be removed '
+ 'in a future version. Use "hypervideo_dl.utils.Config.hide_login_info" instead')
return Config.hide_login_info(opts)
diff --git a/hypervideo_dl/postprocessor/__init__.py b/hypervideo_dl/postprocessor/__init__.py
index e47631e..f168be4 100644
--- a/hypervideo_dl/postprocessor/__init__.py
+++ b/hypervideo_dl/postprocessor/__init__.py
@@ -1,27 +1,25 @@
# flake8: noqa: F401
-from ..utils import load_plugins
-
from .common import PostProcessor
from .embedthumbnail import EmbedThumbnailPP
-from .exec import ExecPP, ExecAfterDownloadPP
+from .exec import ExecAfterDownloadPP, ExecPP
from .ffmpeg import (
- FFmpegPostProcessor,
- FFmpegCopyStreamPP,
FFmpegConcatPP,
+ FFmpegCopyStreamPP,
FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP,
FFmpegFixupDuplicateMoovPP,
FFmpegFixupDurationPP,
- FFmpegFixupStretchedPP,
- FFmpegFixupTimestampPP,
FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
FFmpegMergerPP,
FFmpegMetadataPP,
+ FFmpegPostProcessor,
+ FFmpegSplitChaptersPP,
FFmpegSubtitlesConvertorPP,
FFmpegThumbnailsConvertorPP,
- FFmpegSplitChaptersPP,
FFmpegVideoConvertorPP,
FFmpegVideoRemuxerPP,
)
@@ -35,6 +33,7 @@ from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .sponskrub import SponSkrubPP
from .sponsorblock import SponsorBlockPP
from .xattrpp import XAttrMetadataPP
+from ..utils import load_plugins
_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals())
diff --git a/hypervideo_dl/postprocessor/common.py b/hypervideo_dl/postprocessor/common.py
index 3899646..c3fca35 100644
--- a/hypervideo_dl/postprocessor/common.py
+++ b/hypervideo_dl/postprocessor/common.py
@@ -1,19 +1,16 @@
-from __future__ import unicode_literals
-
import functools
-import itertools
import json
import os
-import time
import urllib.error
from ..utils import (
+ PostProcessingError,
+ RetryManager,
_configuration_args,
+ deprecation_warning,
encodeFilename,
network_exceptions,
- PostProcessingError,
sanitized_Request,
- write_string,
)
@@ -47,9 +44,6 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
an initial argument and then with the returned value of the previous
PostProcessor.
- The chain will be stopped if one of them ever returns None or the end
- of the chain is reached.
-
PostProcessor objects follow a "mutual registration" process similar
to InfoExtractor objects.
@@ -71,21 +65,26 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
return name[6:] if name[:6].lower() == 'ffmpeg' else name
def to_screen(self, text, prefix=True, *args, **kwargs):
- tag = '[%s] ' % self.PP_NAME if prefix else ''
if self._downloader:
- return self._downloader.to_screen('%s%s' % (tag, text), *args, **kwargs)
+ tag = '[%s] ' % self.PP_NAME if prefix else ''
+ return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs)
def report_warning(self, text, *args, **kwargs):
if self._downloader:
return self._downloader.report_warning(text, *args, **kwargs)
- def deprecation_warning(self, text):
+ def deprecation_warning(self, msg):
+ warn = getattr(self._downloader, 'deprecation_warning', deprecation_warning)
+ return warn(msg, stacklevel=1)
+
+ def deprecated_feature(self, msg):
if self._downloader:
- return self._downloader.deprecation_warning(text)
- write_string(f'DeprecationWarning: {text}')
+ return self._downloader.deprecated_feature(msg)
+ return deprecation_warning(msg, stacklevel=1)
def report_error(self, text, *args, **kwargs):
- # Exists only for compatibility. Do not use
+ self.deprecation_warning('"hypervideo_dl.postprocessor.PostProcessor.report_error" is deprecated. '
+ 'raise "hypervideo_dl.utils.PostProcessingError" instead')
if self._downloader:
return self._downloader.report_error(text, *args, **kwargs)
@@ -93,6 +92,12 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
if self._downloader:
return self._downloader.write_debug(text, *args, **kwargs)
+ def _delete_downloaded_files(self, *files_to_delete, **kwargs):
+ if self._downloader:
+ return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs)
+ for filename in set(filter(None, files_to_delete)):
+ os.remove(filename)
+
def get_param(self, name, default=None, *args, **kwargs):
if self._downloader:
return self._downloader.params.get(name, default, *args, **kwargs)
@@ -171,6 +176,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
def report_progress(self, s):
s['_default_template'] = '%(postprocessor)s %(status)s' % s
+ if not self._downloader:
+ return
progress_dict = s.copy()
progress_dict.pop('info_dict')
@@ -179,34 +186,31 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
progress_template = self.get_param('progress_template', {})
tmpl = progress_template.get('postprocess')
if tmpl:
- self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict))
+ self._downloader.to_screen(
+ self._downloader.evaluate_outtmpl(tmpl, progress_dict), skip_eol=True, quiet=False)
self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
progress_template.get('postprocess-title') or 'hypervideo %(progress._default_template)s',
progress_dict))
- def _download_json(self, url, *, expected_http_errors=(404,)):
+ def _retry_download(self, err, count, retries):
# While this is not an extractor, it behaves similarly to one and
- # so obey extractor_retries and sleep_interval_requests
- max_retries = self.get_param('extractor_retries', 3)
- sleep_interval = self.get_param('sleep_interval_requests') or 0
+ # so obeys extractor_retries and "--retry-sleep extractor"
+ RetryManager.report_retry(err, count, retries, info=self.to_screen, warn=self.report_warning,
+ sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
+ def _download_json(self, url, *, expected_http_errors=(404,)):
self.write_debug(f'{self.PP_NAME} query: {url}')
- for retries in itertools.count():
+ for retry in RetryManager(self.get_param('extractor_retries', 3), self._retry_download):
try:
rsp = self._downloader.urlopen(sanitized_Request(url))
- return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
except network_exceptions as e:
if isinstance(e, urllib.error.HTTPError) and e.code in expected_http_errors:
return None
- if retries < max_retries:
- self.report_warning(f'{e}. Retrying...')
- if sleep_interval > 0:
- self.to_screen(f'Sleeping {sleep_interval} seconds ...')
- time.sleep(sleep_interval)
- continue
- raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
+ retry.error = PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
+ continue
+ return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
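# Editor's note: RetryManager (from ..utils) is assumed to yield one retry
# object per attempt; assigning retry.error records the failure so that
# _retry_download can warn/sleep (honouring "--retry-sleep extractor"), and
# the last error is re-raised once the extractor_retries budget is exhausted.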
-class AudioConversionError(PostProcessingError):
+class AudioConversionError(PostProcessingError): # Deprecated
pass
diff --git a/hypervideo_dl/postprocessor/embedthumbnail.py b/hypervideo_dl/postprocessor/embedthumbnail.py
index 815221d..7cd3952 100644
--- a/hypervideo_dl/postprocessor/embedthumbnail.py
+++ b/hypervideo_dl/postprocessor/embedthumbnail.py
@@ -1,37 +1,29 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import base64
-import imghdr
import os
-import subprocess
import re
-
-try:
- from mutagen.flac import Picture, FLAC
- from mutagen.mp4 import MP4, MP4Cover
- from mutagen.oggopus import OggOpus
- from mutagen.oggvorbis import OggVorbis
- has_mutagen = True
-except ImportError:
- has_mutagen = False
+import subprocess
from .common import PostProcessor
-from .ffmpeg import (
- FFmpegPostProcessor,
- FFmpegThumbnailsConvertorPP,
-)
+from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP
+from ..compat import imghdr
+from ..dependencies import mutagen
from ..utils import (
+ Popen,
+ PostProcessingError,
check_executable,
encodeArgument,
encodeFilename,
error_to_compat_str,
- Popen,
- PostProcessingError,
prepend_extension,
shell_quote,
)
+if mutagen:
+ from mutagen.flac import FLAC, Picture
+ from mutagen.mp4 import MP4, MP4Cover
+ from mutagen.oggopus import OggOpus
+ from mutagen.oggvorbis import OggVorbis
+
class EmbedThumbnailPPError(PostProcessingError):
pass
@@ -61,7 +53,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
return int(mobj.group('w')), int(mobj.group('h'))
def _report_run(self, exe, filename):
- self.to_screen('%s: Adding thumbnail to "%s"' % (exe, filename))
+ self.to_screen(f'{exe}: Adding thumbnail to "{filename}"')
@PostProcessor._restrict_to(images=False)
def run(self, info):
@@ -87,12 +79,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath']
- # Convert unsupported thumbnail formats to PNG (see #25687, #25717)
- # Original behavior was to convert to JPG, but since JPG is a lossy
- # format, there will be some additional data loss.
- # PNG, on the other hand, is lossless.
+ # Convert unsupported thumbnail formats (see #25687, #25717)
+ # PNG is preferred since JPEG is lossy
thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
- if thumbnail_ext not in ('jpg', 'jpeg', 'png'):
+ if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'):
thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
thumbnail_ext = 'png'
@@ -101,8 +91,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
success = True
if info['ext'] == 'mp3':
options = [
- '-c', 'copy', '-map', '0:0', '-map', '1:0', '-id3v2_version', '3',
- '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (front)"']
+ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
+ '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
self._report_run('ffmpeg', filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
@@ -110,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
elif info['ext'] in ['mkv', 'mka']:
options = list(self.stream_copy_opts())
- mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')
+ mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}'
old_stream, new_stream = self.get_stream_number(
filename, ('tags', 'mimetype'), mimetype)
if old_stream is not None:
@@ -127,7 +117,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
elif info['ext'] in ['m4a', 'mp4', 'mov']:
prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
# Method 1: Use mutagen
- if not has_mutagen or prefer_atomicparsley:
+ if not mutagen or prefer_atomicparsley:
success = False
else:
try:
@@ -149,7 +139,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
if not success:
success = True
atomicparsley = next((
- x for x in ['AtomicParsley', 'atomicparsley']
+ # libatomicparsley.so : See https://github.com/xibr/ytdlp-lazy/issues/1
+ x for x in ['AtomicParsley', 'atomicparsley', 'libatomicparsley.so']
if check_executable(x, ['-v'])), None)
if atomicparsley is None:
self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
@@ -167,14 +158,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self._report_run('atomicparsley', filename)
self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
- p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate_or_kill()
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {msg}')
+ stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if returncode:
+ self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
# for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't write to the temporary file
- if b'No changes' in stdout:
+ if 'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False
@@ -200,7 +189,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}')
elif info['ext'] in ['ogg', 'opus', 'flac']:
- if not has_mutagen:
+ if not mutagen:
raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python -m pip install mutagen`')
self._report_run('mutagen', filename)
@@ -230,11 +219,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
os.replace(temp_filename, filename)
self.try_utime(filename, mtime, mtime)
-
- files_to_delete = [thumbnail_filename]
- if self._already_have_thumbnail:
- if original_thumbnail == thumbnail_filename:
- files_to_delete = []
- elif original_thumbnail != thumbnail_filename:
- files_to_delete.append(original_thumbnail)
- return files_to_delete, info
+ converted = original_thumbnail != thumbnail_filename
+ self._delete_downloaded_files(
+ thumbnail_filename if converted or not self._already_have_thumbnail else None,
+ original_thumbnail if converted and not self._already_have_thumbnail else None,
+ info=info)
+ return [], info
diff --git a/hypervideo_dl/postprocessor/exec.py b/hypervideo_dl/postprocessor/exec.py
index c0bd6df..65fe6d4 100644
--- a/hypervideo_dl/postprocessor/exec.py
+++ b/hypervideo_dl/postprocessor/exec.py
@@ -1,14 +1,8 @@
-from __future__ import unicode_literals
-
import subprocess
from .common import PostProcessor
from ..compat import compat_shlex_quote
-from ..utils import (
- encodeArgument,
- PostProcessingError,
- variadic,
-)
+from ..utils import PostProcessingError, encodeArgument, variadic
class ExecPP(PostProcessor):
diff --git a/hypervideo_dl/postprocessor/execafterdownload.py b/hypervideo_dl/postprocessor/execafterdownload.py
deleted file mode 100644
index 64dabe7..0000000
--- a/hypervideo_dl/postprocessor/execafterdownload.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from __future__ import unicode_literals
-
-import subprocess
-
-from .common import PostProcessor
-from ..compat import compat_shlex_quote
-from ..utils import (
- encodeArgument,
- PostProcessingError,
-)
-
-
-class ExecAfterDownloadPP(PostProcessor):
- def __init__(self, downloader, exec_cmd):
- super(ExecAfterDownloadPP, self).__init__(downloader)
- self.exec_cmd = exec_cmd
-
- def run(self, information):
- cmd = self.exec_cmd
- if '{}' not in cmd:
- cmd += ' {}'
-
- cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
-
- self._downloader.to_screen('[exec] Executing command: %s' % cmd)
- retCode = subprocess.call(encodeArgument(cmd), shell=True)
- if retCode != 0:
- raise PostProcessingError(
- 'Command returned error code %d' % retCode)
-
- return [], information
diff --git a/hypervideo_dl/postprocessor/ffmpeg.py b/hypervideo_dl/postprocessor/ffmpeg.py
index 3e6edcf..0471594 100644
--- a/hypervideo_dl/postprocessor/ffmpeg.py
+++ b/hypervideo_dl/postprocessor/ffmpeg.py
@@ -1,30 +1,30 @@
-from __future__ import unicode_literals
-
import collections
-import io
+import contextvars
import itertools
+import json
import os
+import re
import subprocess
import time
-import re
-import json
-from .common import AudioConversionError, PostProcessor
-
-from ..compat import compat_str
+from .common import PostProcessor
+from ..compat import functools, imghdr
from ..utils import (
+ MEDIA_EXTENSIONS,
+ ISO639Utils,
+ Popen,
+ PostProcessingError,
+ _get_exe_version_output,
+ deprecation_warning,
+ detect_exe_version,
determine_ext,
dfxp2srt,
encodeArgument,
encodeFilename,
+ filter_dict,
float_or_none,
- _get_exe_version_output,
- detect_exe_version,
is_outdated_version,
- ISO639Utils,
orderedSet,
- Popen,
- PostProcessingError,
prepend_extension,
replace_extension,
shell_quote,
@@ -33,7 +33,6 @@ from ..utils import (
write_json_file,
)
-
EXT_TO_OUT_FORMATS = {
'aac': 'adts',
'flac': 'flac',
@@ -48,36 +47,48 @@ EXT_TO_OUT_FORMATS = {
'vtt': 'webvtt',
}
ACODECS = {
- 'mp3': 'libmp3lame',
- 'aac': 'aac',
- 'flac': 'flac',
- 'm4a': 'aac',
- 'opus': 'libopus',
- 'vorbis': 'libvorbis',
- 'wav': None,
- 'alac': None,
+ # name: (ext, encoder, opts)
+ 'mp3': ('mp3', 'libmp3lame', ()),
+ 'aac': ('m4a', 'aac', ('-f', 'adts')),
+ 'm4a': ('m4a', 'aac', ('-bsf:a', 'aac_adtstoasc')),
+ 'opus': ('opus', 'libopus', ()),
+ 'vorbis': ('ogg', 'libvorbis', ()),
+ 'flac': ('flac', 'flac', ()),
+ 'alac': ('m4a', None, ('-acodec', 'alac')),
+ 'wav': ('wav', None, ('-f', 'wav')),
}
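# Editor's note: the keys of ACODECS double as the --audio-format values
# (FFmpegExtractAudioPP.SUPPORTED_EXTS below is tuple(ACODECS.keys())); each
# entry gives the output extension, the ffmpeg encoder (None appears to defer
# to the extra opts or ffmpeg's default), and extra output options.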
+def create_mapping_re(supported):
+ return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported)))
+
+
+def resolve_mapping(source, mapping):
+ """
+ Get corresponding item from a mapping string like 'A>B/C>D/E'
+ @returns (target, error_message)
+ """
+ for pair in mapping.lower().split('/'):
+ kv = pair.split('>', 1)
+ if len(kv) == 1 or kv[0].strip() == source:
+ target = kv[-1].strip()
+ if target == source:
+ return target, f'already is in target format {source}'
+ return target, None
+ return None, f'could not find a mapping for {source}'
+
+
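# Editor's sketch (doctest-style, not part of the patch), tracing the two
# helpers above with a --remux-video style mapping:
#     >>> resolve_mapping('mov', 'aac>m4a/mov>mp4/mkv')
#     ('mp4', None)
#     >>> resolve_mapping('webm', 'aac>m4a/mov>mp4/mkv')  # bare 'mkv' catches the rest
#     ('mkv', None)
#     >>> resolve_mapping('mkv', 'aac>m4a/mov>mp4/mkv')
#     ('mkv', 'already is in target format mkv')
# create_mapping_re(supported) builds the regex used to validate such strings.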
class FFmpegPostProcessorError(PostProcessingError):
pass
class FFmpegPostProcessor(PostProcessor):
+ _ffmpeg_location = contextvars.ContextVar('ffmpeg_location', default=None)
+
def __init__(self, downloader=None):
PostProcessor.__init__(self, downloader)
- self._determine_executables()
-
- def check_version(self):
- if not self.available:
- raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
-
- required_version = '10-0' if self.basename == 'avconv' else '1.0'
- if is_outdated_version(
- self._versions[self.basename], required_version):
- warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
- self.basename, self.basename, required_version)
- self.report_warning(warning)
+ self._prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
+ self._paths = self._determine_executables()
@staticmethod
def get_versions_and_features(downloader=None):
@@ -88,87 +99,105 @@ class FFmpegPostProcessor(PostProcessor):
def get_versions(downloader=None):
return FFmpegPostProcessor.get_versions_and_features(downloader)[0]
- _version_cache, _features_cache = {}, {}
+ _ffmpeg_to_avconv = {'ffmpeg': 'avconv', 'ffprobe': 'avprobe'}
def _determine_executables(self):
- programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
-
- def get_ffmpeg_version(path, prog):
- if path in self._version_cache:
- self._versions[prog], self._features = self._version_cache[path], self._features_cache.get(path, {})
- return
- out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug)
- ver = detect_exe_version(out) if out else False
- if ver:
- regexs = [
- r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
- r'n([0-9.]+)$', # Arch Linux
- # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
- ]
- for regex in regexs:
- mobj = re.match(regex, ver)
- if mobj:
- ver = mobj.group(1)
- self._versions[prog] = self._version_cache[path] = ver
- if prog != 'ffmpeg' or not out:
- return
+ programs = [*self._ffmpeg_to_avconv.keys(), *self._ffmpeg_to_avconv.values()]
- mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
- lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
- self._features = self._features_cache[path] = {
- 'fdk': '--enable-libfdk-aac' in out,
- 'setts': 'setts' in out.splitlines(),
- 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
- }
-
- self.basename = None
- self.probe_basename = None
- self._paths = None
- self._versions = None
- self._features = {}
-
- prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
- location = self.get_param('ffmpeg_location')
+ location = self.get_param('ffmpeg_location', self._ffmpeg_location.get())
if location is None:
- self._paths = {p: p for p in programs}
+ return {p: p for p in programs}
+
+ if not os.path.exists(location):
+ self.report_warning(
+ f'ffmpeg-location {location} does not exist! Continuing without ffmpeg', only_once=True)
+ return {}
+ elif os.path.isdir(location):
+ dirname, basename, filename = location, None, None
else:
- if not os.path.exists(location):
- self.report_warning(
- 'ffmpeg-location %s does not exist! '
- 'Continuing without ffmpeg.' % (location))
- self._versions = {}
- return
- elif os.path.isdir(location):
- dirname, basename = location, None
- else:
- basename = os.path.splitext(os.path.basename(location))[0]
- basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
- dirname = os.path.dirname(os.path.abspath(location))
- if basename in ('ffmpeg', 'ffprobe'):
- prefer_ffmpeg = True
-
- self._paths = dict(
- (p, os.path.join(dirname, p)) for p in programs)
- if basename:
- self._paths[basename] = location
-
- self._versions = {}
- executables = {'basename': ('ffmpeg', 'avconv'), 'probe_basename': ('ffprobe', 'avprobe')}
- if prefer_ffmpeg is False:
- executables = {k: v[::-1] for k, v in executables.items()}
- for var, prefs in executables.items():
- for p in prefs:
- get_ffmpeg_version(self._paths[p], p)
- if self._versions[p]:
- setattr(self, var, p)
- break
-
- if self.basename == 'avconv':
- self.deprecation_warning(
- 'Support for avconv is deprecated and may be removed in a future version. Use ffmpeg instead')
- if self.probe_basename == 'avprobe':
- self.deprecation_warning(
- 'Support for avprobe is deprecated and may be removed in a future version. Use ffprobe instead')
+ filename = os.path.basename(location)
+ basename = next((p for p in programs if p in filename), 'ffmpeg')
+ dirname = os.path.dirname(os.path.abspath(location))
+ if basename in self._ffmpeg_to_avconv.keys():
+ self._prefer_ffmpeg = True
+
+ paths = {p: os.path.join(dirname, p) for p in programs}
+ if basename and basename in filename:
+ for p in programs:
+ path = os.path.join(dirname, filename.replace(basename, p))
+ if os.path.exists(path):
+ paths[p] = path
+ if basename:
+ paths[basename] = location
+ return paths
+
+ _version_cache, _features_cache = {None: None}, {}
+
+ def _get_ffmpeg_version(self, prog):
+ path = self._paths.get(prog)
+ if path in self._version_cache:
+ return self._version_cache[path], self._features_cache.get(path, {})
+ out = _get_exe_version_output(path, ['-bsfs'])
+ ver = detect_exe_version(out) if out else False
+ if ver:
+ regexs = [
+ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
+ r'n([0-9.]+)$', # Arch Linux
+ # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
+ ]
+ for regex in regexs:
+ mobj = re.match(regex, ver)
+ if mobj:
+ ver = mobj.group(1)
+ self._version_cache[path] = ver
+ if prog != 'ffmpeg' or not out:
+ return ver, {}
+
+ mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
+ lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
+ self._features_cache[path] = features = {
+ 'fdk': '--enable-libfdk-aac' in out,
+ 'setts': 'setts' in out.splitlines(),
+ 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
+ }
+ return ver, features
+
+ @property
+ def _versions(self):
+ return filter_dict({self.basename: self._version, self.probe_basename: self._probe_version})
+
+ @functools.cached_property
+ def basename(self):
+ self._version # run property
+ return self.basename
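# Editor's note: reading self._version runs _get_version('ffmpeg'), which
# assigns self.basename directly on the instance; that instance attribute
# then shadows this cached_property, so the property body runs at most once.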
+
+ @functools.cached_property
+ def probe_basename(self):
+ self._probe_version # run property
+ return self.probe_basename
+
+ def _get_version(self, kind):
+ executables = (kind, )
+ if not self._prefer_ffmpeg:
+ executables = (kind, self._ffmpeg_to_avconv[kind])
+ basename, version, features = next(filter(
+ lambda x: x[1], ((p, *self._get_ffmpeg_version(p)) for p in executables)), (None, None, {}))
+ if kind == 'ffmpeg':
+ self.basename, self._features = basename, features
+ else:
+ self.probe_basename = basename
+ if basename == self._ffmpeg_to_avconv[kind]:
+ self.deprecated_feature(f'Support for {self._ffmpeg_to_avconv[kind]} is deprecated and '
+ f'may be removed in a future version. Use {kind} instead')
+ return version
+
+ @functools.cached_property
+ def _version(self):
+ return self._get_version('ffmpeg')
+
+ @functools.cached_property
+ def _probe_version(self):
+ return self._get_version('ffprobe')
@property
def available(self):
@@ -176,7 +205,7 @@ class FFmpegPostProcessor(PostProcessor):
@property
def executable(self):
- return self._paths[self.basename]
+ return self._paths.get(self.basename)
@property
def probe_available(self):
@@ -184,7 +213,7 @@ class FFmpegPostProcessor(PostProcessor):
@property
def probe_executable(self):
- return self._paths[self.probe_basename]
+ return self._paths.get(self.probe_basename)
@staticmethod
def stream_copy_opts(copy=True, *, ext=None):
@@ -194,10 +223,18 @@ class FFmpegPostProcessor(PostProcessor):
yield from ('-dn', '-ignore_unknown')
if copy:
yield from ('-c', 'copy')
- # For some reason, '-c copy -map 0' is not enough to copy subtitles
- if ext in ('mp4', 'mov'):
+ if ext in ('mp4', 'mov', 'm4a'):
yield from ('-c:s', 'mov_text')
+ def check_version(self):
+ if not self.available:
+ raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
+
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
+ if is_outdated_version(self._version, required_version):
+ self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} '
+ f'to version {required_version} or newer if you encounter any errors')
+
def get_audio_codec(self, path):
if not self.probe_available and not self.available:
raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
@@ -211,15 +248,14 @@ class FFmpegPostProcessor(PostProcessor):
encodeFilename(self.executable, True),
encodeArgument('-i')]
cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
- self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd)))
- handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout_data, stderr_data = handle.communicate_or_kill()
- expected_ret = 0 if self.probe_available else 1
- if handle.wait() != expected_ret:
+ self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}')
+ stdout, stderr, returncode = Popen.run(
+ cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if returncode != (0 if self.probe_available else 1):
return None
- except (IOError, OSError):
+ except OSError:
return None
- output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
+ output = stdout if self.probe_available else stderr
if self.probe_available:
audio_codec = None
for line in output.split('\n'):
@@ -253,11 +289,10 @@ class FFmpegPostProcessor(PostProcessor):
]
cmd += opts
- cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
- self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
- p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout, stderr = p.communicate()
- return json.loads(stdout.decode('utf-8', 'replace'))
+ cmd.append(self._ffmpeg_filename_argument(path))
+ self.write_debug(f'ffprobe command line: {shell_quote(cmd)}')
+ stdout, _, _ = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ return json.loads(stdout)
def get_stream_number(self, path, keys, value):
streams = self.get_metadata_object(path)['streams']
@@ -277,12 +312,12 @@ class FFmpegPostProcessor(PostProcessor):
if fatal:
raise PostProcessingError(f'Unable to determine video duration: {e.msg}')
- def _duration_mismatch(self, d1, d2):
+ def _duration_mismatch(self, d1, d2, tolerance=2):
if not d1 or not d2:
return None
# The duration is often only known to the nearest second, so there can be <1sec disparity naturally.
# Further excuse an additional <1sec difference.
- return abs(d1 - d2) > 2
+ return abs(d1 - d2) > tolerance
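+ # (illustrative: with the default tolerance=2, durations of 600.4 and
+ # 601.9 differ by 1.5 and are treated as matching)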
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
return self.real_run_ffmpeg(
@@ -319,16 +354,15 @@ class FFmpegPostProcessor(PostProcessor):
for i, (path, opts) in enumerate(path_opts) if path)
self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
- p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout, stderr = p.communicate_or_kill()
- if p.returncode not in variadic(expected_retcodes):
- stderr = stderr.decode('utf-8', 'replace').strip()
+ _, stderr, returncode = Popen.run(
+ cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ if returncode not in variadic(expected_retcodes):
self.write_debug(stderr)
- raise FFmpegPostProcessorError(stderr.split('\n')[-1])
+ raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
for out_path, _ in output_path_opts:
if out_path:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
- return stderr.decode('utf-8', 'replace')
+ return stderr
def run_ffmpeg(self, path, out_path, opts, **kwargs):
return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)
@@ -381,7 +415,7 @@ class FFmpegPostProcessor(PostProcessor):
self.real_run_ffmpeg(
[(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
[(out_file, out_flags)])
- os.remove(concat_file)
+ self._delete_downloaded_files(concat_file)
@classmethod
def _concat_spec(cls, in_files, concat_opts=None):
@@ -397,12 +431,13 @@ class FFmpegPostProcessor(PostProcessor):
class FFmpegExtractAudioPP(FFmpegPostProcessor):
- COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
- SUPPORTED_EXTS = ('aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac')
+ COMMON_AUDIO_EXTS = MEDIA_EXTENSIONS.common_audio + ('wma', )
+ SUPPORTED_EXTS = tuple(ACODECS.keys())
+ FORMAT_RE = create_mapping_re(('best', *SUPPORTED_EXTS))
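+ # (illustrative) the mapping syntax mirrors --remux-video, e.g.
+ # 'aac>m4a/mp3' converts aac sources to m4a and any other source to mp3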
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
FFmpegPostProcessor.__init__(self, downloader)
- self._preferredcodec = preferredcodec or 'best'
+ self.mapping = preferredcodec or 'best'
self._preferredquality = float_or_none(preferredquality)
self._nopostoverwrites = nopostoverwrites
@@ -437,71 +472,47 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
- raise AudioConversionError(err.msg)
+ raise PostProcessingError(f'audio conversion failed: {err.msg}')
@PostProcessor._restrict_to(images=False)
def run(self, information):
orig_path = path = information['filepath']
- orig_ext = information['ext']
-
- if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
- self.to_screen('Skipping audio extraction since the file is already in a common audio format')
+ target_format, _skip_msg = resolve_mapping(information['ext'], self.mapping)
+ if target_format == 'best' and information['ext'] in self.COMMON_AUDIO_EXTS:
+ target_format, _skip_msg = None, 'the file is already in a common audio format'
+ if not target_format:
+ self.to_screen(f'Not converting audio {orig_path}; {_skip_msg}')
return [], information
filecodec = self.get_audio_codec(path)
if filecodec is None:
raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
- more_opts = []
- if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
- if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
- # Lossless, but in another container
- acodec = 'copy'
- extension = 'm4a'
- more_opts = ['-bsf:a', 'aac_adtstoasc']
- elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
- # Lossless if possible
- acodec = 'copy'
- extension = filecodec
- if filecodec == 'aac':
- more_opts = ['-f', 'adts']
- if filecodec == 'vorbis':
- extension = 'ogg'
- elif filecodec == 'alac':
- acodec = None
- extension = 'm4a'
- more_opts += ['-acodec', 'alac']
- else:
- # MP3 otherwise.
- acodec = 'libmp3lame'
- extension = 'mp3'
- more_opts = self._quality_args(acodec)
+ if filecodec == 'aac' and target_format in ('m4a', 'best'):
+ # Lossless, but in another container
+ extension, _, more_opts, acodec = *ACODECS['m4a'], 'copy'
+ elif target_format == 'best' or target_format == filecodec:
+ # Lossless if possible
+ try:
+ extension, _, more_opts, acodec = *ACODECS[filecodec], 'copy'
+ except KeyError:
+ extension, acodec, more_opts = ACODECS['mp3']
else:
# We convert the audio (lossy if codec is lossy)
- acodec = ACODECS[self._preferredcodec]
+ extension, acodec, more_opts = ACODECS[target_format]
if acodec == 'aac' and self._features.get('fdk'):
- acodec = 'libfdk_aac'
- extension = self._preferredcodec
+ acodec, more_opts = 'libfdk_aac', []
+
+ more_opts = list(more_opts)
+ if acodec != 'copy':
more_opts = self._quality_args(acodec)
- if self._preferredcodec == 'aac':
- more_opts += ['-f', 'adts']
- elif self._preferredcodec == 'm4a':
- more_opts += ['-bsf:a', 'aac_adtstoasc']
- elif self._preferredcodec == 'vorbis':
- extension = 'ogg'
- elif self._preferredcodec == 'wav':
- extension = 'wav'
- more_opts += ['-f', 'wav']
- elif self._preferredcodec == 'alac':
- extension = 'm4a'
- more_opts += ['-acodec', 'alac']
-
- prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
- temp_path = new_path = prefix + sep + extension
+
+ # not os.path.splitext, since the latter does not work on unicode in all setups
+ temp_path = new_path = f'{path.rpartition(".")[0]}.{extension}'
if new_path == path:
if acodec == 'copy':
- self.to_screen(f'File is already in target format {self._preferredcodec}, skipping')
+ self.to_screen(f'Not converting audio {orig_path}; file is already in target format {target_format}')
return [], information
orig_path = prepend_extension(path, 'orig')
temp_path = prepend_extension(path, 'temp')
@@ -510,14 +521,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self.to_screen('Post-process file %s exists, skipping' % new_path)
return [], information
- try:
- self.to_screen(f'Destination: {new_path}')
- self.run_ffmpeg(path, temp_path, acodec, more_opts)
- except AudioConversionError as e:
- raise PostProcessingError(
- 'audio conversion failed: ' + e.msg)
- except Exception:
- raise PostProcessingError('error running ' + self.basename)
+ self.to_screen(f'Destination: {new_path}')
+ self.run_ffmpeg(path, temp_path, acodec, more_opts)
os.replace(path, orig_path)
os.replace(temp_path, new_path)
@@ -527,26 +532,19 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
self.try_utime(
- new_path, time.time(), information['filetime'],
- errnote='Cannot update utime of audio file')
+ new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file')
return [orig_path], information
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
- SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mka', 'ogg', *FFmpegExtractAudioPP.SUPPORTED_EXTS)
- FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
+ SUPPORTED_EXTS = (*MEDIA_EXTENSIONS.common_video, *sorted(MEDIA_EXTENSIONS.common_audio + ('aac', 'vorbis')))
+ FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
_ACTION = 'converting'
def __init__(self, downloader=None, preferedformat=None):
- super(FFmpegVideoConvertorPP, self).__init__(downloader)
- self._preferedformats = preferedformat.lower().split('/')
-
- def _target_ext(self, source_ext):
- for pair in self._preferedformats:
- kv = pair.split('>')
- if len(kv) == 1 or kv[0].strip() == source_ext:
- return kv[-1].strip()
+ super().__init__(downloader)
+ self.mapping = preferedformat
@staticmethod
def _options(target_ext):
@@ -557,11 +555,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
@PostProcessor._restrict_to(images=False)
def run(self, info):
filename, source_ext = info['filepath'], info['ext'].lower()
- target_ext = self._target_ext(source_ext)
- _skip_msg = (
- f'could not find a mapping for {source_ext}' if not target_ext
- else f'already is in target format {source_ext}' if source_ext == target_ext
- else None)
+ target_ext, _skip_msg = resolve_mapping(source_ext, self.mapping)
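+ # resolve_mapping() walks the rules left to right and returns a skip
+ # reason when no rule matches or the file already has the target ext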
if _skip_msg:
self.to_screen(f'Not {self._ACTION} media file "{filename}"; {_skip_msg}')
return [], info
@@ -584,14 +578,16 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka')
+
def __init__(self, downloader=None, already_have_subtitle=False):
- super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
+ super().__init__(downloader)
self._already_have_subtitle = already_have_subtitle
@PostProcessor._restrict_to(images=False)
def run(self, info):
- if info['ext'] not in ('mp4', 'webm', 'mkv'):
- self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files')
+ if info['ext'] not in self.SUPPORTED_EXTS:
+ self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
return [], info
subtitles = info.get('requested_subtitles')
if not subtitles:
@@ -600,7 +596,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
filename = info['filepath']
- # Disabled temporarily. There needs to be a way to overide this
+ # Disabled temporarily. There needs to be a way to override this
# in case of duration actually mismatching in extractor
# See: https://github.com/hypervideo/hypervideo/issues/1870, https://github.com/hypervideo/hypervideo/issues/1385
'''
@@ -706,14 +702,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
self.run_ffmpeg_multiple_files(
(filename, metadata_filename), temp_filename,
itertools.chain(self._options(info['ext']), *options))
- for file in filter(None, files_to_delete):
- os.remove(file) # Don't obey --keep-files
+ self._delete_downloaded_files(*files_to_delete)
os.replace(temp_filename, filename)
return [], info
@staticmethod
def _get_chapter_opts(chapters, metadata_filename):
- with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+ with open(metadata_filename, 'wt', encoding='utf-8') as f:
def ffmpeg_escape(text):
return re.sub(r'([\\=;#\n])', r'\\\1', text)
@@ -737,13 +732,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
if info.get(key) is not None), None)
if value not in ('', None):
+ value = value.replace('\0', '') # nul character cannot be passed in command line
metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
- # See [1-4] for some info on media metadata/metadata supported
- # by ffmpeg.
- # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
- # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
- # 3. https://kodi.wiki/view/Video_file_tagging
+ # Info on media metadata/metadata supported by ffmpeg:
+ # https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
+ # https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
+ # https://kodi.wiki/view/Video_file_tagging
add('title', ('track', 'title'))
add('date', 'upload_date')
@@ -767,7 +762,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for key, value in info.items():
mobj = re.fullmatch(meta_regex, key)
if value is not None and mobj:
- metadata[mobj.group('i') or 'common'][mobj.group('key')] = value
+ metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '')
+
+ # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags
+ yield ('-write_id3v1', '1')
for name, value in metadata['common'].items():
yield ('-metadata', f'{name}={value}')
@@ -801,11 +799,16 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
yield ('-map', '-0:%d' % old_stream)
new_stream -= 1
- yield ('-attach', infofn,
- '-metadata:s:%d' % new_stream, 'mimetype=application/json')
+ yield (
+ '-attach', infofn,
+ f'-metadata:s:{new_stream}', 'mimetype=application/json',
+ f'-metadata:s:{new_stream}', 'filename=info.json',
+ )
class FFmpegMergerPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = MEDIA_EXTENSIONS.common_video
+
@PostProcessor._restrict_to(images=False)
def run(self, info):
filename = info['filepath']
@@ -895,7 +898,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
def __init__(self, downloader=None, trim=0.001):
# "trim" should be used when the video contains unintended packets
- super(FFmpegFixupTimestampPP, self).__init__(downloader)
+ super().__init__(downloader)
assert isinstance(trim, (int, float))
self.trim = str(trim)
@@ -930,10 +933,10 @@ class FFmpegFixupDuplicateMoovPP(FFmpegCopyStreamPP):
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
- SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')
+ SUPPORTED_EXTS = MEDIA_EXTENSIONS.subtitles
def __init__(self, downloader=None, format=None):
- super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+ super().__init__(downloader)
self.format = format
def run(self, info):
@@ -975,7 +978,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
- with io.open(srt_file, 'wt', encoding='utf-8') as f:
+ with open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data)
old_file = srt_file
@@ -992,7 +995,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
- with io.open(new_file, 'rt', encoding='utf-8') as f:
+ with open(new_file, encoding='utf-8') as f:
subs[lang] = {
'ext': new_ext,
'data': f.read(),
@@ -1029,8 +1032,8 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
return (
destination,
- ['-ss', compat_str(chapter['start_time']),
- '-t', compat_str(chapter['end_time'] - chapter['start_time'])])
+ ['-ss', str(chapter['start_time']),
+ '-t', str(chapter['end_time'] - chapter['start_time'])])
@PostProcessor._restrict_to(images=False)
def run(self, info):
@@ -1047,29 +1050,28 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())])
if in_file != info['filepath']:
- os.remove(in_file)
+ self._delete_downloaded_files(in_file, msg=None)
return [], info
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
- SUPPORTED_EXTS = ('jpg', 'png', 'webp')
+ SUPPORTED_EXTS = MEDIA_EXTENSIONS.thumbnails
+ FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
def __init__(self, downloader=None, format=None):
- super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)
- self.format = format
+ super().__init__(downloader)
+ self.mapping = format
- @staticmethod
- def is_webp(path):
- with open(encodeFilename(path), 'rb') as f:
- b = f.read(12)
- return b[0:4] == b'RIFF' and b[8:] == b'WEBP'
+ @classmethod
+ def is_webp(cls, path):
+ deprecation_warning(f'{cls.__module__}.{cls.__name__}.is_webp is deprecated')
+ return imghdr.what(path) == 'webp'
def fixup_webp(self, info, idx=-1):
thumbnail_filename = info['thumbnails'][idx]['filepath']
_, thumbnail_ext = os.path.splitext(thumbnail_filename)
if thumbnail_ext:
- thumbnail_ext = thumbnail_ext[1:].lower()
- if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
+ if thumbnail_ext.lower() != '.webp' and imghdr.what(thumbnail_filename) == 'webp':
self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
webp_filename = replace_extension(thumbnail_filename, 'webp')
os.replace(thumbnail_filename, webp_filename)
@@ -1079,17 +1081,18 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
@staticmethod
def _options(target_ext):
+ yield from ('-update', '1')
if target_ext == 'jpg':
- return ['-bsf:v', 'mjpeg2jpeg']
- return []
+ yield from ('-bsf:v', 'mjpeg2jpeg')
def convert_thumbnail(self, thumbnail_filename, target_ext):
thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)
- self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
+ self.to_screen(f'Converting thumbnail "{thumbnail_filename}" to {target_ext}')
+ _, source_ext = os.path.splitext(thumbnail_filename)
self.real_run_ffmpeg(
- [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
- [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
+ [(thumbnail_filename, [] if source_ext == '.gif' else ['-f', 'image2', '-pattern_type', 'none'])],
+ [(thumbnail_conv_filename, self._options(target_ext))])
return thumbnail_conv_filename
def run(self, info):
@@ -1102,18 +1105,18 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
continue
has_thumbnail = True
self.fixup_webp(info, idx)
- _, thumbnail_ext = os.path.splitext(original_thumbnail)
- if thumbnail_ext:
- thumbnail_ext = thumbnail_ext[1:].lower()
+ original_thumbnail = thumbnail_dict['filepath'] # Path can change during fixup
+ thumbnail_ext = os.path.splitext(original_thumbnail)[1][1:].lower()
if thumbnail_ext == 'jpeg':
thumbnail_ext = 'jpg'
- if thumbnail_ext == self.format:
- self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
+ target_ext, _skip_msg = resolve_mapping(thumbnail_ext, self.mapping)
+ if _skip_msg:
+ self.to_screen(f'Not converting thumbnail "{original_thumbnail}"; {_skip_msg}')
continue
- thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
+ thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, target_ext)
files_to_delete.append(original_thumbnail)
info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
- info['__files_to_move'][original_thumbnail], self.format)
+ info['__files_to_move'][original_thumbnail], target_ext)
if not has_thumbnail:
self.to_screen('There aren\'t any thumbnails to convert')
@@ -1153,16 +1156,16 @@ class FFmpegConcatPP(FFmpegPostProcessor):
entries = info.get('entries') or []
if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'):
return [], info
- elif traverse_obj(entries, (..., 'requested_downloads', lambda _, v: len(v) > 1)):
+ elif traverse_obj(entries, (..., lambda k, v: k == 'requested_downloads' and len(v) > 1)):
raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats')
in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or []
if len(in_files) < len(entries):
raise PostProcessingError('Aborting concatenation because some downloads failed')
- ie_copy = self._downloader._playlist_infodict(info)
exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
- ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv'
+ ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
+ info, self._downloader._playlist_infodict(info))
out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')
files_to_delete = self.concat_files(in_files, out_file)
diff --git a/hypervideo_dl/postprocessor/metadatafromtitle.py b/hypervideo_dl/postprocessor/metadatafromtitle.py
deleted file mode 100644
index f5c14d9..0000000
--- a/hypervideo_dl/postprocessor/metadatafromtitle.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import PostProcessor
-
-
-class MetadataFromTitlePP(PostProcessor):
- def __init__(self, downloader, titleformat):
- super(MetadataFromTitlePP, self).__init__(downloader)
- self._titleformat = titleformat
- self._titleregex = (self.format_to_regex(titleformat)
- if re.search(r'%\(\w+\)s', titleformat)
- else titleformat)
-
- def format_to_regex(self, fmt):
- r"""
- Converts a string like
- '%(title)s - %(artist)s'
- to a regex like
- '(?P<title>.+)\ \-\ (?P<artist>.+)'
- """
- lastpos = 0
- regex = ''
- # replace %(..)s with regex group and escape other string parts
- for match in re.finditer(r'%\((\w+)\)s', fmt):
- regex += re.escape(fmt[lastpos:match.start()])
- regex += r'(?P<' + match.group(1) + '>.+)'
- lastpos = match.end()
- if lastpos < len(fmt):
- regex += re.escape(fmt[lastpos:])
- return regex
-
- def run(self, info):
- title = info['title']
- match = re.match(self._titleregex, title)
- if match is None:
- self._downloader.to_screen(
- '[fromtitle] Could not interpret title of video as "%s"'
- % self._titleformat)
- return [], info
- for attribute, value in match.groupdict().items():
- info[attribute] = value
- self._downloader.to_screen(
- '[fromtitle] parsed %s: %s'
- % (attribute, value if value is not None else 'NA'))
-
- return [], info
diff --git a/hypervideo_dl/postprocessor/metadataparser.py b/hypervideo_dl/postprocessor/metadataparser.py
index 01ee6c1..381182b 100644
--- a/hypervideo_dl/postprocessor/metadataparser.py
+++ b/hypervideo_dl/postprocessor/metadataparser.py
@@ -1,31 +1,27 @@
import re
-from enum import Enum
from .common import PostProcessor
+from ..utils import Namespace, filter_dict
class MetadataParserPP(PostProcessor):
- class Actions(Enum):
- INTERPRET = 'interpretter'
- REPLACE = 'replacer'
-
def __init__(self, downloader, actions):
- PostProcessor.__init__(self, downloader)
+ super().__init__(downloader)
self._actions = []
for f in actions:
- action = f[0]
- assert isinstance(action, self.Actions)
- self._actions.append(getattr(self, action.value)(*f[1:]))
+ action, *args = f
+ assert action in self.Actions
+ self._actions.append(action(self, *args))
@classmethod
def validate_action(cls, action, *data):
- ''' Each action can be:
+ """Each action can be:
(Actions.INTERPRET, from, to) OR
(Actions.REPLACE, field, search, replace)
- '''
- if not isinstance(action, cls.Actions):
+ """
+ if action not in cls.Actions:
raise ValueError(f'{action!r} is not a valid action')
- getattr(cls, action.value)(cls, *data) # So this can raise error to validate
+ action(cls, *data) # So this can raise an error to validate
@staticmethod
def field_to_template(tmpl):
@@ -72,9 +68,9 @@ class MetadataParserPP(PostProcessor):
if match is None:
self.to_screen(f'Could not interpret {inp!r} as {out!r}')
return
- for attribute, value in match.groupdict().items():
+ for attribute, value in filter_dict(match.groupdict()).items():
info[attribute] = value
- self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA'))
+ self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')
template = self.field_to_template(inp)
out_re = re.compile(self.format_to_regex(out))
@@ -99,6 +95,8 @@ class MetadataParserPP(PostProcessor):
search_re = re.compile(search)
return f
+ Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer)
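+ # Note: Actions now exposes the factory functions themselves, so
+ # __init__ can call action(self, *args) directly instead of looking
+ # them up by name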
+
class MetadataFromFieldPP(MetadataParserPP):
@classmethod
diff --git a/hypervideo_dl/postprocessor/modify_chapters.py b/hypervideo_dl/postprocessor/modify_chapters.py
index 22506bc..a745b45 100644
--- a/hypervideo_dl/postprocessor/modify_chapters.py
+++ b/hypervideo_dl/postprocessor/modify_chapters.py
@@ -3,17 +3,9 @@ import heapq
import os
from .common import PostProcessor
-from .ffmpeg import (
- FFmpegPostProcessor,
- FFmpegSubtitlesConvertorPP
-)
+from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP
from .sponsorblock import SponsorBlockPP
-from ..utils import (
- orderedSet,
- PostProcessingError,
- prepend_extension,
-)
-
+from ..utils import PostProcessingError, orderedSet, prepend_extension
_TINY_CHAPTER_DURATION = 1
DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
@@ -24,7 +16,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
*, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
FFmpegPostProcessor.__init__(self, downloader)
self._remove_chapters_patterns = set(remove_chapters_patterns or [])
- self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.POI_CATEGORIES.keys())
+ self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
self._ranges_to_remove = set(remove_ranges or [])
self._sponsorblock_chapter_title = sponsorblock_chapter_title
self._force_keyframes = force_keyframes
@@ -40,14 +32,18 @@ class ModifyChaptersPP(FFmpegPostProcessor):
real_duration = self._get_real_video_duration(info['filepath'])
if not chapters:
- chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}]
+ chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]
info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
if not cuts:
return [], info
+ elif not info['chapters']:
+ self.report_warning('You have requested to remove the entire video, which is not possible')
+ return [], info
- if self._duration_mismatch(real_duration, info.get('duration')):
- if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']):
+ original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time']
+ if self._duration_mismatch(real_duration, original_duration, 1):
+ if not self._duration_mismatch(real_duration, info['duration']):
self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
return [], info
if not info.get('__real_download'):
@@ -106,7 +102,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
'start_time': start,
'end_time': end,
'category': 'manually_removed',
- '_categories': [('manually_removed', start, end)],
+ '_categories': [('manually_removed', start, end, 'Manually removed')],
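+ # _categories entries are (category, start, end, category_name) tuples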
'remove': True,
} for start, end in self._ranges_to_remove)
@@ -297,13 +293,12 @@ class ModifyChaptersPP(FFmpegPostProcessor):
c.pop('_was_cut', None)
cats = c.pop('_categories', None)
if cats:
- category = min(cats, key=lambda c: c[2] - c[1])[0]
- cats = orderedSet(x[0] for x in cats)
+ category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1])
c.update({
'category': category,
- 'categories': cats,
- 'name': SponsorBlockPP.CATEGORIES[category],
- 'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats]
+ 'categories': orderedSet(x[0] for x in cats),
+ 'name': category_name,
+ 'category_names': orderedSet(x[3] for x in cats),
})
c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy())
# Merge identically named sponsors.
@@ -322,7 +317,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
self.to_screen(f'Removing chapters from {filename}')
self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
if in_file != filename:
- os.remove(in_file)
+ self._delete_downloaded_files(in_file, msg=None)
return out_file
@staticmethod
diff --git a/hypervideo_dl/postprocessor/movefilesafterdownload.py b/hypervideo_dl/postprocessor/movefilesafterdownload.py
index 1064a8c..23b0924 100644
--- a/hypervideo_dl/postprocessor/movefilesafterdownload.py
+++ b/hypervideo_dl/postprocessor/movefilesafterdownload.py
@@ -1,13 +1,12 @@
-from __future__ import unicode_literals
import os
-import shutil
from .common import PostProcessor
+from ..compat import shutil
from ..utils import (
+ PostProcessingError,
decodeFilename,
encodeFilename,
make_dir,
- PostProcessingError,
)
@@ -47,7 +46,7 @@ class MoveFilesAfterDownloadPP(PostProcessor):
% (oldfile, newfile))
continue
make_dir(newfile, PostProcessingError)
- self.to_screen('Moving file "%s" to "%s"' % (oldfile, newfile))
+ self.to_screen(f'Moving file "{oldfile}" to "{newfile}"')
shutil.move(oldfile, newfile) # os.rename cannot move between volumes
info['filepath'] = finalpath
diff --git a/hypervideo_dl/postprocessor/sponskrub.py b/hypervideo_dl/postprocessor/sponskrub.py
index 400cbcc..4ba2520 100644
--- a/hypervideo_dl/postprocessor/sponskrub.py
+++ b/hypervideo_dl/postprocessor/sponskrub.py
@@ -1,19 +1,18 @@
-from __future__ import unicode_literals
import os
+import shlex
import subprocess
from .common import PostProcessor
-from ..compat import compat_shlex_split
from ..utils import (
+ Popen,
+ PostProcessingError,
check_executable,
cli_option,
encodeArgument,
encodeFilename,
+ prepend_extension,
shell_quote,
str_or_none,
- Popen,
- PostProcessingError,
- prepend_extension,
)
@@ -79,23 +78,21 @@ class SponSkrubPP(PostProcessor):
if not self.cutout:
cmd += ['-chapter']
cmd += cli_option(self._downloader.params, '-proxy', 'proxy')
- cmd += compat_shlex_split(self.args) # For backward compatibility
+ cmd += shlex.split(self.args) # For backward compatibility
cmd += self._configuration_args(self._exe_name, use_compat=False)
cmd += ['--', information['id'], filename, temp_filename]
cmd = [encodeArgument(i) for i in cmd]
self.write_debug('sponskrub command line: %s' % shell_quote(cmd))
- pipe = None if self.get_param('verbose') else subprocess.PIPE
- p = Popen(cmd, stdout=pipe)
- stdout = p.communicate_or_kill()[0]
+ stdout, _, returncode = Popen.run(cmd, text=True, stdout=None if self.get_param('verbose') else subprocess.PIPE)
- if p.returncode == 0:
+ if not returncode:
os.replace(temp_filename, filename)
self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
- elif p.returncode == 3:
+ elif returncode == 3:
self.to_screen('No segments in the SponsorBlock database')
else:
- msg = stdout.decode('utf-8', 'replace').strip() if stdout else ''
- msg = msg.split('\n')[0 if msg.lower().startswith('unrecognised') else -1]
- raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s' % p.returncode)
+ raise PostProcessingError(
+ stdout.strip().splitlines()[0 if stdout.strip().lower().startswith('unrecognised') else -1]
+ or f'sponskrub failed with error code {returncode}')
return [], information
diff --git a/hypervideo_dl/postprocessor/sponsorblock.py b/hypervideo_dl/postprocessor/sponsorblock.py
index 7943014..6ba87cd 100644
--- a/hypervideo_dl/postprocessor/sponsorblock.py
+++ b/hypervideo_dl/postprocessor/sponsorblock.py
@@ -1,9 +1,9 @@
-from hashlib import sha256
+import hashlib
import json
import re
+import urllib.parse
from .ffmpeg import FFmpegPostProcessor
-from ..compat import compat_urllib_parse_urlencode
class SponsorBlockPP(FFmpegPostProcessor):
@@ -14,6 +14,10 @@ class SponsorBlockPP(FFmpegPostProcessor):
POI_CATEGORIES = {
'poi_highlight': 'Highlight',
}
+ NON_SKIPPABLE_CATEGORIES = {
+ **POI_CATEGORIES,
+ 'chapter': 'Chapter',
+ }
CATEGORIES = {
'sponsor': 'Sponsor',
'intro': 'Intermission/Intro Animation',
@@ -23,7 +27,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
'filler': 'Filler Tangent',
'interaction': 'Interaction Reminder',
'music_offtopic': 'Non-Music Section',
- **POI_CATEGORIES,
+ **NON_SKIPPABLE_CATEGORIES
}
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
@@ -38,7 +42,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
return [], info
self.to_screen('Fetching SponsorBlock segments')
- info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration'])
+ info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
return [], info
def _get_sponsor_chapters(self, info, duration):
@@ -60,7 +64,8 @@ class SponsorBlockPP(FFmpegPostProcessor):
if duration and duration - start_end[1] <= 1:
start_end[1] = duration
# SponsorBlock duration may be absent or it may deviate from the real one.
- return s['videoDuration'] == 0 or not duration or abs(duration - s['videoDuration']) <= 1
+ diff = abs(duration - s['videoDuration']) if s['videoDuration'] else 0
+ return diff < 1 or (diff < 5 and diff / (start_end[1] - start_end[0]) < 0.05)
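+ # (illustrative: a 3s deviation on a 120s segment passes, since 3 < 5 and
+ # 3/120 = 0.025 < 0.05, while the same deviation on a 10s segment is dropped)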
duration_match = [s for s in segments if duration_filter(s)]
if len(duration_match) != len(segments):
@@ -68,28 +73,30 @@ class SponsorBlockPP(FFmpegPostProcessor):
def to_chapter(s):
(start, end), cat = s['segment'], s['category']
+ title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]
return {
'start_time': start,
'end_time': end,
'category': cat,
- 'title': self.CATEGORIES[cat],
- '_categories': [(cat, start, end)]
+ 'title': title,
+ 'type': s['actionType'],
+ '_categories': [(cat, start, end, title)],
}
sponsor_chapters = [to_chapter(s) for s in duration_match]
if not sponsor_chapters:
- self.to_screen('No segments were found in the SponsorBlock database')
+ self.to_screen('No matching segments were found in the SponsorBlock database')
else:
self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
return sponsor_chapters
def _get_sponsor_segments(self, video_id, service):
- hash = sha256(video_id.encode('ascii')).hexdigest()
+ hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
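+ # Querying by a short hash prefix keeps the requested video ID k-anonymous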
# SponsorBlock API recommends using first 4 hash characters.
- url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + compat_urllib_parse_urlencode({
+ url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + urllib.parse.urlencode({
'service': service,
'categories': json.dumps(self._categories),
- 'actionTypes': json.dumps(['skip', 'poi'])
+ 'actionTypes': json.dumps(['skip', 'poi', 'chapter'])
})
for d in self._download_json(url) or []:
if d['videoID'] == video_id:
diff --git a/hypervideo_dl/postprocessor/xattrpp.py b/hypervideo_dl/postprocessor/xattrpp.py
index 93acd6d..f822eff 100644
--- a/hypervideo_dl/postprocessor/xattrpp.py
+++ b/hypervideo_dl/postprocessor/xattrpp.py
@@ -1,78 +1,63 @@
-from __future__ import unicode_literals
+import os
from .common import PostProcessor
from ..compat import compat_os_name
from ..utils import (
- hyphenate_date,
- write_xattr,
PostProcessingError,
XAttrMetadataError,
XAttrUnavailableError,
+ hyphenate_date,
+ write_xattr,
)
class XAttrMetadataPP(PostProcessor):
- #
- # More info about extended attributes for media:
- # http://freedesktop.org/wiki/CommonExtendedAttributes/
- # http://www.freedesktop.org/wiki/PhreedomDraft/
- # http://dublincore.org/documents/usageguide/elements.shtml
- #
- # TODO:
- # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
- # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
- #
+ """Set extended attributes on downloaded file (if xattr support is found)
+
+ More info about extended attributes for media:
+ http://freedesktop.org/wiki/CommonExtendedAttributes/
+ http://www.freedesktop.org/wiki/PhreedomDraft/
+ http://dublincore.org/documents/usageguide/elements.shtml
+
+ TODO:
+ * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
+ * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
+ """
+
+ XATTR_MAPPING = {
+ 'user.xdg.referrer.url': 'webpage_url',
+ # 'user.xdg.comment': 'description',
+ 'user.dublincore.title': 'title',
+ 'user.dublincore.date': 'upload_date',
+ 'user.dublincore.description': 'description',
+ 'user.dublincore.contributor': 'uploader',
+ 'user.dublincore.format': 'format',
+ }
def run(self, info):
- """ Set extended attributes on downloaded file (if xattr support is found). """
-
- # Write the metadata to the file's xattrs
+ mtime = os.stat(info['filepath']).st_mtime
self.to_screen('Writing metadata to file\'s xattrs')
-
- filename = info['filepath']
-
try:
- xattr_mapping = {
- 'user.xdg.referrer.url': 'webpage_url',
- # 'user.xdg.comment': 'description',
- 'user.dublincore.title': 'title',
- 'user.dublincore.date': 'upload_date',
- 'user.dublincore.description': 'description',
- 'user.dublincore.contributor': 'uploader',
- 'user.dublincore.format': 'format',
- }
-
- num_written = 0
- for xattrname, infoname in xattr_mapping.items():
-
+ for xattrname, infoname in self.XATTR_MAPPING.items():
value = info.get(infoname)
-
if value:
if infoname == 'upload_date':
value = hyphenate_date(value)
-
- byte_value = value.encode('utf-8')
- write_xattr(filename, xattrname, byte_value)
- num_written += 1
-
- return [], info
+ write_xattr(info['filepath'], xattrname, value.encode())
except XAttrUnavailableError as e:
raise PostProcessingError(str(e))
-
except XAttrMetadataError as e:
if e.reason == 'NO_SPACE':
self.report_warning(
'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
- + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
+ 'Some extended attributes are not written')
elif e.reason == 'VALUE_TOO_LONG':
- self.report_warning(
- 'Unable to write extended attributes due to too long values.')
+ self.report_warning('Unable to write extended attributes because the values are too long.')
else:
- msg = 'This filesystem doesn\'t support extended attributes. '
- if compat_os_name == 'nt':
- msg += 'You need to use NTFS.'
- else:
- msg += '(You may have to enable them in your /etc/fstab)'
- raise PostProcessingError(str(e))
- return [], info
+ tip = ('You need to use NTFS' if compat_os_name == 'nt'
+ else 'You may have to enable them in your "/etc/fstab"')
+ raise PostProcessingError(f'This filesystem doesn\'t support extended attributes. {tip}')
+
+ self.try_utime(info['filepath'], mtime, mtime)
+ return [], info
diff --git a/hypervideo_dl/socks.py b/hypervideo_dl/socks.py
index 5d4adbe..f93328f 100644
--- a/hypervideo_dl/socks.py
+++ b/hypervideo_dl/socks.py
@@ -1,8 +1,5 @@
# Public Domain SOCKS proxy protocol implementation
# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3
-
-from __future__ import unicode_literals
-
# References:
# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
@@ -11,12 +8,9 @@ from __future__ import unicode_literals
import collections
import socket
+import struct
-from .compat import (
- compat_ord,
- compat_struct_pack,
- compat_struct_unpack,
-)
+from .compat import compat_ord
__author__ = 'Timo Schmid <coding@timoschmid.de>'
@@ -26,14 +20,14 @@ SOCKS4_REPLY_VERSION = 0x00
# if the client cannot resolve the destination host's domain name to find its
# IP address, it should set the first three bytes of DSTIP to NULL and the last
# byte to a non-zero value.
-SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF)
+SOCKS4_DEFAULT_DSTIP = struct.pack('!BBBB', 0, 0, 0, 0xFF)
SOCKS5_VERSION = 5
SOCKS5_USER_AUTH_VERSION = 0x01
SOCKS5_USER_AUTH_SUCCESS = 0x00
-class Socks4Command(object):
+class Socks4Command:
CMD_CONNECT = 0x01
CMD_BIND = 0x02
@@ -42,14 +36,14 @@ class Socks5Command(Socks4Command):
CMD_UDP_ASSOCIATE = 0x03
-class Socks5Auth(object):
+class Socks5Auth:
AUTH_NONE = 0x00
AUTH_GSSAPI = 0x01
AUTH_USER_PASS = 0x02
AUTH_NO_ACCEPTABLE = 0xFF # For server response
-class Socks5AddressType(object):
+class Socks5AddressType:
ATYP_IPV4 = 0x01
ATYP_DOMAINNAME = 0x03
ATYP_IPV6 = 0x04
@@ -61,14 +55,14 @@ class ProxyError(socket.error):
def __init__(self, code=None, msg=None):
if code is not None and msg is None:
msg = self.CODES.get(code) or 'unknown error'
- super(ProxyError, self).__init__(code, msg)
+ super().__init__(code, msg)
class InvalidVersionError(ProxyError):
def __init__(self, expected_version, got_version):
- msg = ('Invalid response version from server. Expected {0:02x} got '
- '{1:02x}'.format(expected_version, got_version))
- super(InvalidVersionError, self).__init__(0, msg)
+ msg = ('Invalid response version from server. Expected {:02x} got '
+ '{:02x}'.format(expected_version, got_version))
+ super().__init__(0, msg)
class Socks4Error(ProxyError):
@@ -98,7 +92,7 @@ class Socks5Error(ProxyError):
}
-class ProxyType(object):
+class ProxyType:
SOCKS4 = 0
SOCKS4A = 1
SOCKS5 = 2
@@ -111,7 +105,7 @@ Proxy = collections.namedtuple('Proxy', (
class sockssocket(socket.socket):
def __init__(self, *args, **kwargs):
self._proxy = None
- super(sockssocket, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None):
assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5)
@@ -123,17 +117,17 @@ class sockssocket(socket.socket):
while len(data) < cnt:
cur = self.recv(cnt - len(data))
if not cur:
- raise EOFError('{0} bytes missing'.format(cnt - len(data)))
+ raise EOFError(f'{cnt - len(data)} bytes missing')
data += cur
return data
def _recv_bytes(self, cnt):
data = self.recvall(cnt)
- return compat_struct_unpack('!{0}B'.format(cnt), data)
+ return struct.unpack(f'!{cnt}B', data)
@staticmethod
def _len_and_data(data):
- return compat_struct_pack('!B', len(data)) + data
+ return struct.pack('!B', len(data)) + data
def _check_response_version(self, expected_version, got_version):
if got_version != expected_version:
@@ -143,7 +137,7 @@ class sockssocket(socket.socket):
def _resolve_address(self, destaddr, default, use_remote_dns):
try:
return socket.inet_aton(destaddr)
- except socket.error:
+ except OSError:
if use_remote_dns and self._proxy.remote_dns:
return default
else:
@@ -154,17 +148,17 @@ class sockssocket(socket.socket):
ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
- packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
+ packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
- username = (self._proxy.username or '').encode('utf-8')
+ username = (self._proxy.username or '').encode()
packet += username + b'\x00'
if is_4a and self._proxy.remote_dns:
- packet += destaddr.encode('utf-8') + b'\x00'
+ packet += destaddr.encode() + b'\x00'
self.sendall(packet)
- version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8))
+ version, resp_code, dstport, dsthost = struct.unpack('!BBHI', self.recvall(8))
self._check_response_version(SOCKS4_REPLY_VERSION, version)
@@ -178,14 +172,14 @@ class sockssocket(socket.socket):
self._setup_socks4(address, is_4a=True)
def _socks5_auth(self):
- packet = compat_struct_pack('!B', SOCKS5_VERSION)
+ packet = struct.pack('!B', SOCKS5_VERSION)
auth_methods = [Socks5Auth.AUTH_NONE]
if self._proxy.username and self._proxy.password:
auth_methods.append(Socks5Auth.AUTH_USER_PASS)
- packet += compat_struct_pack('!B', len(auth_methods))
- packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods)
+ packet += struct.pack('!B', len(auth_methods))
+ packet += struct.pack(f'!{len(auth_methods)}B', *auth_methods)
self.sendall(packet)
@@ -199,9 +193,9 @@ class sockssocket(socket.socket):
raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
if method == Socks5Auth.AUTH_USER_PASS:
- username = self._proxy.username.encode('utf-8')
- password = self._proxy.password.encode('utf-8')
- packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION)
+ username = self._proxy.username.encode()
+ password = self._proxy.password.encode()
+ packet = struct.pack('!B', SOCKS5_USER_AUTH_VERSION)
packet += self._len_and_data(username) + self._len_and_data(password)
self.sendall(packet)
@@ -221,14 +215,14 @@ class sockssocket(socket.socket):
self._socks5_auth()
reserved = 0
- packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
+ packet = struct.pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
if ipaddr is None:
- destaddr = destaddr.encode('utf-8')
- packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
+ destaddr = destaddr.encode()
+ packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
packet += self._len_and_data(destaddr)
else:
- packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
- packet += compat_struct_pack('!H', port)
+ packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+ packet += struct.pack('!H', port)
self.sendall(packet)
@@ -247,7 +241,7 @@ class sockssocket(socket.socket):
destaddr = self.recvall(alen)
elif atype == Socks5AddressType.ATYP_IPV6:
destaddr = self.recvall(16)
- destport = compat_struct_unpack('!H', self.recvall(2))[0]
+ destport = struct.unpack('!H', self.recvall(2))[0]
return (destaddr, destport)
diff --git a/hypervideo_dl/utils.py b/hypervideo_dl/utils.py
index 6379872..45847f9 100644
--- a/hypervideo_dl/utils.py
+++ b/hypervideo_dl/utils.py
@@ -1,8 +1,3 @@
-#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
-
import asyncio
import atexit
import base64
@@ -10,86 +5,59 @@ import binascii
import calendar
import codecs
import collections
+import collections.abc
import contextlib
-import ctypes
import datetime
-import email.utils
import email.header
+import email.utils
import errno
-import functools
import gzip
import hashlib
import hmac
+import html.entities
+import html.parser
+import http.client
+import http.cookiejar
import importlib.util
+import inspect
import io
import itertools
import json
import locale
import math
+import mimetypes
import operator
import os
import platform
import random
import re
+import shlex
import socket
import ssl
+import struct
import subprocess
import sys
import tempfile
import time
import traceback
+import types
+import unicodedata
+import urllib.error
+import urllib.parse
+import urllib.request
import xml.etree.ElementTree
import zlib
-import mimetypes
+from .compat import functools # isort: split
from .compat import (
- compat_HTMLParseError,
- compat_HTMLParser,
- compat_HTTPError,
- compat_basestring,
- compat_brotli,
- compat_chr,
- compat_cookiejar,
- compat_ctypes_WINFUNCTYPE,
compat_etree_fromstring,
compat_expanduser,
- compat_html_entities,
- compat_html_entities_html5,
- compat_http_client,
- compat_integer_types,
- compat_numeric_types,
- compat_kwargs,
+ compat_HTMLParseError,
compat_os_name,
- compat_parse_qs,
- compat_shlex_split,
compat_shlex_quote,
- compat_str,
- compat_struct_pack,
- compat_struct_unpack,
- compat_urllib_error,
- compat_urllib_parse,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urllib_parse_urlunparse,
- compat_urllib_parse_quote,
- compat_urllib_parse_quote_plus,
- compat_urllib_parse_unquote_plus,
- compat_urllib_request,
- compat_urlparse,
- compat_websockets,
- compat_xpath,
-)
-
-from .socks import (
- ProxyType,
- sockssocket,
)
-
-try:
- import certifi
- has_certifi = True
-except ImportError:
- has_certifi = False
+from .dependencies import brotli, certifi, websockets, xattr
+from .socks import ProxyType, sockssocket
def register_socks_protocols():
@@ -97,8 +65,8 @@ def register_socks_protocols():
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
- if scheme not in compat_urlparse.uses_netloc:
- compat_urlparse.uses_netloc.append(scheme)
+ if scheme not in urllib.parse.uses_netloc:
+ urllib.parse.uses_netloc.append(scheme)
# This is not clearly defined otherwise
@@ -153,7 +121,7 @@ def random_user_agent():
SUPPORTED_ENCODINGS = [
'gzip', 'deflate'
]
-if compat_brotli:
+if brotli:
SUPPORTED_ENCODINGS.append('br')
std_headers = {
@@ -170,6 +138,7 @@ USER_AGENTS = {
NO_DEFAULT = object()
+IDENTITY = lambda x: x
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
@@ -180,22 +149,22 @@ MONTH_NAMES = {
'fr': [
'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+ # these follow the genitive grammatical case (dopełniacz)
+ # some websites might be using nominative, which will require another month list
+ # https://en.wikibooks.org/wiki/Polish/Noun_cases
+ 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
+ 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}
-KNOWN_EXTENSIONS = (
- 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
- 'flv', 'f4v', 'f4a', 'f4b',
- 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
- 'mkv', 'mka', 'mk3d',
- 'avi', 'divx',
- 'mov',
- 'asf', 'wmv', 'wma',
- '3gp', '3g2',
- 'mp3',
- 'flac',
- 'ape',
- 'wav',
- 'f4f', 'f4m', 'm3u8', 'smil')
+# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
+TIMEZONE_NAMES = {
+ 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
+ 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
+ 'EST': -5, 'EDT': -4, # Eastern
+ 'CST': -6, 'CDT': -5, # Central
+ 'MST': -7, 'MDT': -6, # Mountain
+ 'PST': -8, 'PDT': -7 # Pacific
+}
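+# (illustrative: a date string ending in 'EST' is parsed at UTC-5, 'PDT' at UTC-7)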
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
@@ -255,6 +224,7 @@ DATE_FORMATS_DAY_FIRST.extend([
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
+ '%d-%m-%Y %H:%M',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
@@ -267,9 +237,12 @@ DATE_FORMATS_MONTH_FIRST.extend([
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
+
+NUMBER_RE = r'\d+(?:\.\d+)?'
+@functools.cache
def preferredencoding():
"""Get preferred encoding.
@@ -288,37 +261,9 @@ def preferredencoding():
def write_json_file(obj, fn):
""" Encode obj as JSON and write it to fn, atomically if possible """
- fn = encodeFilename(fn)
- if sys.version_info < (3, 0) and sys.platform != 'win32':
- encoding = get_filesystem_encoding()
- # os.path.basename returns a bytes object, but NamedTemporaryFile
- # will fail if the filename contains non ascii characters unless we
- # use a unicode object
- path_basename = lambda f: os.path.basename(fn).decode(encoding)
- # the same for os.path.dirname
- path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
- else:
- path_basename = os.path.basename
- path_dirname = os.path.dirname
-
- args = {
- 'suffix': '.tmp',
- 'prefix': path_basename(fn) + '.',
- 'dir': path_dirname(fn),
- 'delete': False,
- }
-
- # In Python 2.x, json.dump expects a bytestream.
- # In Python 3.x, it writes to a character stream
- if sys.version_info < (3, 0):
- args['mode'] = 'wb'
- else:
- args.update({
- 'mode': 'w',
- 'encoding': 'utf-8',
- })
-
- tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
+ tf = tempfile.NamedTemporaryFile(
+ prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
+ suffix='.tmp', delete=False, mode='w', encoding='utf-8')
try:
with tf:
@@ -326,39 +271,24 @@ def write_json_file(obj, fn):
if sys.platform == 'win32':
# Need to remove existing file on Windows, else os.rename raises
# WindowsError or FileExistsError.
- try:
+ with contextlib.suppress(OSError):
os.unlink(fn)
- except OSError:
- pass
- try:
+ with contextlib.suppress(OSError):
mask = os.umask(0)
os.umask(mask)
os.chmod(tf.name, 0o666 & ~mask)
- except OSError:
- pass
os.rename(tf.name, fn)
except Exception:
- try:
+ with contextlib.suppress(OSError):
os.remove(tf.name)
- except OSError:
- pass
raise
-if sys.version_info >= (2, 7):
- def find_xpath_attr(node, xpath, key, val=None):
- """ Find the xpath xpath[@key=val] """
- assert re.match(r'^[a-zA-Z_-]+$', key)
- expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
- return node.find(expr)
-else:
- def find_xpath_attr(node, xpath, key, val=None):
- for f in node.findall(compat_xpath(xpath)):
- if key not in f.attrib:
- continue
- if val is None or f.attrib.get(key) == val:
- return f
- return None
+def find_xpath_attr(node, xpath, key, val=None):
+ """ Find the xpath xpath[@key=val] """
+ assert re.match(r'^[a-zA-Z_-]+$', key)
+ expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
+ return node.find(expr)
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
@@ -378,9 +308,9 @@ def xpath_with_ns(path, ns_map):
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
def _find_xpath(xpath):
- return node.find(compat_xpath(xpath))
+ return node.find(xpath)
- if isinstance(xpath, (str, compat_str)):
+ if isinstance(xpath, str):
n = _find_xpath(xpath)
else:
for xp in xpath:
@@ -420,21 +350,21 @@ def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
if default is not NO_DEFAULT:
return default
elif fatal:
- name = '%s[@%s]' % (xpath, key) if name is None else name
+ name = f'{xpath}[@{key}]' if name is None else name
raise ExtractorError('Could not find XML attribute %s' % name)
else:
return None
return n.attrib[key]
-def get_element_by_id(id, html):
+def get_element_by_id(id, html, **kwargs):
"""Return the content of the tag with the specified ID in the passed HTML document"""
- return get_element_by_attribute('id', id, html)
+ return get_element_by_attribute('id', id, html, **kwargs)
-def get_element_html_by_id(id, html):
+def get_element_html_by_id(id, html, **kwargs):
"""Return the html of the tag with the specified ID in the passed HTML document"""
- return get_element_html_by_attribute('id', id, html)
+ return get_element_html_by_attribute('id', id, html, **kwargs)
def get_element_by_class(class_name, html):
@@ -449,27 +379,27 @@ def get_element_html_by_class(class_name, html):
return retval[0] if retval else None
-def get_element_by_attribute(attribute, value, html, escape_value=True):
- retval = get_elements_by_attribute(attribute, value, html, escape_value)
+def get_element_by_attribute(attribute, value, html, **kwargs):
+ retval = get_elements_by_attribute(attribute, value, html, **kwargs)
return retval[0] if retval else None
-def get_element_html_by_attribute(attribute, value, html, escape_value=True):
- retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
+def get_element_html_by_attribute(attribute, value, html, **kargs):
+ retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
return retval[0] if retval else None
-def get_elements_by_class(class_name, html):
+def get_elements_by_class(class_name, html, **kargs):
"""Return the content of all tags with the specified class in the passed HTML document as a list"""
return get_elements_by_attribute(
- 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
html, escape_value=False)
def get_elements_html_by_class(class_name, html):
"""Return the html of all tags with the specified class in the passed HTML document as a list"""
return get_elements_html_by_attribute(
- 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
html, escape_value=False)
@@ -483,21 +413,23 @@ def get_elements_html_by_attribute(*args, **kwargs):
return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
-def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
+def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
"""
Return the text (content) and the html (whole) of the tag with the specified
attribute in the passed HTML document
"""
+ if not value:
+ return
- value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
+ quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
value = re.escape(value) if escape_value else value
- partial_element_re = r'''(?x)
- <(?P<tag>[a-zA-Z0-9:._-]+)
+ partial_element_re = rf'''(?x)
+ <(?P<tag>{tag})
(?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
- \s%(attribute)s\s*=\s*(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q)
- ''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional}
+ \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
+ '''
for m in re.finditer(partial_element_re, html):
content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
@@ -508,7 +440,7 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value
)
-class HTMLBreakOnClosingTagParser(compat_HTMLParser):
+class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
"""
HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
closing tag for the first opening tag it has encountered, and can be used
@@ -520,7 +452,7 @@ class HTMLBreakOnClosingTagParser(compat_HTMLParser):
def __init__(self):
self.tagstack = collections.deque()
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def __enter__(self):
return self
@@ -550,6 +482,7 @@ class HTMLBreakOnClosingTagParser(compat_HTMLParser):
raise self.HTMLBreakOnClosingTagException()
+# XXX: This should be far less strict
def get_element_text_and_html_by_tag(tag, html):
"""
For the first element with the specified tag in the passed HTML document
@@ -585,22 +518,23 @@ def get_element_text_and_html_by_tag(tag, html):
raise compat_HTMLParseError('unexpected end of html')
-class HTMLAttributeParser(compat_HTMLParser):
+class HTMLAttributeParser(html.parser.HTMLParser):
"""Trivial HTML parser to gather the attributes for a single element"""
def __init__(self):
self.attrs = {}
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.attrs = dict(attrs)
+ raise compat_HTMLParseError('done')
-class HTMLListAttrsParser(compat_HTMLParser):
+class HTMLListAttrsParser(html.parser.HTMLParser):
"""HTML parser to gather the attributes for the elements of a list"""
def __init__(self):
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
self.items = []
self._level = 0
@@ -626,16 +560,11 @@ def extract_attributes(html_element):
'empty': '', 'noval': None, 'entity': '&',
'sq': '"', 'dq': '\''
}.
- NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
- but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
"""
parser = HTMLAttributeParser()
- try:
+ with contextlib.suppress(compat_HTMLParseError):
parser.feed(html_element)
parser.close()
- # Older Python may throw HTMLParseError in case of malformed HTML
- except compat_HTMLParseError:
- pass
return parser.attrs
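A quick sketch of the behaviour this cleanup preserves: HTMLAttributeParser raises after the first start tag, so extract_attributes only ever parses one element, and valueless attributes map to None:

    extract_attributes('<video src="x.mp4" controls>')
    # {'src': 'x.mp4', 'controls': None}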
@@ -664,6 +593,24 @@ def clean_html(html):
return html.strip()
+class LenientJSONDecoder(json.JSONDecoder):
+ def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
+ self.transform_source, self.ignore_extra = transform_source, ignore_extra
+ super().__init__(*args, **kwargs)
+
+ def decode(self, s):
+ if self.transform_source:
+ s = self.transform_source(s)
+ try:
+ if self.ignore_extra:
+ return self.raw_decode(s.lstrip())[0]
+ return super().decode(s)
+ except json.JSONDecodeError as e:
+ if e.pos is not None:
+ raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
+ raise
+
+
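A usage sketch for the new LenientJSONDecoder, assuming this module is hypervideo_dl/utils.py (json.loads forwards extra keyword arguments to the cls constructor):

    import json
    from hypervideo_dl.utils import LenientJSONDecoder

    # With ignore_extra=True, the leading JSON value is parsed and trailing junk is discarded
    data = json.loads('{"id": 1}; window.x = 2', cls=LenientJSONDecoder, ignore_extra=True)
    assert data == {'id': 1}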
def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.
@@ -674,26 +621,33 @@ def sanitize_open(filename, open_mode):
It returns the tuple (stream, definitive_file_name).
"""
- try:
- if filename == '-':
- if sys.platform == 'win32':
- import msvcrt
+ if filename == '-':
+ if sys.platform == 'win32':
+ import msvcrt
+
+ # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
+ with contextlib.suppress(io.UnsupportedOperation):
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
- return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
- stream = locked_file(filename, open_mode, block=False).open()
- return (stream, filename)
- except (IOError, OSError) as err:
- if err.errno in (errno.EACCES,):
- raise
+ return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
- # In case of error, try to remove win32 forbidden chars
- alt_filename = sanitize_path(filename)
- if alt_filename == filename:
- raise
- else:
- # An exception here should be caught in the caller
- stream = locked_file(filename, open_mode, block=False).open()
- return (stream, alt_filename)
+ for attempt in range(2):
+ try:
+ try:
+ if sys.platform == 'win32':
+ # FIXME: An exclusive lock also locks the file from being read.
+ # Since Windows locks are mandatory, don't lock the file on Windows (for now).
+ # Ref: https://github.com/hypervideo/hypervideo/issues/3124
+ raise LockingUnsupportedError()
+ stream = locked_file(filename, open_mode, block=False).__enter__()
+ except OSError:
+ stream = open(filename, open_mode)
+ return stream, filename
+ except OSError as err:
+ if attempt or err.errno in (errno.EACCES,):
+ raise
+ old_filename, filename = filename, sanitize_path(filename)
+ if old_filename == filename:
+ raise
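A minimal sketch of the rewritten control flow: the first OSError triggers one retry with sanitize_path() applied, and '-' short-circuits to stdout:

    stream, final_name = sanitize_open('clip.mp4', 'wb')  # opens (and locks, where supported)
    stream.close()
    stream, final_name = sanitize_open('-', 'wb')         # stdout buffer, no file involved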
def timeconvert(timestr):
@@ -719,6 +673,9 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
return ACCENT_CHARS[char]
elif not restricted and char == '\n':
return '\0 '
+ elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
+ # Replace with their full-width unicode counterparts
+ return {'/': '\u29F8', '\\': '\u29F9'}.get(char, chr(ord(char) + 0xFEE0))
elif char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
@@ -731,11 +688,14 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
return '\0_'
return char
+ # Replace look-alike Unicode glyphs
+ if restricted and (is_id is NO_DEFAULT or not is_id):
+ s = unicodedata.normalize('NFKC', s)
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s))
if is_id is NO_DEFAULT:
- result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
- STRIP_RE = '(?:\0.|[ _-])*'
+ result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
+ STRIP_RE = r'(?:\0.|[ _-])*'
result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
result = result.replace('\0', '') or '_'
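For illustration, the new full-width branch keeps default-mode filenames readable instead of dropping the characters (U+29F8 big solidus, U+FF1A fullwidth colon):

    sanitize_filename('AC/DC: Back in Black')
    # 'AC⧸DC： Back in Black'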
@@ -759,8 +719,6 @@ def sanitize_path(s, force=False):
if sys.platform == 'win32':
force = False
drive_or_unc, _ = os.path.splitdrive(s)
- if sys.version_info < (2, 7) and not drive_or_unc:
- drive_or_unc, _ = os.path.splitunc(s)
elif force:
drive_or_unc = ''
else:
@@ -774,16 +732,18 @@ def sanitize_path(s, force=False):
for path_part in norm_path]
if drive_or_unc:
sanitized_path.insert(0, drive_or_unc + os.path.sep)
- elif force and s[0] == os.path.sep:
+ elif force and s and s[0] == os.path.sep:
sanitized_path.insert(0, os.path.sep)
return os.path.join(*sanitized_path)
-def sanitize_url(url):
+def sanitize_url(url, *, scheme='http'):
# Prepend protocol-less URLs with `http:` scheme in order to mitigate
# the number of unwanted failures due to missing protocol
- if url.startswith('//'):
- return 'http:%s' % url
+ if url is None:
+ return
+ elif url.startswith('//'):
+ return f'{scheme}:{url}'
# Fix some common typos seen so far
COMMON_TYPOS = (
# https://github.com/ytdl-org/youtube-dl/issues/15649
@@ -798,15 +758,15 @@ def sanitize_url(url):
def extract_basic_auth(url):
- parts = compat_urlparse.urlsplit(url)
+ parts = urllib.parse.urlsplit(url)
if parts.username is None:
return url, None
- url = compat_urlparse.urlunsplit(parts._replace(netloc=(
+ url = urllib.parse.urlunsplit(parts._replace(netloc=(
parts.hostname if parts.port is None
else '%s:%d' % (parts.hostname, parts.port))))
auth_payload = base64.b64encode(
- ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
- return url, 'Basic ' + auth_payload.decode('utf-8')
+ ('%s:%s' % (parts.username, parts.password or '')).encode())
+ return url, f'Basic {auth_payload.decode()}'
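A round-trip sketch of extract_basic_auth: credentials are stripped from the netloc and returned as a ready-made Authorization value:

    extract_basic_auth('https://user:pass@example.com/feed')
    # ('https://example.com/feed', 'Basic dXNlcjpwYXNz')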
def sanitized_Request(url, *args, **kwargs):
@@ -814,7 +774,7 @@ def sanitized_Request(url, *args, **kwargs):
if auth_header is not None:
headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
headers['Authorization'] = auth_header
- return compat_urllib_request.Request(url, *args, **kwargs)
+ return urllib.request.Request(url, *args, **kwargs)
def expand_path(s):
@@ -822,13 +782,16 @@ def expand_path(s):
return os.path.expandvars(compat_expanduser(s))
-def orderedSet(iterable):
- """ Remove all duplicates from the input iterable """
- res = []
- for el in iterable:
- if el not in res:
- res.append(el)
- return res
+def orderedSet(iterable, *, lazy=False):
+ """Remove all duplicates from the input iterable"""
+ def _iter():
+ seen = [] # Do not use set since the items can be unhashable
+ for x in iterable:
+ if x not in seen:
+ seen.append(x)
+ yield x
+
+ return _iter() if lazy else list(_iter())
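For illustration, the rewrite keeps the list-based membership test (so unhashable items still work) and adds an optional generator form:

    orderedSet([1, 2, 1, 3])               # [1, 2, 3]
    orderedSet([{'a': 1}, {'a': 1}])       # [{'a': 1}] - unhashable items are fine
    gen = orderedSet(range(3), lazy=True)  # generator, deduplicated on demand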
def _htmlentity_transform(entity_with_semicolon):
@@ -836,13 +799,13 @@ def _htmlentity_transform(entity_with_semicolon):
entity = entity_with_semicolon[:-1]
# Known non-numeric HTML entity
- if entity in compat_html_entities.name2codepoint:
- return compat_chr(compat_html_entities.name2codepoint[entity])
+ if entity in html.entities.name2codepoint:
+ return chr(html.entities.name2codepoint[entity])
- # TODO: HTML5 allows entities without a semicolon. For example,
- # '&Eacuteric' should be decoded as 'Éric'.
- if entity_with_semicolon in compat_html_entities_html5:
- return compat_html_entities_html5[entity_with_semicolon]
+ # TODO: HTML5 allows entities without a semicolon.
+ # E.g. '&Eacuteric' should be decoded as 'Éric'.
+ if entity_with_semicolon in html.entities.html5:
+ return html.entities.html5[entity_with_semicolon]
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
@@ -853,10 +816,8 @@ def _htmlentity_transform(entity_with_semicolon):
else:
base = 10
# See https://github.com/ytdl-org/youtube-dl/issues/7518
- try:
- return compat_chr(int(numstr, base))
- except ValueError:
- pass
+ with contextlib.suppress(ValueError):
+ return chr(int(numstr, base))
# Unknown entity in name, return its literal representation
return '&%s;' % entity
@@ -865,7 +826,7 @@ def _htmlentity_transform(entity_with_semicolon):
def unescapeHTML(s):
if s is None:
return None
- assert type(s) == compat_str
+ assert isinstance(s, str)
return re.sub(
r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
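A quick sketch of the entity handling above (named, HTML5 and numeric forms):

    unescapeHTML('&Eacute;ric &amp; Co &#x2665;')
    # 'Éric & Co ♥'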
@@ -883,12 +844,9 @@ def escapeHTML(text):
def process_communicate_or_kill(p, *args, **kwargs):
- try:
- return p.communicate(*args, **kwargs)
- except BaseException: # Including KeyboardInterrupt
- p.kill()
- p.wait()
- raise
+ deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
+ f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
+ return Popen.communicate_or_kill(p, *args, **kwargs)
class Popen(subprocess.Popen):
@@ -898,11 +856,54 @@ class Popen(subprocess.Popen):
else:
_startupinfo = None
- def __init__(self, *args, **kwargs):
- super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
+ @staticmethod
+ def _fix_pyinstaller_ld_path(env):
+ """Restore LD_LIBRARY_PATH when using PyInstaller
+ Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
+ https://github.com/hypervideo/hypervideo/issues/4573
+ """
+ if not hasattr(sys, '_MEIPASS'):
+ return
+
+ def _fix(key):
+ orig = env.get(f'{key}_ORIG')
+ if orig is None:
+ env.pop(key, None)
+ else:
+ env[key] = orig
+
+ _fix('LD_LIBRARY_PATH') # Linux
+ _fix('DYLD_LIBRARY_PATH') # macOS
+
+ def __init__(self, *args, env=None, text=False, **kwargs):
+ if env is None:
+ env = os.environ.copy()
+ self._fix_pyinstaller_ld_path(env)
+
+ if text is True:
+ kwargs['universal_newlines'] = True # For 3.6 compatibility
+ kwargs.setdefault('encoding', 'utf-8')
+ kwargs.setdefault('errors', 'replace')
+ super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
def communicate_or_kill(self, *args, **kwargs):
- return process_communicate_or_kill(self, *args, **kwargs)
+ try:
+ return self.communicate(*args, **kwargs)
+ except BaseException: # Including KeyboardInterrupt
+ self.kill(timeout=None)
+ raise
+
+ def kill(self, *, timeout=0):
+ super().kill()
+ if timeout != 0:
+ self.wait(timeout=timeout)
+
+ @classmethod
+ def run(cls, *args, timeout=None, **kwargs):
+ with cls(*args, **kwargs) as proc:
+ default = '' if proc.text_mode else b''
+ stdout, stderr = proc.communicate_or_kill(timeout=timeout)
+ return stdout or default, stderr or default, proc.returncode
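A usage sketch for the new Popen.run helper; ffprobe is just an illustrative command, any argv works:

    import subprocess
    stdout, stderr, returncode = Popen.run(
        ['ffprobe', '-version'], text=True,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)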
def get_subprocess_encoding():
@@ -918,51 +919,23 @@ def get_subprocess_encoding():
def encodeFilename(s, for_subprocess=False):
- """
- @param s The name of the file
- """
-
- assert type(s) == compat_str
-
- # Python 3 has a Unicode API
- if sys.version_info >= (3, 0):
- return s
-
- # Pass '' directly to use Unicode APIs on Windows 2000 and up
- # (Detecting Windows NT 4 is tricky because 'major >= 4' would
- # match Windows 9x series as well. Besides, NT 4 is obsolete.)
- if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- return s
-
- # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
- if sys.platform.startswith('java'):
- return s
-
- return s.encode(get_subprocess_encoding(), 'ignore')
+ assert isinstance(s, str)
+ return s
def decodeFilename(b, for_subprocess=False):
-
- if sys.version_info >= (3, 0):
- return b
-
- if not isinstance(b, bytes):
- return b
-
- return b.decode(get_subprocess_encoding(), 'ignore')
+ return b
def encodeArgument(s):
- if not isinstance(s, compat_str):
- # Legacy code that uses byte strings
- # Uncomment the following line after fixing all post processors
- # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
- s = s.decode('ascii')
- return encodeFilename(s, True)
+ # Legacy code that uses byte strings
+ # Uncomment the following line after fixing all post processors
+ # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
+ return s if isinstance(s, str) else s.decode('ascii')
def decodeArgument(b):
- return decodeFilename(b, True)
+ return b
def decodeOption(optval):
@@ -971,7 +944,7 @@ def decodeOption(optval):
if isinstance(optval, bytes):
optval = optval.decode(preferredencoding())
- assert isinstance(optval, compat_str)
+ assert isinstance(optval, str)
return optval
@@ -1005,10 +978,8 @@ def _ssl_load_windows_store_certs(ssl_context, storename):
except PermissionError:
return
for cert in certs:
- try:
+ with contextlib.suppress(ssl.SSLError):
ssl_context.load_verify_locations(cadata=cert)
- except ssl.SSLError:
- pass
def make_HTTPS_handler(params, **kwargs):
@@ -1017,6 +988,28 @@ def make_HTTPS_handler(params, **kwargs):
context.check_hostname = opts_check_certificate
if params.get('legacyserverconnect'):
context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT
+ # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
+ context.set_ciphers('DEFAULT')
+ elif (
+ sys.version_info < (3, 10)
+ and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1)
+ and not ssl.OPENSSL_VERSION.startswith('LibreSSL')
+ ):
+ # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
+ # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
+ # in some situations [2][3].
+ # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
+ # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
+ # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
+ # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
+ # 2. https://github.com/hypervideo/hypervideo/issues/4627
+ # 3. https://github.com/hypervideo/hypervideo/pull/5294
+ # 4. https://peps.python.org/pep-0644/
+ # 5. https://peps.python.org/pep-0644/#libressl-support
+ # 6. https://github.com/hypervideo/hypervideo/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
+ context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
+ context.minimum_version = ssl.TLSVersion.TLSv1_2
+
context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
if opts_check_certificate:
if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
@@ -1030,12 +1023,25 @@ def make_HTTPS_handler(params, **kwargs):
except ssl.SSLError:
# enum_certificates is not present in mingw python. See https://github.com/hypervideo/hypervideo/issues/1151
if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
- # Create a new context to discard any certificates that were already loaded
- context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
- context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
for storename in ('CA', 'ROOT'):
_ssl_load_windows_store_certs(context, storename)
context.set_default_verify_paths()
+
+ client_certfile = params.get('client_certificate')
+ if client_certfile:
+ try:
+ context.load_cert_chain(
+ client_certfile, keyfile=params.get('client_certificate_key'),
+ password=params.get('client_certificate_password'))
+ except ssl.SSLError:
+ raise YoutubeDLError('Unable to load client certificate')
+
+ # Some servers may reject requests if ALPN extension is not sent. See:
+ # https://github.com/python/cpython/issues/85140
+ # https://github.com/hypervideo/hypervideo/issues/3878
+ with contextlib.suppress(NotImplementedError):
+ context.set_alpn_protocols(['http/1.1'])
+
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
@@ -1063,7 +1069,7 @@ class YoutubeDLError(Exception):
super().__init__(self.msg)
-network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
@@ -1086,13 +1092,18 @@ class ExtractorError(YoutubeDLError):
self.video_id = video_id
self.ie = ie
self.exc_info = sys.exc_info() # preserve original exception
+ if isinstance(self.exc_info[1], ExtractorError):
+ self.exc_info = self.exc_info[1].exc_info
+ super().__init__(self.__msg)
- super(ExtractorError, self).__init__(''.join((
- format_field(ie, template='[%s] '),
- format_field(video_id, template='%s: '),
- msg,
- format_field(cause, template=' (caused by %r)'),
- '' if expected else bug_reports_message())))
+ @property
+ def __msg(self):
+ return ''.join((
+ format_field(self.ie, None, '[%s] '),
+ format_field(self.video_id, None, '%s: '),
+ self.orig_msg,
+ format_field(self.cause, None, ' (caused by %r)'),
+ '' if self.expected else bug_reports_message()))
def format_traceback(self):
return join_nonempty(
@@ -1100,10 +1111,16 @@ class ExtractorError(YoutubeDLError):
self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
delim='\n') or None
+ def __setattr__(self, name, value):
+ super().__setattr__(name, value)
+ if getattr(self, 'msg', None) and name not in ('msg', 'args'):
+ self.msg = self.__msg or type(self).__name__
+ self.args = (self.msg, ) # Cannot be property
+
class UnsupportedError(ExtractorError):
def __init__(self, url):
- super(UnsupportedError, self).__init__(
+ super().__init__(
'Unsupported URL: %s' % url, expected=True)
self.url = url
@@ -1122,10 +1139,18 @@ class GeoRestrictedError(ExtractorError):
def __init__(self, msg, countries=None, **kwargs):
kwargs['expected'] = True
- super(GeoRestrictedError, self).__init__(msg, **kwargs)
+ super().__init__(msg, **kwargs)
self.countries = countries
+class UserNotLive(ExtractorError):
+ """Error when a channel/user is not live"""
+
+ def __init__(self, msg=None, **kwargs):
+ kwargs['expected'] = True
+ super().__init__(msg or 'The channel is not currently live', **kwargs)
+
+
class DownloadError(YoutubeDLError):
"""Download Error exception.
@@ -1136,7 +1161,7 @@ class DownloadError(YoutubeDLError):
def __init__(self, msg, exc_info=None):
""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
- super(DownloadError, self).__init__(msg)
+ super().__init__(msg)
self.exc_info = exc_info
@@ -1230,9 +1255,7 @@ class ContentTooShortError(YoutubeDLError):
"""
def __init__(self, downloaded, expected):
- super(ContentTooShortError, self).__init__(
- 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
- )
+ super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
# Both in bytes
self.downloaded = downloaded
self.expected = expected
@@ -1240,7 +1263,7 @@ class ContentTooShortError(YoutubeDLError):
class XAttrMetadataError(YoutubeDLError):
def __init__(self, code=None, msg='Unknown error'):
- super(XAttrMetadataError, self).__init__(msg)
+ super().__init__(msg)
self.code = code
self.msg = msg
@@ -1259,12 +1282,7 @@ class XAttrUnavailableError(YoutubeDLError):
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
- # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
- # expected HTTP responses to meet HTTP/1.0 or later (see also
- # https://github.com/ytdl-org/youtube-dl/issues/6727)
- if sys.version_info < (3, 0):
- kwargs['strict'] = True
- hc = http_class(*args, **compat_kwargs(kwargs))
+ hc = http_class(*args, **kwargs)
source_address = ydl_handler._params.get('source_address')
if source_address is not None:
@@ -1281,7 +1299,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
ip_addrs = [addr for addr in addrs if addr[0] == af]
if addrs and not ip_addrs:
ip_version = 'v4' if af == socket.AF_INET else 'v6'
- raise socket.error(
+ raise OSError(
"No remote IP%s addresses available for connect, can't use '%s' as source address"
% (ip_version, source_address[0]))
for res in ip_addrs:
@@ -1295,30 +1313,17 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
sock.connect(sa)
err = None # Explicitly break reference cycle
return sock
- except socket.error as _:
+ except OSError as _:
err = _
if sock is not None:
sock.close()
if err is not None:
raise err
else:
- raise socket.error('getaddrinfo returns an empty list')
+ raise OSError('getaddrinfo returns an empty list')
if hasattr(hc, '_create_connection'):
hc._create_connection = _create_connection
- sa = (source_address, 0)
- if hasattr(hc, 'source_address'): # Python 2.7+
- hc.source_address = sa
- else: # Python 2.6
- def _hc_connect(self, *args, **kwargs):
- sock = _create_connection(
- (self.host, self.port), self.timeout, sa)
- if is_https:
- self.sock = ssl.wrap_socket(
- sock, self.key_file, self.cert_file,
- ssl_version=ssl.PROTOCOL_TLSv1)
- else:
- self.sock = sock
- hc.connect = functools.partial(_hc_connect, hc)
+ hc.source_address = (source_address, 0)
return hc
@@ -1327,13 +1332,13 @@ def handle_youtubedl_headers(headers):
filtered_headers = headers
if 'Youtubedl-no-compression' in filtered_headers:
- filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+ filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
del filtered_headers['Youtubedl-no-compression']
return filtered_headers
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+class YoutubeDLHandler(urllib.request.HTTPHandler):
"""Handler for HTTP requests and responses.
This class, when installed with an OpenerDirector, automatically adds
@@ -1352,11 +1357,11 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
"""
def __init__(self, params, *args, **kwargs):
- compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+ urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
self._params = params
def http_open(self, req):
- conn_class = compat_http_client.HTTPConnection
+ conn_class = http.client.HTTPConnection
socks_proxy = req.headers.get('Ytdl-socks-proxy')
if socks_proxy:
@@ -1380,7 +1385,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
def brotli(data):
if not data:
return data
- return compat_brotli.decompress(data)
+ return brotli.decompress(data)
def http_request(self, req):
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
@@ -1409,12 +1414,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
req.headers = handle_youtubedl_headers(req.headers)
- if sys.version_info < (2, 7) and '#' in req.get_full_url():
- # Python 2.6 is brain-dead when it comes to fragments
- req._Request__original = req._Request__original.partition('#')[0]
- req._Request__r_type = req._Request__r_type.partition('#')[0]
-
- return req
+ return super().do_request_(req)
def http_response(self, req, resp):
old_resp = resp
@@ -1424,30 +1424,30 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
try:
uncompressed = io.BytesIO(gz.read())
- except IOError as original_ioerror:
+ except OSError as original_ioerror:
# There may be junk at the end of the file
# See http://stackoverflow.com/q/4928560/35070 for details
for i in range(1, 1024):
try:
gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
uncompressed = io.BytesIO(gz.read())
- except IOError:
+ except OSError:
continue
break
else:
raise original_ioerror
- resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read()))
- resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# brotli
if resp.headers.get('Content-encoding', '') == 'br':
- resp = compat_urllib_request.addinfourl(
+ resp = urllib.request.addinfourl(
io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
@@ -1457,15 +1457,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
location = resp.headers.get('Location')
if location:
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
- if sys.version_info >= (3, 0):
- location = location.encode('iso-8859-1').decode('utf-8')
- else:
- location = location.decode('utf-8')
+ location = location.encode('iso-8859-1').decode()
location_escaped = escape_url(location)
if location != location_escaped:
del resp.headers['Location']
- if sys.version_info < (3, 0):
- location_escaped = location_escaped.encode('utf-8')
resp.headers['Location'] = location_escaped
return resp
@@ -1475,9 +1470,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
def make_socks_conn_class(base_class, socks_proxy):
assert issubclass(base_class, (
- compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+ http.client.HTTPConnection, http.client.HTTPSConnection))
- url_components = compat_urlparse.urlparse(socks_proxy)
+ url_components = urllib.parse.urlparse(socks_proxy)
if url_components.scheme.lower() == 'socks5':
socks_type = ProxyType.SOCKS5
elif url_components.scheme.lower() in ('socks', 'socks4'):
@@ -1488,7 +1483,7 @@ def make_socks_conn_class(base_class, socks_proxy):
def unquote_if_non_empty(s):
if not s:
return s
- return compat_urllib_parse_unquote_plus(s)
+ return urllib.parse.unquote_plus(s)
proxy_args = (
socks_type,
@@ -1502,11 +1497,11 @@ def make_socks_conn_class(base_class, socks_proxy):
def connect(self):
self.sock = sockssocket()
self.sock.setproxy(*proxy_args)
- if type(self.timeout) in (int, float):
+ if isinstance(self.timeout, (int, float)):
self.sock.settimeout(self.timeout)
self.sock.connect((self.host, self.port))
- if isinstance(self, compat_http_client.HTTPSConnection):
+ if isinstance(self, http.client.HTTPSConnection):
if hasattr(self, '_context'): # Python > 2.6
self.sock = self._context.wrap_socket(
self.sock, server_hostname=self.host)
@@ -1516,10 +1511,10 @@ def make_socks_conn_class(base_class, socks_proxy):
return SocksConnection
-class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
def __init__(self, params, https_conn_class=None, *args, **kwargs):
- compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
- self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+ urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
+ self._https_conn_class = https_conn_class or http.client.HTTPSConnection
self._params = params
def https_open(self, req):
@@ -1536,12 +1531,21 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
conn_class = make_socks_conn_class(conn_class, socks_proxy)
del req.headers['Ytdl-socks-proxy']
- return self.do_open(functools.partial(
- _create_http_connection, self, conn_class, True),
- req, **kwargs)
+ try:
+ return self.do_open(
+ functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
+ except urllib.error.URLError as e:
+ if (isinstance(e.reason, ssl.SSLError)
+ and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
+ raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
+ raise
-class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+def is_path_like(f):
+ return isinstance(f, (str, bytes, os.PathLike))
+
+
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
"""
See [1] for cookie file format.
@@ -1557,57 +1561,67 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
'CookieFileEntry',
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ def __init__(self, filename=None, *args, **kwargs):
+ super().__init__(None, *args, **kwargs)
+ if is_path_like(filename):
+ filename = os.fspath(filename)
+ self.filename = filename
+
+ @staticmethod
+ def _true_or_false(cndn):
+ return 'TRUE' if cndn else 'FALSE'
+
+ @contextlib.contextmanager
+ def open(self, file, *, write=False):
+ if is_path_like(file):
+ with open(file, 'w' if write else 'r', encoding='utf-8') as f:
+ yield f
+ else:
+ if write:
+ file.truncate(0)
+ yield file
+
+ def _really_save(self, f, ignore_discard=False, ignore_expires=False):
+ now = time.time()
+ for cookie in self:
+ if (not ignore_discard and cookie.discard
+ or not ignore_expires and cookie.is_expired(now)):
+ continue
+ name, value = cookie.name, cookie.value
+ if value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name, value = '', name
+ f.write('%s\n' % '\t'.join((
+ cookie.domain,
+ self._true_or_false(cookie.domain.startswith('.')),
+ cookie.path,
+ self._true_or_false(cookie.secure),
+ str_or_none(cookie.expires, default=''),
+ name, value
+ )))
+
+ def save(self, filename=None, *args, **kwargs):
"""
Save cookies to a file.
+ Code is taken from CPython 3.6
+ https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
- Most of the code is taken from CPython 3.8 and slightly adapted
- to support cookie files with UTF-8 in both python 2 and 3.
- """
if filename is None:
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
- # Store session cookies with `expires` set to 0 instead of an empty
- # string
+ # Store session cookies with `expires` set to 0 instead of an empty string
for cookie in self:
if cookie.expires is None:
cookie.expires = 0
- with io.open(filename, 'w', encoding='utf-8') as f:
+ with self.open(filename, write=True) as f:
f.write(self._HEADER)
- now = time.time()
- for cookie in self:
- if not ignore_discard and cookie.discard:
- continue
- if not ignore_expires and cookie.is_expired(now):
- continue
- if cookie.secure:
- secure = 'TRUE'
- else:
- secure = 'FALSE'
- if cookie.domain.startswith('.'):
- initial_dot = 'TRUE'
- else:
- initial_dot = 'FALSE'
- if cookie.expires is not None:
- expires = compat_str(cookie.expires)
- else:
- expires = ''
- if cookie.value is None:
- # cookies.txt regards 'Set-Cookie: foo' as a cookie
- # with no name, whereas http.cookiejar regards it as a
- # cookie with no value.
- name = ''
- value = cookie.name
- else:
- name = cookie.name
- value = cookie.value
- f.write(
- '\t'.join([cookie.domain, initial_dot, cookie.path,
- secure, expires, name, value]) + '\n')
+ self._really_save(f, *args, **kwargs)
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file."""
@@ -1615,7 +1629,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
@@ -1625,21 +1639,23 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
- raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
- raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line
cf = io.StringIO()
- with io.open(filename, encoding='utf-8') as f:
+ with self.open(filename) as f:
for line in f:
try:
cf.write(prepare_line(line))
- except compat_cookiejar.LoadError as e:
- write_string(
- 'WARNING: skipping cookie file entry due to %s: %r\n'
- % (e, line), sys.stderr)
+ except http.cookiejar.LoadError as e:
+ if f'{line.strip()} '[0] in '[{"':
+ raise http.cookiejar.LoadError(
+ 'Cookies file must be Netscape formatted, not JSON. See '
+ 'https://github.com/hypervideo/hypervideo/wiki/FAQ#how-do-i-pass-cookies-to-hypervideo')
+ write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
continue
cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires)
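A hypothetical round trip with the reworked jar, assuming cookies.txt exists and is Netscape-formatted; the new open() helper also accepts file-like objects:

    jar = YoutubeDLCookieJar('cookies.txt')
    jar.load(ignore_discard=True, ignore_expires=True)
    # ... mutate cookies ...
    jar.save()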
@@ -1659,31 +1675,18 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
cookie.discard = True
-class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
- compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+ urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
def http_response(self, request, response):
- # Python 2 will choke on next HTTP request in row if there are non-ASCII
- # characters in Set-Cookie HTTP header of last response (see
- # https://github.com/ytdl-org/youtube-dl/issues/6769).
- # In order to at least prevent crashing we will percent encode Set-Cookie
- # header before HTTPCookieProcessor starts processing it.
- # if sys.version_info < (3, 0) and response.headers:
- # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
- # set_cookie = response.headers.get(set_cookie_header)
- # if set_cookie:
- # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
- # if set_cookie != set_cookie_escaped:
- # del response.headers[set_cookie_header]
- # response.headers[set_cookie_header] = set_cookie_escaped
- return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
-
- https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
+
+ https_request = urllib.request.HTTPCookieProcessor.http_request
https_response = http_response
-class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
"""YoutubeDL redirect handler
The code is based on HTTPRedirectHandler implementation from CPython [1].
@@ -1698,7 +1701,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
3. https://github.com/ytdl-org/youtube-dl/issues/28768
"""
- http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+ http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
def redirect_request(self, req, fp, code, msg, headers, newurl):
"""Return a Request or None in response to a redirect.
@@ -1713,19 +1716,13 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
m = req.get_method()
if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
or code in (301, 302, 303) and m == "POST")):
- raise compat_HTTPError(req.full_url, code, msg, headers, fp)
+ raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
# Strictly (according to RFC 2616), 301 or 302 in response to
# a POST MUST NOT cause a redirection without confirmation
# from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
- # On python 2 urlh.geturl() may sometimes return redirect URL
- # as byte string instead of unicode. This workaround allows
- # to force it always return unicode.
- if sys.version_info[0] < 3:
- newurl = compat_str(newurl)
-
# Be conciliant with URIs containing a space. This is mainly
# redundant with the more complete encoding done in http_error_302(),
# but it is kept for compatibility with other callers.
@@ -1733,11 +1730,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
CONTENT_HEADERS = ("content-length", "content-type")
- # NB: don't use dict comprehension for python 2.6 compatibility
- newheaders = dict((k, v) for k, v in req.headers.items()
- if k.lower() not in CONTENT_HEADERS)
- return compat_urllib_request.Request(
+ newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
+
+ # A 303 must either use GET or HEAD for subsequent request
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
+ if code == 303 and m != 'HEAD':
+ m = 'GET'
+ # 301 and 302 redirects are commonly turned into a GET from a POST
+ # for subsequent requests by browsers, so we'll do the same.
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
+ if code in (301, 302) and m == 'POST':
+ m = 'GET'
+
+ return urllib.request.Request(
newurl, headers=newheaders, origin_req_host=req.origin_req_host,
- unverifiable=True)
+ unverifiable=True, method=m)
def extract_timezone(date_str):
@@ -1753,7 +1761,11 @@ def extract_timezone(date_str):
$)
''', date_str)
if not m:
- timezone = datetime.timedelta()
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
+ if timezone is not None:
+ date_str = date_str[:-len(m.group('tz'))]
+ timezone = datetime.timedelta(hours=timezone or 0)
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
@@ -1777,12 +1789,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
if timezone is None:
timezone, date_str = extract_timezone(date_str)
- try:
- date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+ with contextlib.suppress(ValueError):
+ date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
dt = datetime.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt.timetuple())
- except ValueError:
- pass
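For illustration, assuming the extract_timezone helper above handles the +01:00 suffix as elsewhere in this file:

    parse_iso8601('2021-11-14T14:20:10+01:00')  # 1636896010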
def date_formats(day_first=True):
@@ -1802,26 +1812,23 @@ def unified_strdate(date_str, day_first=True):
_, date_str = extract_timezone(date_str)
for expression in date_formats(day_first):
- try:
+ with contextlib.suppress(ValueError):
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
- except ValueError:
- pass
if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
- try:
+ with contextlib.suppress(ValueError):
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
- except ValueError:
- pass
if upload_date is not None:
- return compat_str(upload_date)
+ return str(upload_date)
def unified_timestamp(date_str, day_first=True):
if date_str is None:
return None
- date_str = re.sub(r'[,|]', '', date_str)
+ date_str = re.sub(r'\s+', ' ', re.sub(
+ r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
@@ -1840,14 +1847,13 @@ def unified_timestamp(date_str, day_first=True):
date_str = m.group(1)
for expression in date_formats(day_first):
- try:
+ with contextlib.suppress(ValueError):
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
return calendar.timegm(dt.timetuple())
- except ValueError:
- pass
+
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
- return calendar.timegm(timetuple) + pm_delta * 3600
+ return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
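A sketch of the combined effect: weekday names and commas are stripped before the format table is tried, and the email.utils fallback now honours the extracted timezone:

    unified_timestamp('Sun, 14 Nov 2021 14:20:10 +0100')  # 1636896010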
def determine_ext(url, default_ext='unknown_video'):
@@ -1868,14 +1874,14 @@ def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
- """
- Return a datetime object from a string in the format YYYYMMDD or
- (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
-
- format: string date format used to return datetime object from
- precision: round the time portion of a datetime object.
- auto|microsecond|second|minute|hour|day.
- auto: round to the unit provided in date_str (if applicable).
+ R"""
+ Return a datetime object from a string.
+ Supported format:
+ (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
+
+ @param format strftime format of DATE
+ @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
+ auto: round to the unit provided in date_str (if applicable).
"""
auto_precision = False
if precision == 'auto':
@@ -1887,7 +1893,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
if date_str == 'yesterday':
return today - datetime.timedelta(days=1)
match = re.match(
- r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
+ r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
date_str)
if match is not None:
start_time = datetime_from_str(match.group('start'), precision, format)
@@ -1910,16 +1916,14 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
def date_from_str(date_str, format='%Y%m%d', strict=False):
- """
- Return a datetime object from a string in the format YYYYMMDD or
- (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
-
- If "strict", only (now|today)[+-][0-9](day|week|month|year)(s)? is allowed
+ R"""
+ Return a date object from a string using datetime_from_str
- format: string date format used to return datetime object from
+ @param strict Restrict allowed patterns to "YYYYMMDD" and
+ (now|today|yesterday)(-\d+(day|week|month|year)s?)?
"""
- if strict and not re.fullmatch(r'\d{8}|(now|today)[+-]\d+(day|week|month|year)(s)?', date_str):
- raise ValueError(f'Invalid date format {date_str}')
+ if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
+ raise ValueError(f'Invalid date format "{date_str}"')
return datetime_from_str(date_str, precision='microsecond', format=format).date()
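For illustration of the tightened strict mode:

    date_from_str('now-1week')                # datetime.date 7 days ago
    date_from_str('now+1month', strict=True)  # ValueError - strict mode only allows '-' offsets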
@@ -1960,7 +1964,7 @@ def hyphenate_date(date_str):
return date_str
-class DateRange(object):
+class DateRange:
"""Represents a time interval between two dates"""
def __init__(self, start=None, end=None):
@@ -1988,121 +1992,81 @@ class DateRange(object):
return self.start <= date <= self.end
def __str__(self):
- return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
+ return f'{self.start.isoformat()} - {self.end.isoformat()}'
+ def __eq__(self, other):
+ return (isinstance(other, DateRange)
+ and self.start == other.start and self.end == other.end)
-def platform_name():
- """ Returns the platform name as a compat_str """
- res = platform.platform()
- if isinstance(res, bytes):
- res = res.decode(preferredencoding())
- assert isinstance(res, compat_str)
- return res
+def platform_name():
+ """ Returns the platform name as a str """
+ deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
+ return platform.platform()
+
+
+@functools.cache
+def system_identifier():
+ python_implementation = platform.python_implementation()
+ if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ libc_ver = []
+ with contextlib.suppress(OSError): # We may not have access to the executable
+ libc_ver = platform.libc_ver()
+
+ return 'Python %s (%s %s %s) - %s (%s%s)' % (
+ platform.python_version(),
+ python_implementation,
+ platform.machine(),
+ platform.architecture()[0],
+ platform.platform(),
+ ssl.OPENSSL_VERSION,
+ format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
+ )
+@functools.cache
def get_windows_version():
- ''' Get Windows version. None if it's not running on Windows '''
+ ''' Get Windows version. Returns () if it's not running on Windows '''
if compat_os_name == 'nt':
return version_tuple(platform.win32_ver()[1])
else:
- return None
-
+ return ()
-def _windows_write_string(s, out):
- """ Returns True if the string was written using special methods,
- False if it has yet to be written out."""
- # Adapted from http://stackoverflow.com/a/3259271/35070
- import ctypes.wintypes
-
- WIN_OUTPUT_IDS = {
- 1: -11,
- 2: -12,
- }
-
- try:
- fileno = out.fileno()
- except AttributeError:
- # If the output stream doesn't have a fileno, it's virtual
- return False
- except io.UnsupportedOperation:
- # Some strange Windows pseudo files?
- return False
- if fileno not in WIN_OUTPUT_IDS:
- return False
-
- GetStdHandle = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
- ('GetStdHandle', ctypes.windll.kernel32))
- h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
-
- WriteConsoleW = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
- ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
- ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
- written = ctypes.wintypes.DWORD(0)
-
- GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
- FILE_TYPE_CHAR = 0x0002
- FILE_TYPE_REMOTE = 0x8000
- GetConsoleMode = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
- ctypes.POINTER(ctypes.wintypes.DWORD))(
- ('GetConsoleMode', ctypes.windll.kernel32))
- INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
-
- def not_a_console(handle):
- if handle == INVALID_HANDLE_VALUE or handle is None:
- return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
- or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+def write_string(s, out=None, encoding=None):
+ assert isinstance(s, str)
+ out = out or sys.stderr
- if not_a_console(h):
- return False
+ if compat_os_name == 'nt' and supports_terminal_sequences(out):
+ s = re.sub(r'([\r\n]+)', r' \1', s)
- def next_nonbmp_pos(s):
- try:
- return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
- except StopIteration:
- return len(s)
-
- while s:
- count = min(next_nonbmp_pos(s), 1024)
-
- ret = WriteConsoleW(
- h, s, count if count else 2, ctypes.byref(written), None)
- if ret == 0:
- raise OSError('Failed to write string')
- if not count: # We just wrote a non-BMP character
- assert written.value == 2
- s = s[1:]
- else:
- assert written.value > 0
- s = s[written.value:]
- return True
+ enc, buffer = None, out
+ if 'b' in getattr(out, 'mode', ''):
+ enc = encoding or preferredencoding()
+ elif hasattr(out, 'buffer'):
+ buffer = out.buffer
+ enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
+ buffer.write(s.encode(enc, 'ignore') if enc else s)
+ out.flush()
-def write_string(s, out=None, encoding=None):
- if out is None:
- out = sys.stderr
- assert type(s) == compat_str
- if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
- if _windows_write_string(s, out):
+def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
+ from . import _IN_CLI
+ if _IN_CLI:
+ if msg in deprecation_warning._cache:
return
-
- if ('b' in getattr(out, 'mode', '')
- or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
- byt = s.encode(encoding or preferredencoding(), 'ignore')
- out.write(byt)
- elif hasattr(out, 'buffer'):
- enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
- byt = s.encode(enc, 'ignore')
- out.buffer.write(byt)
+ deprecation_warning._cache.add(msg)
+ if printer:
+ return printer(f'{msg}{bug_reports_message()}', **kwargs)
+ return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
else:
- out.write(s)
- out.flush()
+ import warnings
+ warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
+
+
+deprecation_warning._cache = set()
def bytes_to_intlist(bs):
@@ -2117,11 +2081,19 @@ def bytes_to_intlist(bs):
def intlist_to_bytes(xs):
if not xs:
return b''
- return compat_struct_pack('%dB' % len(xs), *xs)
+ return struct.pack('%dB' % len(xs), *xs)
+
+
+class LockingUnsupportedError(OSError):
+ msg = 'File locking is not supported'
+
+ def __init__(self):
+ super().__init__(self.msg)
# Cross-platform file locking
if sys.platform == 'win32':
+ import ctypes
import ctypes.wintypes
import msvcrt
@@ -2167,7 +2139,8 @@ if sys.platform == 'win32':
if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
(0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
0, whole_low, whole_high, f._lock_file_overlapped_p):
- raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
+ # NB: The no-argument form of "ctypes.FormatError" does not work on PyPy
+ raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
def _unlock_file(f):
assert f._lock_file_overlapped_p
@@ -2180,18 +2153,15 @@ else:
import fcntl
def _lock_file(f, exclusive, block):
+ flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
+ if not block:
+ flags |= fcntl.LOCK_NB
try:
- fcntl.flock(f,
- fcntl.LOCK_SH if not exclusive
- else fcntl.LOCK_EX if block
- else fcntl.LOCK_EX | fcntl.LOCK_NB)
+ fcntl.flock(f, flags)
except BlockingIOError:
raise
except OSError: # AOSP does not have flock()
- fcntl.lockf(f,
- fcntl.LOCK_SH if not exclusive
- else fcntl.LOCK_EX if block
- else fcntl.LOCK_EX | fcntl.LOCK_NB)
+ fcntl.lockf(f, flags)
def _unlock_file(f):
try:
@@ -2200,60 +2170,80 @@ else:
fcntl.lockf(f, fcntl.LOCK_UN)
except ImportError:
- UNSUPPORTED_MSG = 'file locking is not supported on this platform'
def _lock_file(f, exclusive, block):
- raise IOError(UNSUPPORTED_MSG)
+ raise LockingUnsupportedError()
def _unlock_file(f):
- raise IOError(UNSUPPORTED_MSG)
+ raise LockingUnsupportedError()
-class locked_file(object):
- _closed = False
+class locked_file:
+ locked = False
def __init__(self, filename, mode, block=True, encoding=None):
- assert mode in ['r', 'rb', 'a', 'ab', 'w', 'wb']
- self.f = io.open(filename, mode, encoding=encoding)
- self.mode = mode
- self.block = block
+ if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
+ raise NotImplementedError(mode)
+ self.mode, self.block = mode, block
+
+ writable = any(f in mode for f in 'wax+')
+ readable = any(f in mode for f in 'r+')
+ flags = functools.reduce(operator.ior, (
+ getattr(os, 'O_CLOEXEC', 0), # UNIX only
+ getattr(os, 'O_BINARY', 0), # Windows only
+ getattr(os, 'O_NOINHERIT', 0), # Windows only
+ os.O_CREAT if writable else 0, # O_TRUNC only after locking
+ os.O_APPEND if 'a' in mode else 0,
+ os.O_EXCL if 'x' in mode else 0,
+ os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
+ ))
+
+ self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
def __enter__(self):
exclusive = 'r' not in self.mode
try:
_lock_file(self.f, exclusive, self.block)
- except IOError:
+ self.locked = True
+ except OSError:
self.f.close()
raise
+ if 'w' in self.mode:
+ try:
+ self.f.truncate()
+ except OSError as e:
+ if e.errno not in (
+ errno.ESPIPE, # Illegal seek - expected for FIFO
+ errno.EINVAL, # Invalid argument - expected for /dev/null
+ ):
+ raise
return self
- def __exit__(self, etype, value, traceback):
+ def unlock(self):
+ if not self.locked:
+ return
try:
- if not self._closed:
- _unlock_file(self.f)
+ _unlock_file(self.f)
finally:
- self.f.close()
- self._closed = True
+ self.locked = False
- def __iter__(self):
- return iter(self.f)
-
- def write(self, *args):
- return self.f.write(*args)
-
- def read(self, *args):
- return self.f.read(*args)
+ def __exit__(self, *_):
+ try:
+ self.unlock()
+ finally:
+ self.f.close()
- def flush(self):
- self.f.flush()
+ open = __enter__
+ close = __exit__
- def open(self):
- return self.__enter__()
+ def __getattr__(self, attr):
+ return getattr(self.f, attr)
- def close(self, *args):
- self.__exit__(self, *args, value=False, traceback=False)
+ def __iter__(self):
+ return iter(self.f)
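A minimal usage sketch of the rewritten locked_file:

    # Exclusive, non-blocking append; raises BlockingIOError if another process holds the lock
    with locked_file('archive.txt', 'a', block=False) as f:
        f.write('youtube abc123\n')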
+@functools.cache
def get_filesystem_encoding():
encoding = sys.getfilesystemencoding()
return encoding if encoding is not None else 'utf-8'
@@ -2275,7 +2265,7 @@ def smuggle_url(url, data):
url, idata = unsmuggle_url(url, {})
data.update(idata)
- sdata = compat_urllib_parse_urlencode(
+ sdata = urllib.parse.urlencode(
{'__youtubedl_smuggle': json.dumps(data)})
return url + '#' + sdata
@@ -2284,7 +2274,7 @@ def unsmuggle_url(smug_url, default=None):
if '#__youtubedl_smuggle' not in smug_url:
return smug_url, default
url, _, sdata = smug_url.rpartition('#')
- jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
+ jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
data = json.loads(jsond)
return url, data
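A round-trip sketch (the payload rides in the URL fragment as JSON):

    url = smuggle_url('https://example.com/video/1', {'referer': 'https://example.com/'})
    url, data = unsmuggle_url(url)
    # url == 'https://example.com/video/1', data == {'referer': 'https://example.com/'}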
@@ -2307,15 +2297,24 @@ def format_bytes(bytes):
return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
-def lookup_unit_table(unit_table, s):
+def lookup_unit_table(unit_table, s, strict=False):
+ num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
units_re = '|'.join(re.escape(u) for u in unit_table)
- m = re.match(
- r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
+ m = (re.fullmatch if strict else re.match)(
+ rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
if not m:
return None
- num_str = m.group('num').replace(',', '.')
+
+ num = float(m.group('num').replace(',', '.'))
mult = unit_table[m.group('unit')]
- return int(float(num_str) * mult)
+ return round(num * mult)
+
+
+def parse_bytes(s):
+ """Parse a string indicating a byte quantity into an integer"""
+ return lookup_unit_table(
+ {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])},
+ s.upper(), strict=True)
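For illustration of the new helper (units are case-insensitive thanks to s.upper()):

    parse_bytes('500K')  # 512000
    parse_bytes('1.5m')  # 1572864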
def parse_filesize(s):
@@ -2444,7 +2443,7 @@ def parse_resolution(s, *, lenient=False):
def parse_bitrate(s):
- if not isinstance(s, compat_str):
+ if not isinstance(s, str):
return
mobj = re.search(r'\b(\d+)\s*kbps', s)
if mobj:
@@ -2481,11 +2480,12 @@ def fix_xml_ampersands(xml_str):
def setproctitle(title):
- assert isinstance(title, compat_str)
+ assert isinstance(title, str)
- # ctypes in Jython is not complete
- # http://bugs.jython.org/issue2148
- if sys.platform.startswith('java'):
+ # Workaround for https://github.com/hypervideo/hypervideo/issues/4541
+ try:
+ import ctypes
+ except ImportError:
return
try:
@@ -2497,7 +2497,7 @@ def setproctitle(title):
# a bytestring, but since unicode_literals turns
# every string into a unicode string, it fails.
return
- title_bytes = title.encode('utf-8')
+ title_bytes = title.encode()
buf = ctypes.create_string_buffer(len(title_bytes))
buf.value = title_bytes
try:
@@ -2524,40 +2524,43 @@ def remove_quotes(s):
def get_domain(url):
- domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
- return domain.group('domain') if domain else None
+ """
+ This implementation is inconsistent, but is kept for compatibility.
+ Use this only for "webpage_url_domain"
+ """
+ return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
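A quick sketch of the urlparse-based replacement:

    get_domain('https://www.youtube.com/watch?v=abc')  # 'youtube.com'
    get_domain('/no/netloc/here')                      # None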
def url_basename(url):
- path = compat_urlparse.urlparse(url).path
+ path = urllib.parse.urlparse(url).path
return path.strip('/').split('/')[-1]
def base_url(url):
- return re.match(r'https?://[^?#&]+/', url).group()
+ return re.match(r'https?://[^?#]+/', url).group()
def urljoin(base, path):
if isinstance(path, bytes):
- path = path.decode('utf-8')
- if not isinstance(path, compat_str) or not path:
+ path = path.decode()
+ if not isinstance(path, str) or not path:
return None
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
- base = base.decode('utf-8')
- if not isinstance(base, compat_str) or not re.match(
+ base = base.decode()
+ if not isinstance(base, str) or not re.match(
r'^(?:https?:)?//', base):
return None
- return compat_urlparse.urljoin(base, path)
+ return urllib.parse.urljoin(base, path)
-class HEADRequest(compat_urllib_request.Request):
+class HEADRequest(urllib.request.Request):
def get_method(self):
return 'HEAD'
-class PUTRequest(compat_urllib_request.Request):
+class PUTRequest(urllib.request.Request):
def get_method(self):
return 'PUT'
@@ -2572,14 +2575,14 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
def str_or_none(v, default=None):
- return default if v is None else compat_str(v)
+ return default if v is None else str(v)
def str_to_int(int_str):
""" A more relaxed version of int_or_none """
- if isinstance(int_str, compat_integer_types):
+ if isinstance(int_str, int):
return int_str
- elif isinstance(int_str, compat_str):
+ elif isinstance(int_str, str):
int_str = re.sub(r'[,\.\+]', '', int_str)
return int_or_none(int_str)
@@ -2598,18 +2601,18 @@ def bool_or_none(v, default=None):
def strip_or_none(v, default=None):
- return v.strip() if isinstance(v, compat_str) else default
+ return v.strip() if isinstance(v, str) else default
def url_or_none(url):
- if not url or not isinstance(url, compat_str):
+ if not url or not isinstance(url, str):
return None
url = url.strip()
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def request_to_url(req):
- if isinstance(req, compat_urllib_request.Request):
+ if isinstance(req, urllib.request.Request):
return req.get_full_url()
else:
return req
@@ -2618,17 +2621,21 @@ def request_to_url(req):
def strftime_or_none(timestamp, date_format, default=None):
datetime_object = None
try:
- if isinstance(timestamp, compat_numeric_types): # unix timestamp
- datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
- elif isinstance(timestamp, compat_str): # assume YYYYMMDD
+ if isinstance(timestamp, (int, float)): # unix timestamp
+ # Using a naive datetime here can break timestamp() on Windows
+ # Ref: https://github.com/hypervideo/hypervideo/issues/5185, https://github.com/python/cpython/issues/94414
+ datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
+ elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
+ date_format = re.sub( # Support %s on windows
+ r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
return datetime_object.strftime(date_format)
except (ValueError, TypeError, AttributeError):
return default
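To illustrate the new aware-datetime and %s handling above (sample values, not from the patch; the %s emulation relies on re.sub replacing unmatched groups with the empty string):

    >>> strftime_or_none(0, '%Y-%m-%d')  # -> '1970-01-01'
    >>> strftime_or_none(0, '%s')        # %s is emulated even on Windows -> '0'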
def parse_duration(s):
- if not isinstance(s, compat_basestring):
+ if not isinstance(s, str):
return None
s = s.strip()
if not s:
@@ -2677,31 +2684,23 @@ def parse_duration(s):
else:
return None
- duration = 0
- if secs:
- duration += float(secs)
- if mins:
- duration += float(mins) * 60
- if hours:
- duration += float(hours) * 60 * 60
- if days:
- duration += float(days) * 24 * 60 * 60
if ms:
- duration += float(ms.replace(':', '.'))
- return duration
+ ms = ms.replace(':', '.')
+ return sum(float(part or 0) * mult for part, mult in (
+ (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
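Assuming the unchanged matching code above fills days/hours/mins/secs/ms as before, the new one-line summation is equivalent to the removed if-chain (illustrative inputs):

    >>> parse_duration('1:23:45')  # -> 5025.0
    >>> parse_duration('10 min')   # -> 600.0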
def prepend_extension(filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
return (
- '{0}.{1}{2}'.format(name, ext, real_ext)
+ f'{name}.{ext}{real_ext}'
if not expected_real_ext or real_ext[1:] == expected_real_ext
- else '{0}.{1}'.format(filename, ext))
+ else f'{filename}.{ext}')
def replace_extension(filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
- return '{0}.{1}'.format(
+ return '{}.{}'.format(
name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
ext)
@@ -2710,31 +2709,26 @@ def check_executable(exe, args=[]):
""" Checks if the given binary is installed somewhere in PATH, and returns its name.
args can be a list of arguments for a short output (like -version) """
try:
- Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
+ Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
return False
return exe
-def _get_exe_version_output(exe, args, *, to_screen=None):
- if to_screen:
- to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
+def _get_exe_version_output(exe, args):
try:
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if hypervideo is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
- out, _ = Popen(
- [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
+ stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
except OSError:
return False
- if isinstance(out, bytes): # Python 2.x
- out = out.decode('ascii', 'ignore')
- return out
+ return stdout
def detect_exe_version(output, version_re=None, unrecognized='present'):
- assert isinstance(output, compat_str)
+ assert isinstance(output, str)
if version_re is None:
version_re = r'version\s+([-0-9._a-zA-Z]+)'
m = re.search(version_re, output)
@@ -2752,50 +2746,59 @@ def get_exe_version(exe, args=['--version'],
return detect_exe_version(out, version_re, unrecognized) if out else False
+def frange(start=0, stop=None, step=1):
+ """Float range"""
+ if stop is None:
+ start, stop = 0, start
+ sign = [-1, 1][step > 0] if step else 0
+ while sign * start < sign * stop:
+ yield start
+ start += step
+
+
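A sketch of the new frange generator (values illustrative):

    >>> list(frange(3))           # -> [0, 1, 2]
    >>> list(frange(0, 1, 0.25))  # -> [0, 0.25, 0.5, 0.75]
    >>> list(frange(1, 0, -0.5))  # -> [1, 0.5]

Note that a step of 0 makes sign 0, so the loop condition is never true and nothing is yielded.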
class LazyList(collections.abc.Sequence):
- ''' Lazy immutable list from an iterable
- Note that slices of a LazyList are lists and not LazyList'''
+ """Lazy immutable list from an iterable
+ Note that slices of a LazyList are lists and not LazyList"""
class IndexError(IndexError):
pass
def __init__(self, iterable, *, reverse=False, _cache=None):
- self.__iterable = iter(iterable)
- self.__cache = [] if _cache is None else _cache
- self.__reversed = reverse
+ self._iterable = iter(iterable)
+ self._cache = [] if _cache is None else _cache
+ self._reversed = reverse
def __iter__(self):
- if self.__reversed:
+ if self._reversed:
# We need to consume the entire iterable to iterate in reverse
yield from self.exhaust()
return
- yield from self.__cache
- for item in self.__iterable:
- self.__cache.append(item)
+ yield from self._cache
+ for item in self._iterable:
+ self._cache.append(item)
yield item
- def __exhaust(self):
- self.__cache.extend(self.__iterable)
- # Discard the emptied iterable to make it pickle-able
- self.__iterable = []
- return self.__cache
+ def _exhaust(self):
+ self._cache.extend(self._iterable)
+ self._iterable = [] # Discard the emptied iterable to make it pickle-able
+ return self._cache
def exhaust(self):
- ''' Evaluate the entire iterable '''
- return self.__exhaust()[::-1 if self.__reversed else 1]
+ """Evaluate the entire iterable"""
+ return self._exhaust()[::-1 if self._reversed else 1]
@staticmethod
- def __reverse_index(x):
- return None if x is None else -(x + 1)
+ def _reverse_index(x):
+ return None if x is None else ~x
def __getitem__(self, idx):
if isinstance(idx, slice):
- if self.__reversed:
- idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
+ if self._reversed:
+ idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
start, stop, step = idx.start, idx.stop, idx.step or 1
elif isinstance(idx, int):
- if self.__reversed:
- idx = self.__reverse_index(idx)
+ if self._reversed:
+ idx = self._reverse_index(idx)
start, stop, step = idx, idx, 0
else:
raise TypeError('indices must be integers or slices')
@@ -2804,35 +2807,35 @@ class LazyList(collections.abc.Sequence):
or (stop is None and step > 0)):
# We need to consume the entire iterable to be able to slice from the end
# Obviously, never use this with infinite iterables
- self.__exhaust()
+ self._exhaust()
try:
- return self.__cache[idx]
+ return self._cache[idx]
except IndexError as e:
raise self.IndexError(e) from e
- n = max(start or 0, stop or 0) - len(self.__cache) + 1
+ n = max(start or 0, stop or 0) - len(self._cache) + 1
if n > 0:
- self.__cache.extend(itertools.islice(self.__iterable, n))
+ self._cache.extend(itertools.islice(self._iterable, n))
try:
- return self.__cache[idx]
+ return self._cache[idx]
except IndexError as e:
raise self.IndexError(e) from e
def __bool__(self):
try:
- self[-1] if self.__reversed else self[0]
+ self[-1] if self._reversed else self[0]
except self.IndexError:
return False
return True
def __len__(self):
- self.__exhaust()
- return len(self.__cache)
+ self._exhaust()
+ return len(self._cache)
def __reversed__(self):
- return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
+ return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
def __copy__(self):
- return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
+ return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
def __repr__(self):
# repr and str should mimic a list. So we exhaust the iterable
@@ -2884,6 +2887,7 @@ class PagedList:
class OnDemandPagedList(PagedList):
"""Download pages until a page with less than maximum results"""
+
def _getslice(self, start, end):
for pagenum in itertools.count(start // self._pagesize):
firstid = pagenum * self._pagesize
@@ -2924,6 +2928,7 @@ class OnDemandPagedList(PagedList):
class InAdvancePagedList(PagedList):
"""PagedList with total number of pages known in advance"""
+
def __init__(self, pagefunc, pagecount, pagesize):
PagedList.__init__(self, pagefunc, pagesize, True)
self._pagecount = pagecount
@@ -2947,6 +2952,140 @@ class InAdvancePagedList(PagedList):
yield from page_results
+class PlaylistEntries:
+ MissingEntry = object()
+ is_exhausted = False
+
+ def __init__(self, ydl, info_dict):
+ self.ydl = ydl
+
+ # _entries must be assigned now since infodict can change during iteration
+ entries = info_dict.get('entries')
+ if entries is None:
+ raise EntryNotInPlaylist('There are no entries')
+ elif isinstance(entries, list):
+ self.is_exhausted = True
+
+ requested_entries = info_dict.get('requested_entries')
+ self.is_incomplete = requested_entries is not None
+ if self.is_incomplete:
+ assert self.is_exhausted
+ self._entries = [self.MissingEntry] * max(requested_entries or [0])
+ for i, entry in zip(requested_entries, entries):
+ self._entries[i - 1] = entry
+ elif isinstance(entries, (list, PagedList, LazyList)):
+ self._entries = entries
+ else:
+ self._entries = LazyList(entries)
+
+ PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
+ (?P<start>[+-]?\d+)?
+ (?P<range>[:-]
+ (?P<end>[+-]?\d+|inf(?:inite)?)?
+ (?::(?P<step>[+-]?\d+))?
+ )?''')
+
+ @classmethod
+ def parse_playlist_items(cls, string):
+ for segment in string.split(','):
+ if not segment:
+ raise ValueError('There are two or more consecutive commas')
+ mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
+ if not mobj:
+ raise ValueError(f'{segment!r} is not a valid specification')
+ start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
+ if int_or_none(step) == 0:
+ raise ValueError(f'Step in {segment!r} cannot be zero')
+ yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
+
+ def get_requested_items(self):
+ playlist_items = self.ydl.params.get('playlist_items')
+ playlist_start = self.ydl.params.get('playliststart', 1)
+ playlist_end = self.ydl.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlist_end in (-1, None):
+ playlist_end = ''
+ if not playlist_items:
+ playlist_items = f'{playlist_start}:{playlist_end}'
+ elif playlist_start != 1 or playlist_end:
+ self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
+
+ for index in self.parse_playlist_items(playlist_items):
+ for i, entry in self[index]:
+ yield i, entry
+ if not entry:
+ continue
+ try:
+ # TODO: Add auto-generated fields
+ self.ydl._match_entry(entry, incomplete=True, silent=True)
+ except (ExistingVideoReached, RejectedVideoReached):
+ return
+
+ def get_full_count(self):
+ if self.is_exhausted and not self.is_incomplete:
+ return len(self)
+ elif isinstance(self._entries, InAdvancePagedList):
+ if self._entries._pagesize == 1:
+ return self._entries._pagecount
+
+ @functools.cached_property
+ def _getter(self):
+ if isinstance(self._entries, list):
+ def get_entry(i):
+ try:
+ entry = self._entries[i]
+ except IndexError:
+ entry = self.MissingEntry
+ if not self.is_incomplete:
+ raise self.IndexError()
+ if entry is self.MissingEntry:
+ raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
+ return entry
+ else:
+ def get_entry(i):
+ try:
+ return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
+ except (LazyList.IndexError, PagedList.IndexError):
+ raise self.IndexError()
+ return get_entry
+
+ def __getitem__(self, idx):
+ if isinstance(idx, int):
+ idx = slice(idx, idx)
+
+ # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
+ step = 1 if idx.step is None else idx.step
+ if idx.start is None:
+ start = 0 if step > 0 else len(self) - 1
+ else:
+ start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
+
+ # NB: Do not call len(self) when idx == [:]
+ if idx.stop is None:
+ stop = 0 if step < 0 else float('inf')
+ else:
+ stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
+ stop += [-1, 1][step > 0]
+
+ for i in frange(start, stop, step):
+ if i < 0:
+ continue
+ try:
+ entry = self._getter(i)
+ except self.IndexError:
+ self.is_exhausted = True
+ if step > 0:
+ break
+ continue
+ yield i + 1, entry
+
+ def __len__(self):
+ return len(tuple(self[:]))
+
+ class IndexError(IndexError):
+ pass
+
+
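For illustration, parse_playlist_items turns a --playlist-items string into ints and slices; end values go through float_or_none, so 'inf' parses too (example input assumed):

    >>> list(PlaylistEntries.parse_playlist_items('1:5,8,-3:'))
    # -> [slice(1, 5.0, None), 8, slice(-3, None, None)]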
def uppercase_escape(s):
unicode_escape = codecs.getdecoder('unicode_escape')
return re.sub(
@@ -2965,14 +3104,12 @@ def lowercase_escape(s):
def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986"""
- if sys.version_info < (3, 0) and isinstance(s, compat_str):
- s = s.encode('utf-8')
- return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+ return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
"""Escape URL as suggested by RFC 3986"""
- url_parsed = compat_urllib_parse_urlparse(url)
+ url_parsed = urllib.parse.urlparse(url)
return url_parsed._replace(
netloc=url_parsed.netloc.encode('idna').decode('ascii'),
path=escape_rfc3986(url_parsed.path),
@@ -2982,13 +3119,13 @@ def escape_url(url):
).geturl()
-def parse_qs(url):
- return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+def parse_qs(url, **kwargs):
+ return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
def read_batch_urls(batch_fd):
def fixup(url):
- if not isinstance(url, compat_str):
+ if not isinstance(url, str):
url = url.decode('utf-8', 'replace')
BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
for bom in BOM_UTF8:
@@ -2998,7 +3135,7 @@ def read_batch_urls(batch_fd):
if not url or url.startswith(('#', ';', ']')):
return False
# "#" cannot be stripped out since it is part of the URI
- # However, it can be safely stipped out if follwing a whitespace
+ # However, it can be safely stripped out if following a whitespace
return re.split(r'\s#', url, 1)[0].rstrip()
with contextlib.closing(batch_fd) as fd:
@@ -3006,22 +3143,22 @@ def read_batch_urls(batch_fd):
def urlencode_postdata(*args, **kargs):
- return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
+ return urllib.parse.urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
if not query:
return url
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_parse_qs(parsed_url.query)
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
qs.update(query)
- return compat_urlparse.urlunparse(parsed_url._replace(
- query=compat_urllib_parse_urlencode(qs, True)))
+ return urllib.parse.urlunparse(parsed_url._replace(
+ query=urllib.parse.urlencode(qs, True)))
-def update_Request(req, url=None, data=None, headers={}, query={}):
+def update_Request(req, url=None, data=None, headers=None, query=None):
req_headers = req.headers.copy()
- req_headers.update(headers)
+ req_headers.update(headers or {})
req_data = data or req.data
req_url = update_url_query(url or req.get_full_url(), query)
req_get_method = req.get_method()
@@ -3030,7 +3167,7 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
elif req_get_method == 'PUT':
req_type = PUTRequest
else:
- req_type = compat_urllib_request.Request
+ req_type = urllib.request.Request
new_req = req_type(
req_url, data=req_data, headers=req_headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
@@ -3045,10 +3182,10 @@ def _multipart_encode_impl(data, boundary):
out = b''
for k, v in data.items():
out += b'--' + boundary.encode('ascii') + b'\r\n'
- if isinstance(k, compat_str):
- k = k.encode('utf-8')
- if isinstance(v, compat_str):
- v = v.encode('utf-8')
+ if isinstance(k, str):
+ k = k.encode()
+ if isinstance(v, str):
+ v = v.encode()
# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
# suggests sending UTF-8 directly. Firefox sends UTF-8, too
content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
@@ -3091,6 +3228,10 @@ def multipart_encode(data, boundary=None):
return out, content_type
+def variadic(x, allowed_types=(str, bytes, dict)):
+ return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
+
+
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
for val in map(d.get, variadic(key_or_keys)):
if val is not None and (val or not skip_false_values):
@@ -3102,7 +3243,7 @@ def try_call(*funcs, expected_type=None, args=[], kwargs={}):
for f in funcs:
try:
val = f(*args, **kwargs)
- except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
+ except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
pass
else:
if expected_type is None or isinstance(val, expected_type):
@@ -3128,7 +3269,7 @@ def merge_dicts(*dicts):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
- return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+ return string if isinstance(string, str) else str(string, encoding, errors)
US_RATINGS = {
@@ -3151,9 +3292,10 @@ TV_PARENTAL_GUIDELINES = {
def parse_age_limit(s):
- if type(s) == int:
+ # isinstance(False, int) is True. So type() must be used instead
+ if type(s) is int: # noqa: E721
return s if 0 <= s <= 21 else None
- if not isinstance(s, compat_basestring):
+ elif not isinstance(s, str):
return None
m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
if m:
@@ -3177,15 +3319,26 @@ def strip_jsonp(code):
r'\g<callback_data>', code)
-def js_to_json(code, vars={}):
+def js_to_json(code, vars={}, *, strict=False):
# vars is a dict of var, val pairs to substitute
+ STRING_QUOTES = '\'"'
+ STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
- SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+ SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
INTEGER_TABLE = (
- (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
- (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+ (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
+ (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
)
+ def process_escape(match):
+ JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
+ escape = match.group(1) or match.group(2)
+
+ return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
+ else R'\u00' if escape == 'x'
+ else '' if escape == '\n'
+ else escape)
+
def fix_kv(m):
v = m.group(0)
if v in ('true', 'false', 'null'):
@@ -3193,38 +3346,42 @@ def js_to_json(code, vars={}):
elif v in ('undefined', 'void 0'):
return 'null'
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
- return ""
-
- if v[0] in ("'", '"'):
- v = re.sub(r'(?s)\\.|"', lambda m: {
- '"': '\\"',
- "\\'": "'",
- '\\\n': '',
- '\\x': '\\u00',
- }.get(m.group(0), m.group(0)), v[1:-1])
- else:
- for regex, base in INTEGER_TABLE:
- im = re.match(regex, v)
- if im:
- i = int(im.group(1), base)
- return '"%d":' % i if v.endswith(':') else '%d' % i
+ return ''
+
+ if v[0] in STRING_QUOTES:
+ escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
+ return f'"{escaped}"'
+
+ for regex, base in INTEGER_TABLE:
+ im = re.match(regex, v)
+ if im:
+ i = int(im.group(1), base)
+ return f'"{i}":' if v.endswith(':') else str(i)
- if v in vars:
- return vars[v]
+ if v in vars:
+ return json.dumps(vars[v])
- return '"%s"' % v
+ if not strict:
+ return f'"{v}"'
- code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+ raise ValueError(f'Unknown value: {v}')
- return re.sub(r'''(?sx)
- "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
- '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
- {comment}|,(?={skip}[\]}}])|
+ def create_map(mobj):
+ return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
+
+ code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
+ if not strict:
+ code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+ code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
+
+ return re.sub(rf'''(?sx)
+ {STRING_RE}|
+ {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
- \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
- [0-9]+(?={skip}:)|
+ \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
+ [0-9]+(?={SKIP_RE}:)|
!+
- '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
+ ''', fix_kv, code)
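A sketch of the rewritten conversion on a small JS object literal, including the new Map handling added above (inputs illustrative):

    >>> js_to_json("{a: 'b', c: 0x1b, d: void 0}")
    # -> '{"a": "b", "c": 27, "d": null}'
    >>> js_to_json('new Map([["k", 1]])')
    # -> '{"k": 1}'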
def qualities(quality_ids):
@@ -3237,7 +3394,7 @@ def qualities(quality_ids):
return q
-POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}
+POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
DEFAULT_OUTTMPL = {
@@ -3315,12 +3472,7 @@ def args_to_str(args):
def error_to_compat_str(err):
- err_str = str(err)
- # On python 2 error byte string must be decoded with proper
- # encoding rather than ascii
- if sys.version_info[0] < 3:
- err_str = err_str.decode(preferredencoding())
- return err_str
+ return str(err)
def error_to_str(err):
@@ -3405,34 +3557,33 @@ def parse_codecs(codecs_str):
return {}
split_codecs = list(filter(None, map(
str.strip, codecs_str.strip().strip(',').split(','))))
- vcodec, acodec, tcodec, hdr = None, None, None, None
+ vcodec, acodec, scodec, hdr = None, None, None, None
for full_codec in split_codecs:
- parts = full_codec.split('.')
- codec = parts[0].replace('0', '')
- if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
- 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
- if not vcodec:
- vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
- if codec in ('dvh1', 'dvhe'):
- hdr = 'DV'
- elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
- hdr = 'HDR10'
- elif full_codec.replace('0', '').startswith('vp9.2'):
- hdr = 'HDR10'
- elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
- if not acodec:
- acodec = full_codec
- elif codec in ('stpp', 'wvtt',):
- if not tcodec:
- tcodec = full_codec
+ parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
+ if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
+ 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
+ if vcodec:
+ continue
+ vcodec = full_codec
+ if parts[0] in ('dvh1', 'dvhe'):
+ hdr = 'DV'
+ elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
+ hdr = 'HDR10'
+ elif parts[:2] == ['vp9', '2']:
+ hdr = 'HDR10'
+ elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
+ 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ acodec = acodec or full_codec
+ elif parts[0] in ('stpp', 'wvtt'):
+ scodec = scodec or full_codec
else:
- write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
- if vcodec or acodec or tcodec:
+ write_string(f'WARNING: Unknown codec {full_codec}\n')
+ if vcodec or acodec or scodec:
return {
'vcodec': vcodec or 'none',
'acodec': acodec or 'none',
'dynamic_range': hdr,
- **({'tcodec': tcodec} if tcodec is not None else {}),
+ **({'scodec': scodec} if scodec is not None else {}),
}
elif len(split_codecs) == 2:
return {
@@ -3442,6 +3593,46 @@ def parse_codecs(codecs_str):
return {}
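To show the effect of the rewrite (sample codec strings, not from the patch): subtitle codecs are now reported as scodec rather than tcodec, and leading zeros are stripped only for matching, not in the returned value:

    >>> parse_codecs('avc1.64001F, mp4a.40.2')
    # -> {'vcodec': 'avc1.64001F', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
    >>> parse_codecs('wvtt')
    # -> {'vcodec': 'none', 'acodec': 'none', 'dynamic_range': None, 'scodec': 'wvtt'}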
+def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
+ assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
+
+ allow_mkv = not preferences or 'mkv' in preferences
+
+ if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
+ return 'mkv' # TODO: does any other format allow this?
+
+ # TODO: Not all codecs supported by parse_codecs are handled here
+ COMPATIBLE_CODECS = {
+ 'mp4': {
+ 'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd)
+ 'h264', 'aacl', 'ec-3', # Set in ISM
+ },
+ 'webm': {
+ 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
+ 'vp9x', 'vp8x', # in the webm spec
+ },
+ }
+
+ sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
+ vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
+
+ for ext in preferences or COMPATIBLE_CODECS.keys():
+ codec_set = COMPATIBLE_CODECS.get(ext, set())
+ if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
+ return ext
+
+ COMPATIBLE_EXTS = (
+ {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
+ {'webm'},
+ )
+ for ext in preferences or vexts:
+ current_exts = {ext, *vexts, *aexts}
+ if ext == 'mkv' or current_exts == {ext} or any(
+ ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
+ return ext
+ return 'mkv' if allow_mkv else preferences[-1]
+
+
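Illustrative calls for the new helper (assumed values):

    >>> get_compatible_ext(vcodecs=['avc1'], acodecs=['mp4a'], vexts=['mp4'], aexts=['m4a'])
    # -> 'mp4'
    >>> get_compatible_ext(vcodecs=['vp9'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'])
    # -> 'mkv' (no single container matches both codecs)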
def urlhandle_detect_ext(url_handle):
getheader = url_handle.headers.get
@@ -3470,24 +3661,25 @@ def age_restricted(content_limit, age_limit):
return age_limit < content_limit
+# List of known byte-order-marks (BOM)
+BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+]
+
+
def is_html(first_bytes):
""" Detect whether a file contains HTML by examining its first bytes. """
- BOMS = [
- (b'\xef\xbb\xbf', 'utf-8'),
- (b'\x00\x00\xfe\xff', 'utf-32-be'),
- (b'\xff\xfe\x00\x00', 'utf-32-le'),
- (b'\xff\xfe', 'utf-16-le'),
- (b'\xfe\xff', 'utf-16-be'),
- ]
+ encoding = 'utf-8'
for bom, enc in BOMS:
- if first_bytes.startswith(bom):
- s = first_bytes[len(bom):].decode(enc, 'replace')
- break
- else:
- s = first_bytes.decode('utf-8', 'replace')
+ while first_bytes.startswith(bom):
+ encoding, first_bytes = enc, first_bytes[len(bom):]
- return re.match(r'^\s*<', s)
+ return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
def determine_protocol(info_dict):
@@ -3505,11 +3697,11 @@ def determine_protocol(info_dict):
ext = determine_ext(url)
if ext == 'm3u8':
- return 'm3u8'
+ return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
elif ext == 'f4m':
return 'f4m'
- return compat_urllib_parse_urlparse(url).scheme
+ return urllib.parse.urlparse(url).scheme
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
@@ -3566,16 +3758,15 @@ def _match_one(filter_part, dct, incomplete):
else:
is_incomplete = lambda k: k in incomplete
- operator_rex = re.compile(r'''(?x)\s*
+ operator_rex = re.compile(r'''(?x)
(?P<key>[a-z_]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
(?P<strval>.+?)
)
- \s*$
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
- m = operator_rex.search(filter_part)
+ m = operator_rex.fullmatch(filter_part.strip())
if m:
m = m.groupdict()
unnegated_op = COMPARISON_OPERATORS[m['op']]
@@ -3588,7 +3779,7 @@ def _match_one(filter_part, dct, incomplete):
comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
actual_value = dct.get(m['key'])
numeric_comparison = None
- if isinstance(actual_value, compat_numeric_types):
+ if isinstance(actual_value, (int, float)):
# If the original field is a string and matching comparisonvalue is
# a number we should respect the origin of the original field
# and process comparison value as a string (see
@@ -3611,11 +3802,10 @@ def _match_one(filter_part, dct, incomplete):
'': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
'!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
}
- operator_rex = re.compile(r'''(?x)\s*
+ operator_rex = re.compile(r'''(?x)
(?P<op>%s)\s*(?P<key>[a-z_]+)
- \s*$
''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
- m = operator_rex.search(filter_part)
+ m = operator_rex.fullmatch(filter_part.strip())
if m:
op = UNARY_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
@@ -3641,23 +3831,52 @@ def match_str(filter_str, dct, incomplete=False):
def match_filter_func(filters):
if not filters:
return None
- filters = variadic(filters)
+ filters = set(variadic(filters))
- def _match_func(info_dict, *args, **kwargs):
- if any(match_str(f, info_dict, *args, **kwargs) for f in filters):
- return None
+ interactive = '-' in filters
+ if interactive:
+ filters.remove('-')
+
+ def _match_func(info_dict, incomplete=False):
+ if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
+ return NO_DEFAULT if interactive and not incomplete else None
else:
- video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+ video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
filter_str = ') | ('.join(map(str.strip, filters))
return f'{video_title} does not pass filter ({filter_str}), skipping ..'
return _match_func
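A sketch of the new behavior (sample filter and infodicts assumed): passing entries yield None, rejected ones yield a message, and a '-' filter now requests interactive confirmation by returning NO_DEFAULT:

    >>> f = match_filter_func('duration < 60')
    >>> f({'id': 'a', 'duration': 30})  # -> None (passes)
    >>> f({'id': 'b', 'duration': 90})
    # -> 'b does not pass filter (duration < 60), skipping ..'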
+class download_range_func:
+ def __init__(self, chapters, ranges):
+ self.chapters, self.ranges = chapters, ranges
+
+ def __call__(self, info_dict, ydl):
+ if not self.ranges and not self.chapters:
+ yield {}
+
+ warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
+ else 'Cannot match chapters since chapter information is unavailable')
+ for regex in self.chapters or []:
+ for i, chapter in enumerate(info_dict.get('chapters') or []):
+ if re.search(regex, chapter['title']):
+ warning = None
+ yield {**chapter, 'index': i}
+ if self.chapters and warning:
+ ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
+
+ yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
+
+ def __eq__(self, other):
+ return (isinstance(other, download_range_func)
+ and self.chapters == other.chapters and self.ranges == other.ranges)
+
+
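For illustration (assumed values; ydl is only consulted to print the chapter warning, so None suffices when no chapter regexes are given):

    >>> list(download_range_func([], [(10, 20)])({'id': 'x'}, None))
    # -> [{'start_time': 10, 'end_time': 20}]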
def parse_dfxp_time_expr(time_expr):
if not time_expr:
return
- mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+ mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
if mobj:
return float(mobj.group('time_offset'))
@@ -3709,7 +3928,7 @@ def dfxp2srt(dfxp_data):
styles = {}
default_style = {}
- class TTMLPElementParser(object):
+ class TTMLPElementParser:
_out = ''
_unclosed_elements = []
_applied_styles = []
@@ -3839,26 +4058,21 @@ def dfxp2srt(dfxp_data):
return ''.join(out)
-def cli_option(params, command_option, param):
+def cli_option(params, command_option, param, separator=None):
param = params.get(param)
- if param:
- param = compat_str(param)
- return [command_option, param] if param is not None else []
+ return ([] if param is None
+ else [command_option, str(param)] if separator is None
+ else [f'{command_option}{separator}{param}'])
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
param = params.get(param)
- if param is None:
- return []
- assert isinstance(param, bool)
- if separator:
- return [command_option + separator + (true_value if param else false_value)]
- return [command_option, true_value if param else false_value]
+ assert param in (True, False, None)
+ return cli_option({True: true_value, False: false_value}, command_option, param, separator)
def cli_valueless_option(params, command_option, param, expected_value=True):
- param = params.get(param)
- return [command_option] if param == expected_value else []
+ return [command_option] if params.get(param) == expected_value else []
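Example values for the consolidated CLI helpers (illustrative):

    >>> cli_option({'proxy': 'socks5://127.0.0.1'}, '--proxy', 'proxy')
    # -> ['--proxy', 'socks5://127.0.0.1']
    >>> cli_option({'proxy': 'socks5://127.0.0.1'}, '--proxy', 'proxy', separator='=')
    # -> ['--proxy=socks5://127.0.0.1']
    >>> cli_bool_option({'check': True}, '--check', 'check')
    # -> ['--check', 'true']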
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
@@ -3894,7 +4108,7 @@ def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compa
return cli_configuration_args(argdict, keys, default, use_compat)
-class ISO639Utils(object):
+class ISO639Utils:
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
_lang_map = {
'aa': 'aar',
@@ -4099,7 +4313,7 @@ class ISO639Utils(object):
return short_name
-class ISO3166Utils(object):
+class ISO3166Utils:
# From http://data.okfn.org/data/core/country-list
_country_map = {
'AF': 'Afghanistan',
@@ -4351,6 +4565,9 @@ class ISO3166Utils(object):
'YE': 'Yemen',
'ZM': 'Zambia',
'ZW': 'Zimbabwe',
+ # Not ISO 3166 codes, but used for IP blocks
+ 'AP': 'Asia/Pacific Region',
+ 'EU': 'Europe',
}
@classmethod
@@ -4359,7 +4576,7 @@ class ISO3166Utils(object):
return cls._country_map.get(code.upper())
-class GeoUtils(object):
+class GeoUtils:
# Major IPv4 address blocks per country
_country_ip_map = {
'AD': '46.172.224.0/19',
@@ -4613,20 +4830,20 @@ class GeoUtils(object):
else:
block = code_or_block
addr, preflen = block.split('/')
- addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))
- return compat_str(socket.inet_ntoa(
- compat_struct_pack('!L', random.randint(addr_min, addr_max))))
+ return str(socket.inet_ntoa(
+ struct.pack('!L', random.randint(addr_min, addr_max))))
-class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+class PerRequestProxyHandler(urllib.request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers
for type in ('http', 'https'):
setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type))
- compat_urllib_request.ProxyHandler.__init__(self, proxies)
+ urllib.request.ProxyHandler.__init__(self, proxies)
def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy')
@@ -4636,11 +4853,11 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
if proxy == '__noproxy__':
return None # No Proxy
- if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
req.add_header('Ytdl-socks-proxy', proxy)
            # hypervideo's http/https handlers handle wrapping the socket with socks
return None
- return compat_urllib_request.ProxyHandler.proxy_open(
+ return urllib.request.ProxyHandler.proxy_open(
self, req, proxy, type)
@@ -4660,7 +4877,7 @@ def long_to_bytes(n, blocksize=0):
s = b''
n = int(n)
while n > 0:
- s = compat_struct_pack('>I', n & 0xffffffff) + s
+ s = struct.pack('>I', n & 0xffffffff) + s
n = n >> 32
# strip off leading zeros
for i in range(len(s)):
@@ -4691,7 +4908,7 @@ def bytes_to_long(s):
s = b'\000' * extra + s
length = length + extra
for i in range(0, length, 4):
- acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+ acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
return acc
@@ -4727,22 +4944,42 @@ def pkcs1pad(data, length):
return [0, 2] + pseudo_random + [0] + data
-def encode_base_n(num, n, table=None):
- FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- if not table:
- table = FULL_TABLE[:n]
+def _base_n_table(n, table):
+ if not table and not n:
+ raise ValueError('Either table or n must be specified')
+ table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
+
+ if n and n != len(table):
+ raise ValueError(f'base {n} exceeds table length {len(table)}')
+ return table
- if n > len(table):
- raise ValueError('base %d exceeds table length %d' % (n, len(table)))
- if num == 0:
+def encode_base_n(num, n=None, table=None):
+ """Convert given int to a base-n string"""
+ table = _base_n_table(n, table)
+ if not num:
return table[0]
- ret = ''
+ result, base = '', len(table)
while num:
- ret = table[num % n] + ret
- num = num // n
- return ret
+ result = table[num % base] + result
+ num = num // base
+ return result
+
+
+def decode_base_n(string, n=None, table=None):
+ """Convert given base-n string to int"""
+ table = {char: index for index, char in enumerate(_base_n_table(n, table))}
+ result, base = 0, len(table)
+ for char in string:
+ result = result * base + table[char]
+ return result
+
+
+def decode_base(value, digits):
+ deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
+ f'in a future version. Use {__name__}.decode_base_n instead')
+ return decode_base_n(value, table=digits)
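Round-trip sketch of the new encode/decode pair (values illustrative):

    >>> encode_base_n(255, 16)           # -> 'ff'
    >>> decode_base_n('ff', 16)          # -> 255
    >>> decode_base_n('10', table='01')  # binary via a custom table -> 2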
def decode_packed_codes(code):
@@ -4796,10 +5033,10 @@ def decode_png(png_data):
header = png_data[8:]
if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
- raise IOError('Not a valid PNG file.')
+ raise OSError('Not a valid PNG file.')
int_map = {1: '>B', 2: '>H', 4: '>I'}
- unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
+ unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
chunks = []
@@ -4833,7 +5070,7 @@ def decode_png(png_data):
idat += chunk['data']
if not idat:
- raise IOError('Unable to read PNG data.')
+ raise OSError('Unable to read PNG data.')
decompressed_data = bytearray(zlib.decompress(idat))
@@ -4897,87 +5134,54 @@ def decode_png(png_data):
def write_xattr(path, key, value):
- # This mess below finds the best xattr tool for the job
- try:
- # try the pyxattr module...
- import xattr
-
- if hasattr(xattr, 'set'): # pyxattr
- # Unicode arguments are not supported in python-pyxattr until
- # version 0.5.0
- # See https://github.com/ytdl-org/youtube-dl/issues/5498
- pyxattr_required_version = '0.5.0'
- if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
- # TODO: fallback to CLI tools
- raise XAttrUnavailableError(
- 'python-pyxattr is detected but is too old. '
- 'hypervideo requires %s or above while your version is %s. '
- 'Falling back to other xattr implementations' % (
- pyxattr_required_version, xattr.__version__))
-
- setxattr = xattr.set
- else: # xattr
- setxattr = xattr.setxattr
+ # Windows: Write xattrs to NTFS Alternate Data Streams:
+ # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+ if compat_os_name == 'nt':
+ assert ':' not in key
+ assert os.path.exists(path)
try:
- setxattr(path, key, value)
- except EnvironmentError as e:
+ with open(f'{path}:{key}', 'wb') as f:
+ f.write(value)
+ except OSError as e:
raise XAttrMetadataError(e.errno, e.strerror)
+ return
- except ImportError:
- if compat_os_name == 'nt':
- # Write xattrs to NTFS Alternate Data Streams:
- # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
- assert ':' not in key
- assert os.path.exists(path)
-
- ads_fn = path + ':' + key
- try:
- with open(ads_fn, 'wb') as f:
- f.write(value)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
- else:
- user_has_setfattr = check_executable('setfattr', ['--version'])
- user_has_xattr = check_executable('xattr', ['-h'])
-
- if user_has_setfattr or user_has_xattr:
+ # UNIX Method 1. Use the xattr/pyxattr modules
- value = value.decode('utf-8')
- if user_has_setfattr:
- executable = 'setfattr'
- opts = ['-n', key, '-v', value]
- elif user_has_xattr:
- executable = 'xattr'
- opts = ['-w', key, value]
+ setxattr = None
+ if getattr(xattr, '_hypervideo_dl__identifier', None) == 'pyxattr':
+ # Unicode arguments are not supported in pyxattr until version 0.5.0
+ # See https://github.com/ytdl-org/youtube-dl/issues/5498
+ if version_tuple(xattr.__version__) >= (0, 5, 0):
+ setxattr = xattr.set
+ elif xattr:
+ setxattr = xattr.setxattr
- cmd = ([encodeFilename(executable, True)]
- + [encodeArgument(o) for o in opts]
- + [encodeFilename(path, True)])
+ if setxattr:
+ try:
+ setxattr(path, key, value)
+ except OSError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ return
- try:
- p = Popen(
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
- stdout, stderr = p.communicate_or_kill()
- stderr = stderr.decode('utf-8', 'replace')
- if p.returncode != 0:
- raise XAttrMetadataError(p.returncode, stderr)
+ # UNIX Method 2. Use setfattr/xattr executables
+ exe = ('setfattr' if check_executable('setfattr', ['--version'])
+ else 'xattr' if check_executable('xattr', ['-h']) else None)
+ if not exe:
+ raise XAttrUnavailableError(
+ 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
+ + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
- else:
- # On Unix, and can't find pyxattr, setfattr, or xattr.
- if sys.platform.startswith('linux'):
- raise XAttrUnavailableError(
- "Couldn't find a tool to set the xattrs. "
- "Install either the python 'pyxattr' or 'xattr' "
- "modules, or the GNU 'attr' package "
- "(which contains the 'setfattr' tool).")
- else:
- raise XAttrUnavailableError(
- "Couldn't find a tool to set the xattrs. "
- "Install either the python 'xattr' module, "
- "or the 'xattr' binary.")
+ value = value.decode()
+ try:
+ _, stderr, returncode = Popen.run(
+ [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except OSError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ if returncode:
+ raise XAttrMetadataError(returncode, stderr)
def random_birthday(year_field, month_field, day_field):
@@ -4993,12 +5197,12 @@ def random_birthday(year_field, month_field, day_field):
# Templates for internet shortcut files, which are plain text files.
-DOT_URL_LINK_TEMPLATE = '''
+DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
-'''.lstrip()
+'''
-DOT_WEBLOC_LINK_TEMPLATE = '''
+DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
@@ -5007,16 +5211,16 @@ DOT_WEBLOC_LINK_TEMPLATE = '''
\t<string>%(url)s</string>
</dict>
</plist>
-'''.lstrip()
+'''
-DOT_DESKTOP_LINK_TEMPLATE = '''
+DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
-'''.lstrip()
+'''
LINK_TEMPLATES = {
'url': DOT_URL_LINK_TEMPLATE,
@@ -5032,7 +5236,7 @@ def iri_to_uri(iri):
The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
"""
- iri_parts = compat_urllib_parse_urlparse(iri)
+ iri_parts = urllib.parse.urlparse(iri)
if '[' in iri_parts.netloc:
raise ValueError('IPv6 URIs are not, yet, supported.')
@@ -5042,29 +5246,29 @@ def iri_to_uri(iri):
net_location = ''
if iri_parts.username:
- net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
+ net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
if iri_parts.password is not None:
- net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
+ net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
net_location += '@'
- net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
+ net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
# The 'idna' encoding produces ASCII text.
if iri_parts.port is not None and iri_parts.port != 80:
net_location += ':' + str(iri_parts.port)
- return compat_urllib_parse_urlunparse(
+ return urllib.parse.urlunparse(
(iri_parts.scheme,
net_location,
- compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
+ urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
# Unsure about the `safe` argument, since this is a legacy way of handling parameters.
- compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
+ urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
# Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
- compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
+ urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
- compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
+ urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
@@ -5072,16 +5276,16 @@ def iri_to_uri(iri):
def to_high_limit_path(path):
if sys.platform in ['win32', 'cygwin']:
# Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
- return r'\\?\ '.rstrip() + os.path.abspath(path)
+ return '\\\\?\\' + os.path.abspath(path)
return path
-def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
+def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
val = traverse_obj(obj, *variadic(field))
- if val in ignore:
+ if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
return default
- return template % (func(val) if func else val)
+ return template % func(val)
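With the NO_DEFAULT sentinel, 0 now formats while None and '' still fall back to the default (example values assumed):

    >>> format_field({'height': 1080}, 'height', '%sp')  # -> '1080p'
    >>> format_field({'fps': 0}, 'fps', '%sfps')         # 0 is no longer ignored -> '0fps'
    >>> format_field({}, 'height', '%sp', default='?')   # -> '?'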
def clean_podcast_url(url):
@@ -5114,7 +5318,7 @@ def make_dir(path, to_screen=None):
if dn and not os.path.exists(dn):
os.makedirs(dn)
return True
- except (OSError, IOError) as err:
+ except OSError as err:
        if callable(to_screen):
to_screen('unable to create directory ' + error_to_compat_str(err))
return False
@@ -5133,7 +5337,7 @@ def get_executable_path():
def load_plugins(name, suffix, namespace):
classes = {}
- try:
+ with contextlib.suppress(FileNotFoundError):
plugins_spec = importlib.util.spec_from_file_location(
name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
plugins = importlib.util.module_from_spec(plugins_spec)
@@ -5146,133 +5350,186 @@ def load_plugins(name, suffix, namespace):
continue
klass = getattr(plugins, name)
classes[name] = namespace[name] = klass
- except FileNotFoundError:
- pass
return classes
def traverse_obj(
- obj, *path_list, default=None, expected_type=None, get_all=True,
+ obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
casesense=True, is_user_input=False, traverse_string=False):
- ''' Traverse nested list/dict/tuple
- @param path_list A list of paths which are checked one by one.
- Each path is a list of keys where each key is a string,
- a function, a tuple of strings/None or "...".
- When a fuction is given, it takes the key and value as arguments
- and returns whether the key matches or not. When a tuple is given,
- all the keys given in the tuple are traversed, and
- "..." traverses all the keys in the object
- "None" returns the object without traversal
- @param default Default value to return
- @param expected_type Only accept final value of this type (Can also be any callable)
- @param get_all Return all the values obtained from a path or only the first one
- @param casesense Whether to consider dictionary keys as case sensitive
- @param is_user_input Whether the keys are generated from user input. If True,
- strings are converted to int/slice if necessary
- @param traverse_string Whether to traverse inside strings. If True, any
- non-compatible object will also be converted into a string
- # TODO: Write tests
- '''
- if not casesense:
- _lower = lambda k: (k.lower() if isinstance(k, str) else k)
- path_list = (map(_lower, variadic(path)) for path in path_list)
-
- def _traverse_obj(obj, path, _current_depth=0):
- nonlocal depth
- path = tuple(variadic(path))
- for i, key in enumerate(path):
- if None in (key, obj):
- return obj
- if isinstance(key, (list, tuple)):
- obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
- key = ...
- if key is ...:
- obj = (obj.values() if isinstance(obj, dict)
- else obj if isinstance(obj, (list, tuple, LazyList))
- else str(obj) if traverse_string else [])
- _current_depth += 1
- depth = max(depth, _current_depth)
- return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
- elif callable(key):
- if isinstance(obj, (list, tuple, LazyList)):
- obj = enumerate(obj)
- elif isinstance(obj, dict):
- obj = obj.items()
- else:
- if not traverse_string:
- return None
- obj = str(obj)
- _current_depth += 1
- depth = max(depth, _current_depth)
- return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
- elif isinstance(obj, dict) and not (is_user_input and key == ':'):
- obj = (obj.get(key) if casesense or (key in obj)
- else next((v for k, v in obj.items() if _lower(k) == key), None))
- else:
- if is_user_input:
- key = (int_or_none(key) if ':' not in key
- else slice(*map(int_or_none, key.split(':'))))
- if key == slice(None):
- return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
- if not isinstance(key, (int, slice)):
- return None
- if not isinstance(obj, (list, tuple, LazyList)):
- if not traverse_string:
- return None
- obj = str(obj)
- try:
- obj = obj[key]
- except IndexError:
- return None
- return obj
+ """
+ Safely traverse nested `dict`s and `Sequence`s
+
+ >>> obj = [{}, {"key": "value"}]
+ >>> traverse_obj(obj, (1, "key"))
+ "value"
+
+ Each of the provided `paths` is tested and the first producing a valid result will be returned.
+ The next path will also be tested if the path branched but no results could be found.
+ Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
+ A value of None is treated as the absence of a value.
+
+ The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
+
+ The keys in the path can be one of:
+ - `None`: Return the current object.
+ - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
+ - `slice`: Branch out and return all values in `obj[key]`.
+ - `Ellipsis`: Branch out and return a list of all values.
+ - `tuple`/`list`: Branch out and return a list of all matching values.
+ Read as: `[traverse_obj(obj, branch) for branch in branches]`.
+ - `function`: Branch out and return values filtered by the function.
+ Read as: `[value for key, value in obj if function(key, value)]`.
+ For `Sequence`s, `key` is the index of the value.
+ - `dict`: Transform the current object and return a matching dict.
+ Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
+
+ `tuple`, `list`, and `dict` all support nested paths and branches.
+
+ @param paths Paths to traverse by.
+ @param default Value to return if the paths do not match.
+ @param expected_type If a `type`, only accept final values of this type.
+ If any other callable, try to call the function on each result.
+ @param get_all If `False`, return the first matching result, otherwise all matching ones.
+ @param casesense If `False`, consider string dictionary keys as case insensitive.
+
+ The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
+
+ @param is_user_input Whether the keys are generated from user input.
+ If `True` strings get converted to `int`/`slice` if needed.
+ @param traverse_string Whether to traverse into objects as strings.
+ If `True`, any non-compatible object will first be
+ converted into a string and then traversed into.
+
+
+ @returns The result of the object traversal.
+ If successful, `get_all=True`, and the path branches at least once,
+ then a list of results is returned instead.
+ A list is always returned if the last path branches and no `default` is given.
+ """
+ is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
+ casefold = lambda k: k.casefold() if isinstance(k, str) else k
if isinstance(expected_type, type):
type_test = lambda val: val if isinstance(val, expected_type) else None
- elif expected_type is not None:
- type_test = expected_type
else:
- type_test = lambda val: val
-
- for path in path_list:
- depth = 0
- val = _traverse_obj(obj, path)
- if val is not None:
- if depth:
- for _ in range(depth - 1):
- val = itertools.chain.from_iterable(v for v in val if v is not None)
- val = [v for v in map(type_test, val) if v is not None]
- if val:
- return val if get_all else val[0]
+ type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
+
+ def apply_key(key, obj):
+ if obj is None:
+ return
+
+ elif key is None:
+ yield obj
+
+ elif isinstance(key, (list, tuple)):
+ for branch in key:
+ _, result = apply_path(obj, branch)
+ yield from result
+
+ elif key is ...:
+ if isinstance(obj, collections.abc.Mapping):
+ yield from obj.values()
+ elif is_sequence(obj):
+ yield from obj
+ elif isinstance(obj, re.Match):
+ yield from obj.groups()
+ elif traverse_string:
+ yield from str(obj)
+
+ elif callable(key):
+ if is_sequence(obj):
+ iter_obj = enumerate(obj)
+ elif isinstance(obj, collections.abc.Mapping):
+ iter_obj = obj.items()
+ elif isinstance(obj, re.Match):
+ iter_obj = enumerate((obj.group(), *obj.groups()))
+ elif traverse_string:
+ iter_obj = enumerate(str(obj))
else:
- val = type_test(val)
- if val is not None:
- return val
- return default
+ return
+ yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))
+ elif isinstance(key, dict):
+ iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
+ yield {k: v if v is not None else default for k, v in iter_obj
+ if v is not None or default is not NO_DEFAULT}
-def traverse_dict(dictn, keys, casesense=True):
- write_string('DeprecationWarning: hypervideo_dl.utils.traverse_dict is deprecated '
- 'and may be removed in a future version. Use hypervideo_dl.utils.traverse_obj instead')
- return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
+ elif isinstance(obj, collections.abc.Mapping):
+ yield (obj.get(key) if casesense or (key in obj)
+ else next((v for k, v in obj.items() if casefold(k) == key), None))
+ elif isinstance(obj, re.Match):
+ if isinstance(key, int) or casesense:
+ with contextlib.suppress(IndexError):
+ yield obj.group(key)
+ return
-def get_first(obj, keys, **kwargs):
- return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
+ if not isinstance(key, str):
+ return
+ yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
-def variadic(x, allowed_types=(str, bytes, dict)):
- return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
+ else:
+ if is_user_input:
+ key = (int_or_none(key) if ':' not in key
+ else slice(*map(int_or_none, key.split(':'))))
+ if not isinstance(key, (int, slice)):
+ return
-def decode_base(value, digits):
- # This will convert given base-x string to scalar (long or int)
- table = {char: index for index, char in enumerate(digits)}
- result = 0
- base = len(digits)
- for chr in value:
- result *= base
- result += table[chr]
- return result
+ if not is_sequence(obj):
+ if not traverse_string:
+ return
+ obj = str(obj)
+
+ with contextlib.suppress(IndexError):
+ yield obj[key]
+
+ def apply_path(start_obj, path):
+ objs = (start_obj,)
+ has_branched = False
+
+ for key in variadic(path):
+ if is_user_input and key == ':':
+ key = ...
+
+ if not casesense and isinstance(key, str):
+ key = key.casefold()
+
+ if key is ... or isinstance(key, (list, tuple)) or callable(key):
+ has_branched = True
+
+ key_func = functools.partial(apply_key, key)
+ objs = itertools.chain.from_iterable(map(key_func, objs))
+
+ return has_branched, objs
+
+ def _traverse_obj(obj, path, use_list=True):
+ has_branched, results = apply_path(obj, path)
+ results = LazyList(x for x in map(type_test, results) if x is not None)
+
+ if get_all and has_branched:
+ return results.exhaust() if results or use_list else None
+
+ return results[0] if results else None
+
+ for index, path in enumerate(paths, 1):
+ use_list = default is NO_DEFAULT and index == len(paths)
+ result = _traverse_obj(obj, path, use_list)
+ if result is not None:
+ return result
+
+ return None if default is NO_DEFAULT else default
+
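A quick sketch of the path semantics the rewritten generator-based traverse_obj supports; the data and results here are illustrative, not taken from the diff:

    data = {'items': [{'id': 1, 'title': 'a'}, {'id': 2}]}
    traverse_obj(data, ('items', 0, 'title'))    # 'a': plain nested lookup
    traverse_obj(data, ('items', ..., 'id'))     # [1, 2]: Ellipsis (...) branches over every value
    traverse_obj(data, ('items', lambda _, v: 'title' in v, 'title'))   # ['a']: callables filter (key, value) pairs
    traverse_obj(data, ('items', 0, 'missing'), default='n/a')          # 'n/a'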
+
+def traverse_dict(dictn, keys, casesense=True):
+ deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
+ f'in a future version. Use "{__name__}.traverse_obj" instead')
+ return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
+
+
+def get_first(obj, keys, **kwargs):
+ return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
def time_seconds(**kwargs):
@@ -5291,9 +5548,9 @@ def jwt_encode_hs256(payload_data, key, headers={}):
}
if headers:
header_data.update(headers)
- header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
- payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
- h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
+ header_b64 = base64.b64encode(json.dumps(header_data).encode())
+ payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
+ h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
signature_b64 = base64.b64encode(h.digest())
token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
return token
@@ -5302,14 +5559,18 @@ def jwt_encode_hs256(payload_data, key, headers={}):
 # Can be extended in the future to verify the signature, parse the header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
header_b64, payload_b64, signature_b64 = jwt.split('.')
- payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
+    # Add trailing ='s that may have been stripped; superfluous ='s are ignored
+ payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
return payload_data
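The two JWT helpers round-trip; a minimal sketch, assuming the payload survives JSON serialization (the encoder returns bytes, and the decoder tolerates stripped base64 padding thanks to the '===' trick above):

    payload = {'sub': 'user', 'exp': 1700000000}
    token = jwt_encode_hs256(payload, 'secret-key')     # bytes: b'<header>.<payload>.<signature>'
    assert jwt_decode_hs256(token.decode()) == payload  # payload only; the signature is not verified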
+WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
+
+
+@functools.cache
def supports_terminal_sequences(stream):
if compat_os_name == 'nt':
- from .compat import WINDOWS_VT_MODE # Must be imported locally
- if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
+ if not WINDOWS_VT_MODE:
return False
elif not os.getenv('TERM'):
return False
@@ -5319,6 +5580,19 @@ def supports_terminal_sequences(stream):
return False
+def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
+ if get_windows_version() < (10, 0, 10586):
+ return
+ global WINDOWS_VT_MODE
+ try:
+ Popen.run('', shell=True)
+ except Exception:
+ return
+
+ WINDOWS_VT_MODE = True
+ supports_terminal_sequences.cache_clear()
+
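The intended call order, sketched under the assumption that callers follow the upstream startup pattern: enable VT processing once on Windows, then query support per stream. Since supports_terminal_sequences is now cached, the cache_clear() above ensures earlier negative answers are recomputed:

    import sys
    if compat_os_name == 'nt':
        windows_enable_vt_mode()
    use_color = supports_terminal_sequences(sys.stderr)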
+
_terminal_sequences_re = re.compile('\033\\[[^m]+m')
@@ -5332,7 +5606,7 @@ def number_of_digits(number):
def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None:
- values = map(from_dict.get, values)
+ values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(str, filter(None, values)))
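With this change, from_dict entries may be traverse_obj paths rather than only flat keys; an illustrative sketch:

    info = {'artist': 'A', 'meta': {'track': 'T'}, 'empty': None}
    join_nonempty('artist', ('meta', 'track'), 'empty', from_dict=info)   # 'A-T', None values are filtered out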
@@ -5346,7 +5620,7 @@ def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
"""
_keys = ('width', 'height')
max_dimensions = max(
- [tuple(format.get(k) or 0 for k in _keys) for format in formats],
+ (tuple(format.get(k) or 0 for k in _keys) for format in formats),
default=(0, 0))
if not max_dimensions[0]:
return thumbnails
@@ -5368,33 +5642,69 @@ def parse_http_range(range):
return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
+def read_stdin(what):
+ eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
+ write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
+ return sys.stdin
+
+
+def determine_file_encoding(data):
+ """
+ Detect the text encoding used
+ @returns (encoding, bytes to skip)
+ """
+
+    # BOMs are given priority over declarations
+ for bom, enc in BOMS:
+ if data.startswith(bom):
+ return enc, len(bom)
+
+ # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
+ # We ignore the endianness to get a good enough match
+ data = data.replace(b'\0', b'')
+ mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
+ return mobj.group(1).decode() if mobj else None, 0
+
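Config.read_file below feeds each file's first 512 bytes through this helper; a sketch of both detection paths (the exact encoding name returned for a BOM depends on the BOMS table defined elsewhere in this module, so it is an assumption here):

    determine_file_encoding(b'\xff\xfe-\x00f\x00')        # e.g. ('utf-16-le', 2): BOM wins, 2 bytes skipped
    determine_file_encoding(b'# coding: utf-8\n-f mp4')   # ('utf-8', 0): coding declaration, nothing skipped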
+
class Config:
own_args = None
+ parsed_args = None
filename = None
__initialized = False
def __init__(self, parser, label=None):
- self._parser, self.label = parser, label
+ self.parser, self.label = parser, label
self._loaded_paths, self.configs = set(), []
def init(self, args=None, filename=None):
assert not self.__initialized
+ self.own_args, self.filename = args, filename
+ return self.load_configs()
+
+ def load_configs(self):
directory = ''
- if filename:
- location = os.path.realpath(filename)
+ if self.filename:
+ location = os.path.realpath(self.filename)
directory = os.path.dirname(location)
if location in self._loaded_paths:
return False
self._loaded_paths.add(location)
self.__initialized = True
- self.own_args, self.filename = args, filename
- for location in self._parser.parse_args(args)[0].config_locations or []:
+ opts, _ = self.parser.parse_known_args(self.own_args)
+ self.parsed_args = self.own_args
+ for location in opts.config_locations or []:
+ if location == '-':
+ if location in self._loaded_paths:
+ continue
+ self._loaded_paths.add(location)
+ self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
+ continue
location = os.path.join(directory, expand_path(location))
if os.path.isdir(location):
location = os.path.join(location, 'hypervideo.conf')
if not os.path.exists(location):
- self._parser.error(f'config location {location} does not exist')
+ self.parser.error(f'config location {location} does not exist')
self.append_config(self.read_file(location), location)
return True
@@ -5410,22 +5720,27 @@ class Config:
@staticmethod
def read_file(filename, default=[]):
try:
- optionf = open(filename)
- except IOError:
+ optionf = open(filename, 'rb')
+ except OSError:
return default # silently skip if file is not present
try:
+ enc, skip = determine_file_encoding(optionf.read(512))
+ optionf.seek(skip, io.SEEK_SET)
+ except OSError:
+ enc = None # silently skip read errors
+ try:
# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
- contents = optionf.read()
- if sys.version_info < (3,):
- contents = contents.decode(preferredencoding())
- res = compat_shlex_split(contents, comments=True)
+ contents = optionf.read().decode(enc or preferredencoding())
+ res = shlex.split(contents, comments=True)
+ except Exception as err:
+ raise ValueError(f'Unable to parse "{filename}": {err}')
finally:
optionf.close()
return res
@staticmethod
def hide_login_info(opts):
- PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
+ PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
def _scrub_eq(o):
@@ -5442,7 +5757,7 @@ class Config:
return opts
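Illustratively, assuming the masking follows the eqre/PRIVATE_OPTS handling shown above (both the inline '=' form and the separate-argument form are scrubbed):

    Config.hide_login_info(['-u', 'name', '--password=secret', '-v'])
    # ['-u', 'PRIVATE', '--password=PRIVATE', '-v']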
def append_config(self, *args, label=None):
- config = type(self)(self._parser, label)
+ config = type(self)(self.parser, label)
config._loaded_paths = self._loaded_paths
if config.init(*args):
self.configs.append(config)
@@ -5451,18 +5766,23 @@ class Config:
def all_args(self):
for config in reversed(self.configs):
yield from config.all_args
- yield from self.own_args or []
+ yield from self.parsed_args or []
+
+ def parse_known_args(self, **kwargs):
+ return self.parser.parse_known_args(self.all_args, **kwargs)
def parse_args(self):
- return self._parser.parse_args(list(self.all_args))
+ return self.parser.parse_args(self.all_args)
-class WebSocketsWrapper():
+class WebSocketsWrapper:
"""Wraps websockets module to use in non-async scopes"""
+ pool = None
def __init__(self, url, headers=None, connect=True):
- self.loop = asyncio.events.new_event_loop()
- self.conn = compat_websockets.connect(
+ self.loop = asyncio.new_event_loop()
+ # XXX: "loop" is deprecated
+ self.conn = websockets.connect(
url, extra_headers=headers, ping_interval=None,
close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
if connect:
@@ -5491,7 +5811,7 @@ class WebSocketsWrapper():
 # For contributors: if any new library that uses asyncio needs to run in non-async scopes, move these functions out of this class
@staticmethod
def run_with_loop(main, loop):
- if not asyncio.coroutines.iscoroutine(main):
+ if not asyncio.iscoroutine(main):
raise ValueError(f'a coroutine was expected, got {main!r}')
try:
@@ -5503,7 +5823,7 @@ class WebSocketsWrapper():
@staticmethod
def _cancel_all_tasks(loop):
- to_cancel = asyncio.tasks.all_tasks(loop)
+ to_cancel = asyncio.all_tasks(loop)
if not to_cancel:
return
@@ -5511,8 +5831,9 @@ class WebSocketsWrapper():
for task in to_cancel:
task.cancel()
+ # XXX: "loop" is removed in python 3.10+
loop.run_until_complete(
- asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True))
+ asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
for task in to_cancel:
if task.cancelled():
@@ -5525,17 +5846,459 @@ class WebSocketsWrapper():
})
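A hedged usage sketch of the wrapper; the URL and payload are placeholders, and send/recv are the blocking methods this class exposes over the event loop it owns:

    ws = WebSocketsWrapper('wss://example.invalid/socket', headers={'Origin': 'https://example.invalid'})
    ws.send('{"op": "subscribe"}')   # runs the coroutine to completion on the private loop
    reply = ws.recv()
    ws.__exit__(None, None, None)    # teardown closes the connection and the loop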
-has_websockets = bool(compat_websockets)
-
-
def merge_headers(*dicts):
"""Merge dicts of http headers case insensitively, prioritizing the latter ones"""
return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
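Later dicts win and keys are canonicalized via str.title(); illustrative values:

    merge_headers({'user-agent': 'A', 'accept': '*/*'}, {'User-Agent': 'B'})
    # {'User-Agent': 'B', 'Accept': '*/*'}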
+def cached_method(f):
+ """Cache a method"""
+ signature = inspect.signature(f)
+
+ @functools.wraps(f)
+ def wrapper(self, *args, **kwargs):
+ bound_args = signature.bind(self, *args, **kwargs)
+ bound_args.apply_defaults()
+ key = tuple(bound_args.arguments.values())[1:]
+
+ cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
+ if key not in cache:
+ cache[key] = f(self, *args, **kwargs)
+ return cache[key]
+ return wrapper
+
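Because the cache key is built from the bound signature with defaults applied, calls that spell the same arguments differently share one entry; a sketch with hypothetical names:

    class Fetcher:
        @cached_method
        def fetch(self, url, retries=3):
            print('fetching', url)    # runs once per distinct (url, retries)
            return len(url)

    f = Fetcher()
    f.fetch('https://example.com')              # computes and caches
    f.fetch('https://example.com', retries=3)   # same bound arguments: cache hit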
+
class classproperty:
- def __init__(self, f):
- self.f = f
+ """property access for class methods with optional caching"""
+ def __new__(cls, func=None, *args, **kwargs):
+ if not func:
+ return functools.partial(cls, *args, **kwargs)
+ return super().__new__(cls)
+
+ def __init__(self, func, *, cache=False):
+ functools.update_wrapper(self, func)
+ self.func = func
+ self._cache = {} if cache else None
def __get__(self, _, cls):
- return self.f(cls)
+ if self._cache is None:
+ return self.func(cls)
+ elif cls not in self._cache:
+ self._cache[cls] = self.func(cls)
+ return self._cache[cls]
+
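The __new__ dance lets the decorator be used bare or with arguments; a sketch:

    class Plugin:
        @classproperty(cache=True)
        def name(cls):
            return cls.__name__.lower()   # computed once per class, then cached

    Plugin.name   # 'plugin', no instance needed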
+
+class Namespace(types.SimpleNamespace):
+ """Immutable namespace"""
+
+ def __iter__(self):
+ return iter(self.__dict__.values())
+
+ @property
+ def items_(self):
+ return self.__dict__.items()
+
+
+MEDIA_EXTENSIONS = Namespace(
+ common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
+ video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
+ common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
+ audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
+ thumbnails=('jpg', 'png', 'webp'),
+ storyboards=('mhtml', ),
+ subtitles=('srt', 'vtt', 'ass', 'lrc'),
+ manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
+)
+MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
+MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
+
+KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
+
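The Namespace makes these groups iterable and queryable, for instance (illustrative):

    'mkv' in MEDIA_EXTENSIONS.video   # True, since common_video is folded into video above
    next(k for k, v in MEDIA_EXTENSIONS.items_ if 'flac' in v)   # 'common_audio'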
+
+class RetryManager:
+ """Usage:
+ for retry in RetryManager(...):
+ try:
+ ...
+ except SomeException as err:
+ retry.error = err
+ continue
+ """
+ attempt, _error = 0, None
+
+ def __init__(self, _retries, _error_callback, **kwargs):
+ self.retries = _retries or 0
+ self.error_callback = functools.partial(_error_callback, **kwargs)
+
+ def _should_retry(self):
+ return self._error is not NO_DEFAULT and self.attempt <= self.retries
+
+ @property
+ def error(self):
+ if self._error is NO_DEFAULT:
+ return None
+ return self._error
+
+ @error.setter
+ def error(self, value):
+ self._error = value
+
+ def __iter__(self):
+ while self._should_retry():
+ self.error = NO_DEFAULT
+ self.attempt += 1
+ yield self
+ if self.error:
+ self.error_callback(self.error, self.attempt, self.retries)
+
+ @staticmethod
+ def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
+ """Utility function for reporting retries"""
+ if count > retries:
+ if error:
+ return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
+ raise e
+
+ if not count:
+ return warn(e)
+ elif isinstance(e, ExtractorError):
+ e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
+ warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
+
+ delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
+ if delay:
+ info(f'Sleeping {delay:.2f} seconds ...')
+ time.sleep(delay)
+
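The docstring's pattern filled in as a runnable sketch; the flaky call and the callback are hypothetical, while report_retry above is the callback the downloaders would actually pass:

    def give_up(err, count, retries):
        raise err

    for retry in RetryManager(3, give_up):
        try:
            might_fail()           # hypothetical flaky operation
        except OSError as err:
            retry.error = err      # marks the attempt failed, so the loop retries
            continue
    # runs at most 1 + 3 times; afterwards give_up() re-raises the last error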
+
+def make_archive_id(ie, video_id):
+ ie_key = ie if isinstance(ie, str) else ie.ie_key()
+ return f'{ie_key.lower()} {video_id}'
+
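For example (illustrative ID):

    make_archive_id('Youtube', 'dQw4w9WgXcQ')   # 'youtube dQw4w9WgXcQ', one line of the --download-archive file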
+
+def truncate_string(s, left, right=0):
+ assert left > 3 and right >= 0
+ if s is None or len(s) <= left + right:
+ return s
+    return f'{s[:left - 3]}...{s[-right:] if right else ""}'  # guard: s[-0:] would be the whole string
+
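Illustrative calls; left counts the three ellipsis dots, and with right=0 only the head survives:

    truncate_string('abcdefghij', 6)      # 'abc...'
    truncate_string('abcdefghij', 6, 2)   # 'abc...ij'
    truncate_string('short', 6, 2)        # 'short', within budget so returned as-is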
+
+def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
+ assert 'all' in alias_dict, '"all" alias is required'
+ requested = list(start or [])
+ for val in options:
+ discard = val.startswith('-')
+ if discard:
+ val = val[1:]
+
+ if val in alias_dict:
+ val = alias_dict[val] if not discard else [
+ i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
+ # NB: Do not allow regex in aliases for performance
+ requested = orderedSet_from_options(val, alias_dict, start=requested)
+ continue
+
+ current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
+ else [val] if val in alias_dict['all'] else None)
+ if current is None:
+ raise ValueError(val)
+
+ if discard:
+ for item in current:
+ while item in requested:
+ requested.remove(item)
+ else:
+ requested.extend(current)
+
+ return orderedSet(requested)
+
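A sketch of the alias expansion and the '-' discard semantics with a made-up alias table:

    aliases = {'all': ['info', 'comments', 'thumbs'], 'default': ['info', 'thumbs']}
    orderedSet_from_options(['default', '-thumbs', 'comments'], aliases)
    # ['info', 'comments']: 'default' expands, '-thumbs' removes, 'comments' appends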
+
+class FormatSorter:
+ regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
+
+ default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
+ 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
+ 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
+ ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
+ 'height', 'width', 'proto', 'vext', 'abr', 'aext',
+ 'fps', 'fs_approx', 'source', 'id')
+
+ settings = {
+ 'vcodec': {'type': 'ordered', 'regex': True,
+ 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
+ 'acodec': {'type': 'ordered', 'regex': True,
+ 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
+ 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
+ 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
+ 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
+ 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
+ 'vext': {'type': 'ordered', 'field': 'video_ext',
+ 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
+ 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
+ 'aext': {'type': 'ordered', 'field': 'audio_ext',
+ 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
+ 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')},
+ 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
+ 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
+ 'field': ('vcodec', 'acodec'),
+ 'function': lambda it: int(any(v != 'none' for v in it))},
+ 'ie_pref': {'priority': True, 'type': 'extractor'},
+ 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
+ 'quality': {'convert': 'float', 'default': -1},
+ 'filesize': {'convert': 'bytes'},
+ 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
+ 'id': {'convert': 'string', 'field': 'format_id'},
+ 'height': {'convert': 'float_none'},
+ 'width': {'convert': 'float_none'},
+ 'fps': {'convert': 'float_none'},
+ 'channels': {'convert': 'float_none', 'field': 'audio_channels'},
+ 'tbr': {'convert': 'float_none'},
+ 'vbr': {'convert': 'float_none'},
+ 'abr': {'convert': 'float_none'},
+ 'asr': {'convert': 'float_none'},
+ 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
+
+ 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
+ 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
+ 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
+ 'ext': {'type': 'combined', 'field': ('vext', 'aext')},
+ 'res': {'type': 'multiple', 'field': ('height', 'width'),
+ 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
+
+ # Actual field names
+ 'format_id': {'type': 'alias', 'field': 'id'},
+ 'preference': {'type': 'alias', 'field': 'ie_pref'},
+ 'language_preference': {'type': 'alias', 'field': 'lang'},
+ 'source_preference': {'type': 'alias', 'field': 'source'},
+ 'protocol': {'type': 'alias', 'field': 'proto'},
+ 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
+ 'audio_channels': {'type': 'alias', 'field': 'channels'},
+
+ # Deprecated
+ 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
+ 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
+ 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
+ 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
+ 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
+ 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
+ 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
+ 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
+ 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
+ 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
+ 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
+ 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
+ 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+ 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+ }
+
+ def __init__(self, ydl, field_preference):
+ self.ydl = ydl
+ self._order = []
+ self.evaluate_params(self.ydl.params, field_preference)
+ if ydl.params.get('verbose'):
+ self.print_verbose_info(self.ydl.write_debug)
+
+ def _get_field_setting(self, field, key):
+ if field not in self.settings:
+ if key in ('forced', 'priority'):
+ return False
+ self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
+ 'deprecated and may be removed in a future version')
+ self.settings[field] = {}
+ propObj = self.settings[field]
+ if key not in propObj:
+ type = propObj.get('type')
+ if key == 'field':
+ default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
+ elif key == 'convert':
+ default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
+ else:
+ default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
+ propObj[key] = default
+ return propObj[key]
+
+ def _resolve_field_value(self, field, value, convertNone=False):
+ if value is None:
+ if not convertNone:
+ return None
+ else:
+ value = value.lower()
+ conversion = self._get_field_setting(field, 'convert')
+ if conversion == 'ignore':
+ return None
+ if conversion == 'string':
+ return value
+ elif conversion == 'float_none':
+ return float_or_none(value)
+ elif conversion == 'bytes':
+ return parse_bytes(value)
+ elif conversion == 'order':
+ order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
+ use_regex = self._get_field_setting(field, 'regex')
+ list_length = len(order_list)
+ empty_pos = order_list.index('') if '' in order_list else list_length + 1
+ if use_regex and value is not None:
+ for i, regex in enumerate(order_list):
+ if regex and re.match(regex, value):
+ return list_length - i
+ return list_length - empty_pos # not in list
+ else: # not regex or value = None
+ return list_length - (order_list.index(value) if value in order_list else empty_pos)
+ else:
+ if value.isnumeric():
+ return float(value)
+ else:
+ self.settings[field]['convert'] = 'string'
+ return value
+
+ def evaluate_params(self, params, sort_extractor):
+ self._use_free_order = params.get('prefer_free_formats', False)
+ self._sort_user = params.get('format_sort', [])
+ self._sort_extractor = sort_extractor
+
+ def add_item(field, reverse, closest, limit_text):
+ field = field.lower()
+ if field in self._order:
+ return
+ self._order.append(field)
+ limit = self._resolve_field_value(field, limit_text)
+ data = {
+ 'reverse': reverse,
+ 'closest': False if limit is None else closest,
+ 'limit_text': limit_text,
+ 'limit': limit}
+ if field in self.settings:
+ self.settings[field].update(data)
+ else:
+ self.settings[field] = data
+
+ sort_list = (
+ tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
+ + (tuple() if params.get('format_sort_force', False)
+ else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
+ + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
+
+ for item in sort_list:
+ match = re.match(self.regex, item)
+ if match is None:
+ raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
+ field = match.group('field')
+ if field is None:
+ continue
+ if self._get_field_setting(field, 'type') == 'alias':
+ alias, field = field, self._get_field_setting(field, 'field')
+ if self._get_field_setting(alias, 'deprecated'):
+ self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
+ f'be removed in a future version. Please use {field} instead')
+ reverse = match.group('reverse') is not None
+ closest = match.group('separator') == '~'
+ limit_text = match.group('limit')
+
+ has_limit = limit_text is not None
+ has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
+ has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
+
+ fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
+ limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
+ limit_count = len(limits)
+ for (i, f) in enumerate(fields):
+ add_item(f, reverse, closest,
+ limits[i] if i < limit_count
+ else limits[0] if has_limit and not has_multiple_limits
+ else None)
+
+ def print_verbose_info(self, write_debug):
+ if self._sort_user:
+ write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
+ if self._sort_extractor:
+ write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
+ write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
+ '+' if self._get_field_setting(field, 'reverse') else '', field,
+ '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
+ self._get_field_setting(field, 'limit_text'),
+ self._get_field_setting(field, 'limit'))
+ if self._get_field_setting(field, 'limit_text') is not None else '')
+ for field in self._order if self._get_field_setting(field, 'visible')]))
+
+ def _calculate_field_preference_from_value(self, format, field, type, value):
+ reverse = self._get_field_setting(field, 'reverse')
+ closest = self._get_field_setting(field, 'closest')
+ limit = self._get_field_setting(field, 'limit')
+
+ if type == 'extractor':
+ maximum = self._get_field_setting(field, 'max')
+ if value is None or (maximum is not None and value >= maximum):
+ value = -1
+ elif type == 'boolean':
+ in_list = self._get_field_setting(field, 'in_list')
+ not_in_list = self._get_field_setting(field, 'not_in_list')
+ value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
+ elif type == 'ordered':
+ value = self._resolve_field_value(field, value, True)
+
+ # try to convert to number
+ val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
+ is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
+ if is_num:
+ value = val_num
+
+ return ((-10, 0) if value is None
+ else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
+ else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
+ else (0, value, 0) if not reverse and (limit is None or value <= limit)
+ else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
+ else (-1, value, 0))
+
+ def _calculate_field_preference(self, format, field):
+ type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
+ get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
+ if type == 'multiple':
+ type = 'field' # Only 'field' is allowed in multiple for now
+ actual_fields = self._get_field_setting(field, 'field')
+
+ value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
+ else:
+ value = get_value(field)
+ return self._calculate_field_preference_from_value(format, field, type, value)
+
+ def calculate_preference(self, format):
+ # Determine missing protocol
+ if not format.get('protocol'):
+ format['protocol'] = determine_protocol(format)
+
+ # Determine missing ext
+ if not format.get('ext') and 'url' in format:
+ format['ext'] = determine_ext(format['url'])
+ if format.get('vcodec') == 'none':
+ format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
+ format['video_ext'] = 'none'
+ else:
+ format['video_ext'] = format['ext']
+ format['audio_ext'] = 'none'
+ # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
+ # format['preference'] = -1000
+
+ # Determine missing bitrates
+ if format.get('tbr') is None:
+ if format.get('vbr') is not None and format.get('abr') is not None:
+ format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
+ else:
+ if format.get('vcodec') != 'none' and format.get('vbr') is None:
+ format['vbr'] = format.get('tbr') - format.get('abr', 0)
+ if format.get('acodec') != 'none' and format.get('abr') is None:
+ format['abr'] = format.get('tbr') - format.get('vbr', 0)
+
+ return tuple(self._calculate_field_preference(format, field) for field in self._order)
+
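How YoutubeDL is expected to drive this class, as a hedged sketch (ydl and formats are assumed to exist; this mirrors the sort_formats call path rather than quoting it):

    sorter = FormatSorter(ydl, field_preference=['res', 'br'])
    formats.sort(key=sorter.calculate_preference)   # ascending, so the best format ends up last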
+
+# Deprecated
+has_certifi = bool(certifi)
+has_websockets = bool(websockets)
diff --git a/hypervideo_dl/version.py b/hypervideo_dl/version.py
index 107fefb..3b08699 100644
--- a/hypervideo_dl/version.py
+++ b/hypervideo_dl/version.py
@@ -2,4 +2,8 @@
__version__ = '1.1.13'
-RELEASE_GIT_HEAD = 'c0c2c57d3'
+RELEASE_GIT_HEAD = '8b644025b'
+
+VARIANT = None
+
+UPDATE_HINT = None
diff --git a/hypervideo_dl/webvtt.py b/hypervideo_dl/webvtt.py
index 0e602a7..e24dae3 100644
--- a/hypervideo_dl/webvtt.py
+++ b/hypervideo_dl/webvtt.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals, print_function, division
-
"""
A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
to be able to assemble a single stand-alone subtitle file, suitably adjusting
@@ -11,17 +8,13 @@ Regular expressions based on the W3C WebVTT specification
in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
"""
-import re
import io
+import re
+
from .utils import int_or_none, timetuple_from_msec
-from .compat import (
- compat_str as str,
- compat_Pattern,
- compat_Match,
-)
-class _MatchParser(object):
+class _MatchParser:
"""
An object that maintains the current parsing position and allows
conveniently advancing it as syntax elements are successfully parsed.
@@ -32,7 +25,7 @@ class _MatchParser(object):
self._pos = 0
def match(self, r):
- if isinstance(r, compat_Pattern):
+ if isinstance(r, re.Pattern):
return r.match(self._data, self._pos)
if isinstance(r, str):
if self._data.startswith(r, self._pos):
@@ -43,7 +36,7 @@ class _MatchParser(object):
def advance(self, by):
if by is None:
amt = 0
- elif isinstance(by, compat_Match):
+ elif isinstance(by, re.Match):
amt = len(by.group(0))
elif isinstance(by, str):
amt = len(by)
@@ -70,7 +63,7 @@ class _MatchChildParser(_MatchParser):
"""
def __init__(self, parent):
- super(_MatchChildParser, self).__init__(parent._data)
+ super().__init__(parent._data)
self.__parent = parent
self._pos = parent._pos
@@ -84,7 +77,7 @@ class _MatchChildParser(_MatchParser):
class ParseError(Exception):
def __init__(self, parser):
- super(ParseError, self).__init__("Parse error at position %u (near %r)" % (
+ super().__init__("Parse error at position %u (near %r)" % (
parser._pos, parser._data[parser._pos:parser._pos + 20]
))
@@ -100,7 +93,7 @@ _REGEX_TS = re.compile(r'''(?x)
([0-9]{3})?
''')
_REGEX_EOF = re.compile(r'\Z')
-_REGEX_NL = re.compile(r'(?:\r\n|[\r\n])')
+_REGEX_NL = re.compile(r'(?:\r\n|[\r\n]|$)')
_REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+')
@@ -109,14 +102,8 @@ def _parse_ts(ts):
Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
into an MPEG PES timestamp: a tick counter at 90 kHz resolution.
"""
-
- h, min, s, ms = ts.groups()
- return 90 * (
- int(h or 0) * 3600000 + # noqa: W504,E221,E222
- int(min) * 60000 + # noqa: W504,E221,E222
- int(s) * 1000 + # noqa: W504,E221,E222
- int(ms) # noqa: W504,E221,E222
- )
+ return 90 * sum(
+ int(part or 0) * mult for part, mult in zip(ts.groups(), (3600_000, 60_000, 1000, 1)))
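Same arithmetic as before, folded into a zip; assuming a timestamp such as '01:02:03.500' matches _REGEX_TS:

    m = _REGEX_TS.match('01:02:03.500')
    _parse_ts(m)   # 90 * (3_600_000 + 120_000 + 3_000 + 500) == 335_115_000 ticks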
def _format_ts(ts):
@@ -127,7 +114,7 @@ def _format_ts(ts):
return '%02u:%02u:%02u.%03u' % timetuple_from_msec(int((ts + 45) // 90))
-class Block(object):
+class Block:
"""
An abstract WebVTT block.
"""
@@ -153,7 +140,6 @@ class HeaderBlock(Block):
A WebVTT block that may only appear in the header part of the file,
i.e. before any cue blocks.
"""
-
pass
@@ -174,6 +160,12 @@ class Magic(HeaderBlock):
_REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
_REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')
+ # This was removed from the spec in the 2017 revision;
+ # the last spec draft to describe this syntax element is
+ # <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>.
+    # Nevertheless, YouTube still serves such metadata headers
+ _REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])')
+
@classmethod
def __parse_tsmap(cls, parser):
parser = parser.child()
@@ -213,13 +205,18 @@ class Magic(HeaderBlock):
raise ParseError(parser)
extra = m.group(1)
- local, mpegts = None, None
- if parser.consume(cls._REGEX_TSMAP):
- local, mpegts = cls.__parse_tsmap(parser)
- if not parser.consume(_REGEX_NL):
+ local, mpegts, meta = None, None, ''
+ while not parser.consume(_REGEX_NL):
+ if parser.consume(cls._REGEX_TSMAP):
+ local, mpegts = cls.__parse_tsmap(parser)
+ continue
+ m = parser.consume(cls._REGEX_META)
+ if m:
+ meta += m.group(0)
+ continue
raise ParseError(parser)
parser.commit()
- return cls(extra=extra, mpegts=mpegts, local=local)
+ return cls(extra=extra, mpegts=mpegts, local=local, meta=meta)
def write_into(self, stream):
stream.write('WEBVTT')
@@ -232,6 +229,8 @@ class Magic(HeaderBlock):
stream.write(',MPEGTS:')
stream.write(str(self.mpegts if self.mpegts is not None else 0))
stream.write('\n')
+ if self.meta:
+ stream.write(self.meta)
stream.write('\n')
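A header the extended parser now round-trips; the metadata lines are the kind YouTube emits, with illustrative values:

    WEBVTT
    X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000
    Kind: captions
    Language: en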
@@ -359,7 +358,7 @@ def parse_fragment(frag_content):
a bytes object containing the raw contents of a WebVTT file.
"""
- parser = _MatchParser(frag_content.decode('utf-8'))
+ parser = _MatchParser(frag_content.decode())
yield Magic.parse(parser)
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 52feb4a..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[pytest]
-addopts = -ra -v --strict-markers
-markers =
- download
diff --git a/requirements.txt b/requirements.txt
index b65d254..9d3b703 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,4 @@ pycryptodome
websockets
brotli; platform_python_implementation=='CPython'
brotlicffi; platform_python_implementation!='CPython'
-certifi \ No newline at end of file
+certifi
diff --git a/setup.cfg b/setup.cfg
index 6875734..2def390 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,49 @@
[wheel]
-universal = True
+universal = true
+
[flake8]
-exclude = hypervideo_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv,devscripts/create-github-release.py,devscripts/release.sh,devscripts/show-downloads-statistics.py
-ignore = E402,E501,E731,E741,W503 \ No newline at end of file
+exclude = build,venv,.tox,.git,.pytest_cache
+ignore = E402,E501,E731,E741,W503
+max_line_length = 120
+per_file_ignores =
+ devscripts/lazy_load_template.py: F401
+
+
+[autoflake]
+ignore-init-module-imports = true
+ignore-pass-after-docstring = true
+remove-all-unused-imports = true
+remove-duplicate-keys = true
+remove-unused-variables = true
+
+
+[tool:pytest]
+addopts = -ra -v --strict-markers
+markers =
+ download
+
+
+[tox:tox]
+skipsdist = true
+envlist = py{36,37,38,39,310},pypy{36,37,38,39}
+skip_missing_interpreters = true
+
+[testenv] # tox
+deps =
+ pytest
+commands = pytest {posargs:"-m not download"}
+passenv = HOME # For test_compat_expanduser
+setenv =
+ # PYTHONWARNINGS = error # Catches PIP's warnings too
+
+
+[isort]
+py_version = 37
+multi_line_output = VERTICAL_HANGING_INDENT
+line_length = 80
+reverse_relative = true
+ensure_newline_before_comments = true
+include_trailing_comma = true
+known_first_party =
+ test
diff --git a/setup.py b/setup.py
index d183924..87e34f9 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@ REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets']
if sys.argv[1:2] == ['py2exe']:
- import py2exe
+ import py2exe # noqa: F401
warnings.warn(
'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
'The recommended way is to use "pyinst.py" to build using pyinstaller')
@@ -124,6 +124,9 @@ setup(
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: 3.9',
+ 'Programming Language :: Python :: 3.10',
+ 'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: Implementation',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
diff --git a/test/helper.py b/test/helper.py
index 1f1ccfa..1dae86f 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -1,26 +1,16 @@
-from __future__ import unicode_literals
-
import errno
-import io
import hashlib
import json
import os.path
import re
-import types
import ssl
import sys
+import types
import hypervideo_dl.extractor
from hypervideo_dl import YoutubeDL
-from hypervideo_dl.compat import (
- compat_os_name,
- compat_str,
-)
-from hypervideo_dl.utils import (
- preferredencoding,
- write_string,
-)
-
+from hypervideo_dl.compat import compat_os_name
+from hypervideo_dl.utils import preferredencoding, write_string
if 'pytest' in sys.modules:
import pytest
@@ -35,10 +25,10 @@ def get_params(override=None):
'parameters.json')
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'local_parameters.json')
- with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+ with open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf)
if os.path.exists(LOCAL_PARAMETERS_FILE):
- with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+ with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
parameters.update(json.load(pf))
if override:
parameters.update(override)
@@ -54,7 +44,7 @@ def try_rm(filename):
raise
-def report_warning(message):
+def report_warning(message, *args, **kwargs):
'''
    Print the message to stderr; it will be prefixed with 'WARNING:'.
    If stderr is a tty, 'WARNING:' will be colored.
@@ -63,8 +53,8 @@ def report_warning(message):
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
- output = '%s %s\n' % (_msg_header, message)
- if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
+ output = f'{_msg_header} {message}\n'
+ if 'b' in getattr(sys.stderr, 'mode', ''):
output = output.encode(preferredencoding())
sys.stderr.write(output)
@@ -74,13 +64,13 @@ class FakeYDL(YoutubeDL):
    # Different instances of the downloader can't share the same dictionary:
    # some tests set the "sublang" parameter, which would break the md5 checks.
params = get_params(override=override)
- super(FakeYDL, self).__init__(params, auto_init=False)
+ super().__init__(params, auto_init=False)
self.result = []
- def to_screen(self, s, skip_eol=None):
+ def to_screen(self, s, *args, **kwargs):
print(s)
- def trouble(self, s, tb=None):
+ def trouble(self, s, *args, **kwargs):
raise Exception(s)
def download(self, x):
@@ -90,56 +80,59 @@ class FakeYDL(YoutubeDL):
# Silence an expected warning matching a regex
old_report_warning = self.report_warning
- def report_warning(self, message):
+ def report_warning(self, message, *args, **kwargs):
if re.match(regex, message):
return
- old_report_warning(message)
+ old_report_warning(message, *args, **kwargs)
self.report_warning = types.MethodType(report_warning, self)
def gettestcases(include_onlymatching=False):
for ie in hypervideo_dl.extractor.gen_extractors():
- for tc in ie.get_testcases(include_onlymatching):
+ yield from ie.get_testcases(include_onlymatching)
+
+
+def getwebpagetestcases():
+ for ie in hypervideo_dl.extractor.gen_extractors():
+ for tc in ie.get_webpage_testcases():
+ tc.setdefault('add_ie', []).append('Generic')
yield tc
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
def expect_value(self, got, expected, field):
- if isinstance(expected, compat_str) and expected.startswith('re:'):
+ if isinstance(expected, str) and expected.startswith('re:'):
match_str = expected[len('re:'):]
match_rex = re.compile(match_str)
self.assertTrue(
- isinstance(got, compat_str),
- 'Expected a %s object, but got %s for field %s' % (
- compat_str.__name__, type(got).__name__, field))
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
match_rex.match(got),
- 'field %s (value: %r) should match %r' % (field, got, match_str))
- elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
+ f'field {field} (value: {got!r}) should match {match_str!r}')
+ elif isinstance(expected, str) and expected.startswith('startswith:'):
start_str = expected[len('startswith:'):]
self.assertTrue(
- isinstance(got, compat_str),
- 'Expected a %s object, but got %s for field %s' % (
- compat_str.__name__, type(got).__name__, field))
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
got.startswith(start_str),
- 'field %s (value: %r) should start with %r' % (field, got, start_str))
- elif isinstance(expected, compat_str) and expected.startswith('contains:'):
+ f'field {field} (value: {got!r}) should start with {start_str!r}')
+ elif isinstance(expected, str) and expected.startswith('contains:'):
contains_str = expected[len('contains:'):]
self.assertTrue(
- isinstance(got, compat_str),
- 'Expected a %s object, but got %s for field %s' % (
- compat_str.__name__, type(got).__name__, field))
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
contains_str in got,
- 'field %s (value: %r) should contain %r' % (field, got, contains_str))
+ f'field {field} (value: {got!r}) should contain {contains_str!r}')
elif isinstance(expected, type):
self.assertTrue(
isinstance(got, expected),
- 'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got)))
+ f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}')
elif isinstance(expected, dict) and isinstance(got, dict):
expect_dict(self, got, expected)
elif isinstance(expected, list) and isinstance(got, list):
@@ -156,16 +149,15 @@ def expect_value(self, got, expected, field):
index, field, type_expected, type_got))
expect_value(self, item_got, item_expected, field)
else:
- if isinstance(expected, compat_str) and expected.startswith('md5:'):
+ if isinstance(expected, str) and expected.startswith('md5:'):
self.assertTrue(
- isinstance(got, compat_str),
- 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
+ isinstance(got, str),
+ f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
got = 'md5:' + md5(got)
- elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
+ elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
self.assertTrue(
isinstance(got, (list, dict)),
- 'Expected field %s to be a list or a dict, but it is of type %s' % (
- field, type(got).__name__))
+ f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
op, _, expected_num = expected.partition(':')
expected_num = int(expected_num)
if op == 'mincount':
@@ -185,7 +177,7 @@ def expect_value(self, got, expected, field):
return
self.assertEqual(
expected, got,
- 'Invalid value for field %s, expected %r, got %r' % (field, expected, got))
+ f'Invalid value for field {field}, expected {expected!r}, got {got!r}')
def expect_dict(self, got_dict, expected_dict):
@@ -230,6 +222,10 @@ def sanitize_got_info_dict(got_dict):
if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id')
+ # Check url for flat entries
+ if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'):
+ test_info_dict['url'] = got_dict['url']
+
return test_info_dict
@@ -243,33 +239,31 @@ def expect_info_dict(self, got_dict, expected_dict):
for key in mandatory_fields:
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
# Check for mandatory fields that are automatically set by YoutubeDL
- for key in ['webpage_url', 'extractor', 'extractor_key']:
- self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
+ if got_dict.get('_type', 'video') == 'video':
+ for key in ['webpage_url', 'extractor', 'extractor_key']:
+ self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
test_info_dict = sanitize_got_info_dict(got_dict)
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
if missing_keys:
def _repr(v):
- if isinstance(v, compat_str):
+ if isinstance(v, str):
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
elif isinstance(v, type):
return v.__name__
else:
return repr(v)
- info_dict_str = ''
- if len(missing_keys) != len(expected_dict):
- info_dict_str += ''.join(
- ' %s: %s,\n' % (_repr(k), _repr(v))
- for k, v in test_info_dict.items() if k not in missing_keys)
-
- if info_dict_str:
- info_dict_str += '\n'
+ info_dict_str = ''.join(
+ f' {_repr(k)}: {_repr(v)},\n'
+ for k, v in test_info_dict.items() if k not in missing_keys)
+ if info_dict_str:
+ info_dict_str += '\n'
info_dict_str += ''.join(
- ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
+ f' {_repr(k)}: {_repr(test_info_dict[k])},\n'
for k in missing_keys)
- write_string(
- '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
+ info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n'
+ write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr)
self.assertFalse(
missing_keys,
'Missing keys in test definition: %s' % (
@@ -295,30 +289,30 @@ def assertRegexpMatches(self, text, regexp, msg=None):
def assertGreaterEqual(self, got, expected, msg=None):
if not (got >= expected):
if msg is None:
- msg = '%r not greater than or equal to %r' % (got, expected)
+ msg = f'{got!r} not greater than or equal to {expected!r}'
self.assertTrue(got >= expected, msg)
def assertLessEqual(self, got, expected, msg=None):
if not (got <= expected):
if msg is None:
- msg = '%r not less than or equal to %r' % (got, expected)
+ msg = f'{got!r} not less than or equal to {expected!r}'
self.assertTrue(got <= expected, msg)
def assertEqual(self, got, expected, msg=None):
if not (got == expected):
if msg is None:
- msg = '%r not equal to %r' % (got, expected)
+ msg = f'{got!r} not equal to {expected!r}'
self.assertTrue(got == expected, msg)
def expect_warnings(ydl, warnings_re):
real_warning = ydl.report_warning
- def _report_warning(w):
+ def _report_warning(w, *args, **kwargs):
if not any(re.search(w_re, w) for w_re in warnings_re):
- real_warning(w)
+ real_warning(w, *args, **kwargs)
ydl.report_warning = _report_warning
diff --git a/test/parameters.json b/test/parameters.json
index 06fe3e3..8789ce1 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -44,6 +44,6 @@
"writesubtitles": false,
"allsubtitles": false,
"listsubtitles": false,
- "socket_timeout": 20,
- "fixup": "never"
+ "fixup": "never",
+ "allow_playlist_files": false
}
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 8494105..529da52 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1,27 +1,32 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-
# Allow direct execution
-import io
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from hypervideo_dl.compat import compat_etree_fromstring, compat_http_server
-from hypervideo_dl.extractor.common import InfoExtractor
-from hypervideo_dl.extractor import YoutubeIE, get_info_extractor
-from hypervideo_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+
+import http.server
import threading
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from hypervideo_dl.compat import compat_etree_fromstring
+from hypervideo_dl.extractor import YoutubeIE, get_info_extractor
+from hypervideo_dl.extractor.common import InfoExtractor
+from hypervideo_dl.utils import (
+ ExtractorError,
+ RegexNotFoundError,
+ encode_data_uri,
+ strip_jsonp,
+)
TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
-class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
@@ -36,7 +41,9 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
class DummyIE(InfoExtractor):
- pass
+ def _sort_formats(self, formats, field_preference=[]):
+ self._downloader.sort_formats(
+ {'formats': formats, '_format_sort_fields': field_preference})
class TestInfoExtractor(unittest.TestCase):
@@ -500,6 +507,24 @@ class TestInfoExtractor(unittest.TestCase):
}],
})
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+ <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+ controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
@@ -1011,8 +1036,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
- with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f:
formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
f.read(), m3u8_url, ext='mp4')
self.ie._sort_formats(formats)
@@ -1357,10 +1381,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
- with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f:
formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
@@ -1546,13 +1569,298 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
},
),
+ (
+ 'ec-3_test',
+ 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ [{
+ 'format_id': 'audio_deu_1-224',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'isma',
+ 'tbr': 224,
+ 'asr': 48000,
+ 'vcodec': 'none',
+ 'acodec': 'EC-3',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'audio',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 0,
+ 'height': 0,
+ 'fourcc': 'EC-3',
+ 'language': 'deu',
+ 'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
+ 'sampling_rate': 48000,
+ 'channels': 6,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'audio_ext': 'isma',
+ 'video_ext': 'none',
+ 'abr': 224,
+ }, {
+ 'format_id': 'audio_deu-127',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'isma',
+ 'tbr': 127,
+ 'asr': 48000,
+ 'vcodec': 'none',
+ 'acodec': 'AACL',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'audio',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 0,
+ 'height': 0,
+ 'fourcc': 'AACL',
+ 'language': 'deu',
+ 'codec_private_data': '1190',
+ 'sampling_rate': 48000,
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'audio_ext': 'isma',
+ 'video_ext': 'none',
+ 'abr': 127,
+ }, {
+ 'format_id': 'video_deu-23',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 384,
+ 'height': 216,
+ 'tbr': 23,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 384,
+ 'height': 216,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 23,
+ }, {
+ 'format_id': 'video_deu-403',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 400,
+ 'height': 224,
+ 'tbr': 403,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 400,
+ 'height': 224,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 403,
+ }, {
+ 'format_id': 'video_deu-680',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 640,
+ 'height': 360,
+ 'tbr': 680,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 640,
+ 'height': 360,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 680,
+ }, {
+ 'format_id': 'video_deu-1253',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 640,
+ 'height': 360,
+ 'tbr': 1253,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 640,
+ 'height': 360,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 1253,
+ }, {
+ 'format_id': 'video_deu-2121',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 768,
+ 'height': 432,
+ 'tbr': 2121,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 768,
+ 'height': 432,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 2121,
+ }, {
+ 'format_id': 'video_deu-3275',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1280,
+ 'height': 720,
+ 'tbr': 3275,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 1280,
+ 'height': 720,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 3275,
+ }, {
+ 'format_id': 'video_deu-5300',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1920,
+ 'height': 1080,
+ 'tbr': 5300,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 1920,
+ 'height': 1080,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 5300,
+ }, {
+ 'format_id': 'video_deu-8079',
+ 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1920,
+ 'height': 1080,
+ 'tbr': 8079,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params':
+ {
+ 'stream_type': 'video',
+ 'duration': 370000000,
+ 'timescale': 10000000,
+ 'width': 1920,
+ 'height': 1080,
+ 'fourcc': 'AVC1',
+ 'language': 'deu',
+ 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 8079,
+ }],
+ {},
+ ),
]
for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
- with io.open('./test/testdata/ism/%s.Manifest' % ism_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f:
formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
- compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url)
+ compat_etree_fromstring(f.read().encode()), ism_url=ism_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
expect_value(self, subtitles, expected_subtitles, None)
@@ -1576,10 +1884,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
- with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
f4m_url, None)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
@@ -1624,10 +1931,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
- with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f:
entries = self.ie._parse_xspf(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
expect_value(self, entries, expected_entries, None)
for i in range(len(entries)):
@@ -1640,7 +1946,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
# or the underlying `_download_webpage_handle` returning no content
# when a response matches `expected_status`.
- httpd = compat_http_server.HTTPServer(
+ httpd = http.server.HTTPServer(
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
port = http_server_port(httpd)
server_thread = threading.Thread(target=httpd.serve_forever)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index fe0fd35..2d4e827 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1,38 +1,44 @@
#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
import copy
import json
+import urllib.error
from test.helper import FakeYDL, assertRegexpMatches
from hypervideo_dl import YoutubeDL
-from hypervideo_dl.compat import compat_os_name, compat_setenv, compat_str, compat_urllib_error
+from hypervideo_dl.compat import compat_os_name
from hypervideo_dl.extractor import YoutubeIE
from hypervideo_dl.extractor.common import InfoExtractor
from hypervideo_dl.postprocessor.common import PostProcessor
-from hypervideo_dl.utils import ExtractorError, int_or_none, match_filter_func, LazyList
+from hypervideo_dl.utils import (
+ ExtractorError,
+ LazyList,
+ OnDemandPagedList,
+ int_or_none,
+ match_filter_func,
+)
TEST_URL = 'http://localhost/sample.mp4'
class YDL(FakeYDL):
def __init__(self, *args, **kwargs):
- super(YDL, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
self.downloaded_info_dicts = []
self.msgs = []
def process_info(self, info_dict):
self.downloaded_info_dicts.append(info_dict.copy())
- def to_screen(self, msg):
+ def to_screen(self, msg, *args, **kwargs):
self.msgs.append(msg)
def dl(self, *args, **kwargs):
@@ -62,8 +68,7 @@ class TestFormatSelection(unittest.TestCase):
{'ext': 'mp4', 'height': 460, 'url': TEST_URL},
]
info_dict = _make_result(formats)
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'webm')
@@ -76,8 +81,7 @@ class TestFormatSelection(unittest.TestCase):
{'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
]
info_dict['formats'] = formats
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'mp4')
@@ -91,8 +95,7 @@ class TestFormatSelection(unittest.TestCase):
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'mp4')
@@ -104,15 +107,14 @@ class TestFormatSelection(unittest.TestCase):
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'webm')
def test_format_selection(self):
formats = [
- {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': '35', 'ext': 'mp4', 'preference': 0, 'url': TEST_URL},
{'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
@@ -180,22 +182,19 @@ class TestFormatSelection(unittest.TestCase):
info_dict = _make_result(formats)
ydl = YDL({'format': 'best'})
- ie = YoutubeIE(ydl)
- ie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'aac-64')
ydl = YDL({'format': 'mp3'})
- ie = YoutubeIE(ydl)
- ie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'mp3-64')
ydl = YDL({'prefer_free_formats': True})
- ie = YoutubeIE(ydl)
- ie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'ogg-64')
@@ -340,8 +339,7 @@ class TestFormatSelection(unittest.TestCase):
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': 'bestvideo+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '248+172')
@@ -349,40 +347,35 @@ class TestFormatSelection(unittest.TestCase):
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '38')
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': 'bestvideo/best,bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['137', '141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['137+141', '248+141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['136+141', '247+141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['248+141'])
@@ -390,16 +383,14 @@ class TestFormatSelection(unittest.TestCase):
for f1, f2 in zip(formats_order, formats_order[1:]):
info_dict = _make_result([f1, f2], extractor='youtube')
ydl = YDL({'format': 'best/bestvideo'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1['format_id'])
info_dict = _make_result([f2, f1], extractor='youtube')
ydl = YDL({'format': 'best/bestvideo'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1['format_id'])
@@ -474,7 +465,7 @@ class TestFormatSelection(unittest.TestCase):
for f in formats:
f['url'] = 'http://_/'
f['ext'] = 'unknown'
- info_dict = _make_result(formats)
+ info_dict = _make_result(formats, _format_sort_fields=('id', ))
ydl = YDL({'format': 'best[filesize<3000]'})
ydl.process_ie_result(info_dict)
@@ -551,11 +542,11 @@ class TestYoutubeDL(unittest.TestCase):
def s_formats(lang, autocaption=False):
return [{
'ext': ext,
- 'url': 'http://localhost/video.%s.%s' % (lang, ext),
+ 'url': f'http://localhost/video.{lang}.{ext}',
'_auto': autocaption,
} for ext in ['vtt', 'srt', 'ass']]
- subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
- auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
+ subtitles = {l: s_formats(l) for l in ['en', 'fr', 'es']}
+ auto_captions = {l: s_formats(l, True) for l in ['it', 'pt', 'es']}
info_dict = {
'id': 'test',
'title': 'Test',
@@ -580,7 +571,7 @@ class TestYoutubeDL(unittest.TestCase):
result = get_info({'writesubtitles': True})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['en']))
+ self.assertEqual(set(subs.keys()), {'en'})
self.assertTrue(subs['en'].get('data') is None)
self.assertEqual(subs['en']['ext'], 'ass')
@@ -591,39 +582,39 @@ class TestYoutubeDL(unittest.TestCase):
result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+ self.assertEqual(set(subs.keys()), {'es', 'fr'})
result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+ self.assertEqual(set(subs.keys()), {'es', 'fr'})
result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['fr']))
+ self.assertEqual(set(subs.keys()), {'fr'})
result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['en']))
+ self.assertEqual(set(subs.keys()), {'en'})
result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['es', 'en']))
+ self.assertEqual(set(subs.keys()), {'es', 'en'})
result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertEqual(set(subs.keys()), {'es', 'pt'})
self.assertFalse(subs['es']['_auto'])
self.assertTrue(subs['pt']['_auto'])
result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
subs = result['requested_subtitles']
self.assertTrue(subs)
- self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertEqual(set(subs.keys()), {'es', 'pt'})
self.assertTrue(subs['es']['_auto'])
self.assertTrue(subs['pt']['_auto'])
@@ -654,15 +645,19 @@ class TestYoutubeDL(unittest.TestCase):
'duration': 100000,
'playlist_index': 1,
'playlist_autonumber': 2,
- '_last_playlist_index': 100,
+ '__last_playlist_index': 100,
'n_entries': 10,
- 'formats': [{'id': 'id 1'}, {'id': 'id 2'}, {'id': 'id 3'}]
+ 'formats': [
+ {'id': 'id 1', 'height': 1080, 'width': 1920},
+ {'id': 'id 2', 'height': 720},
+ {'id': 'id 3'}
+ ]
}
def test_prepare_outtmpl_and_filename(self):
def test(tmpl, expected, *, info=None, **params):
params['outtmpl'] = tmpl
- ydl = YoutubeDL(params)
+ ydl = FakeYDL(params)
ydl._num_downloads = 1
self.assertEqual(ydl.validate_outtmpl(tmpl), None)
@@ -716,13 +711,14 @@ class TestYoutubeDL(unittest.TestCase):
test('%(id)s', '-abcd', info={'id': '-abcd'})
test('%(id)s', '.abcd', info={'id': '.abcd'})
test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
- test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
+        test('%(id)s', ('ab:cd', 'ab：cd'), info={'id': 'ab:cd'})
test('%(id.0)s', '-', info={'id': '--'})
# Invalid templates
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
test('%(..)s', 'NA')
+ test('%(formats.{id)s', 'NA')
# Entire info_dict
def expect_same_infodict(out):
@@ -764,7 +760,7 @@ class TestYoutubeDL(unittest.TestCase):
test('a%(width|)d', 'a', outtmpl_na_placeholder='none')
FORMATS = self.outtmpl_info['formats']
- sanitize = lambda x: x.replace(':', ' -').replace('"', "'").replace('\n', ' ')
+        sanitize = lambda x: x.replace(':', '：').replace('"', '＂').replace('\n', ' ')
# Custom type casting
test('%(formats.:.id)l', 'id 1, id 2, id 3')
@@ -782,13 +778,13 @@ class TestYoutubeDL(unittest.TestCase):
test('%(filesize)#D', '1Ki')
test('%(height)5.2D', ' 1.08k')
test('%(title4)#S', 'foo_bar_test')
- test('%(title4).10S', ('foo \'bar\' ', 'foo \'bar\'' + ('#' if compat_os_name == 'nt' else ' ')))
+        test('%(title4).10S', ('foo "bar" ', 'foo ＂bar＂' + ('#' if compat_os_name == 'nt' else ' ')))
if compat_os_name == 'nt':
- test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
- test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', "'id 1' 'id 2' 'id 3'"))
- test('%(formats.0.id)#q', ('"id 1"', "'id 1'"))
+            test('%(title4)q', ('"foo \\"bar\\" test"', '＂foo ⧹＂bar⧹＂ test＂'))
+            test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '＂id 1＂ ＂id 2＂ ＂id 3＂'))
+            test('%(formats.0.id)#q', ('"id 1"', '＂id 1＂'))
else:
- test('%(title4)q', ('\'foo "bar" test\'', "'foo 'bar' test'"))
+            test('%(title4)q', ('\'foo "bar" test\'', '\'foo ＂bar＂ test\''))
test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'")
test('%(formats.0.id)#q', "'id 1'")
@@ -807,6 +803,12 @@ class TestYoutubeDL(unittest.TestCase):
test('%(formats.:2:-1)r', repr(FORMATS[:2:-1]))
test('%(formats.0.id.-1+id)f', '1235.000000')
test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
+ out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]}
+ if 'height' in f else {'id': f['id']}
+ for f in FORMATS])
+ test('%(formats.:.{id,height.:2})j', (out, sanitize(out)))
+ test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS))
+        test('%(.{id,title})j', ('{"id": "1234"}', '{＂id＂： ＂1234＂}'))
# Alternates
test('%(title,id)s', '1234')
@@ -833,21 +835,21 @@ class TestYoutubeDL(unittest.TestCase):
# test('%(foo|)s', ('', '_')) # fixme
# Environment variable expansion for prepare_filename
- compat_setenv('__hypervideo_dl_var', 'expanded')
+ os.environ['__hypervideo_dl_var'] = 'expanded'
envvar = '%__hypervideo_dl_var%' if compat_os_name == 'nt' else '$__hypervideo_dl_var'
test(envvar, (envvar, 'expanded'))
if compat_os_name == 'nt':
test('%s%', ('%s%', '%s%'))
- compat_setenv('s', 'expanded')
+ os.environ['s'] = 'expanded'
test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s
- compat_setenv('(test)s', 'expanded')
+ os.environ['(test)s'] = 'expanded'
test('%(test)s%', ('NA%', 'expanded')) # Environment should take priority over template
# Path expansion and escaping
test('Hello %(title1)s', 'Hello $PATH')
test('Hello %(title2)s', 'Hello %PATH%')
- test('%(title3)s', ('foo/bar\\test', 'foo_bar_test'))
- test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep))
+ test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test'))
+ test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo⧸bar⧹test' % os.path.sep))
def test_format_note(self):
ydl = YoutubeDL()
@@ -982,41 +984,80 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(res, [])
def test_playlist_items_selection(self):
- entries = [{
- 'id': compat_str(i),
- 'title': compat_str(i),
- 'url': TEST_URL,
- } for i in range(1, 5)]
- playlist = {
- '_type': 'playlist',
- 'id': 'test',
- 'entries': entries,
- 'extractor': 'test:playlist',
- 'extractor_key': 'test:playlist',
- 'webpage_url': 'http://example.com',
- }
+ INDICES, PAGE_SIZE = list(range(1, 11)), 3
- def get_downloaded_info_dicts(params):
+ def entry(i, evaluated):
+ evaluated.append(i)
+ return {
+ 'id': str(i),
+ 'title': str(i),
+ 'url': TEST_URL,
+ }
+
+ def pagedlist_entries(evaluated):
+ def page_func(n):
+ start = PAGE_SIZE * n
+ for i in INDICES[start: start + PAGE_SIZE]:
+ yield entry(i, evaluated)
+ return OnDemandPagedList(page_func, PAGE_SIZE)
+
+ def page_num(i):
+ return (i + PAGE_SIZE - 1) // PAGE_SIZE
+
+ def generator_entries(evaluated):
+ for i in INDICES:
+ yield entry(i, evaluated)
+
+ def list_entries(evaluated):
+ return list(generator_entries(evaluated))
+
+ def lazylist_entries(evaluated):
+ return LazyList(generator_entries(evaluated))
+
+ def get_downloaded_info_dicts(params, entries):
ydl = YDL(params)
- # make a deep copy because the dictionary and nested entries
- # can be modified
- ydl.process_ie_result(copy.deepcopy(playlist))
+ ydl.process_ie_result({
+ '_type': 'playlist',
+ 'id': 'test',
+ 'extractor': 'test:playlist',
+ 'extractor_key': 'test:playlist',
+ 'webpage_url': 'http://example.com',
+ 'entries': entries,
+ })
return ydl.downloaded_info_dicts
- def test_selection(params, expected_ids):
- results = [
- (v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
- for v in get_downloaded_info_dicts(params)]
- self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))))
-
- test_selection({}, [1, 2, 3, 4])
- test_selection({'playlistend': 10}, [1, 2, 3, 4])
- test_selection({'playlistend': 2}, [1, 2])
- test_selection({'playliststart': 10}, [])
- test_selection({'playliststart': 2}, [2, 3, 4])
- test_selection({'playlist_items': '2-4'}, [2, 3, 4])
+ def test_selection(params, expected_ids, evaluate_all=False):
+ expected_ids = list(expected_ids)
+ if evaluate_all:
+ generator_eval = pagedlist_eval = INDICES
+ elif not expected_ids:
+ generator_eval = pagedlist_eval = []
+ else:
+ generator_eval = INDICES[0: max(expected_ids)]
+ pagedlist_eval = INDICES[PAGE_SIZE * page_num(min(expected_ids)) - PAGE_SIZE:
+ PAGE_SIZE * page_num(max(expected_ids))]
+
+ for name, func, expected_eval in (
+ ('list', list_entries, INDICES),
+ ('Generator', generator_entries, generator_eval),
+ # ('LazyList', lazylist_entries, generator_eval), # Generator and LazyList follow the exact same code path
+ ('PagedList', pagedlist_entries, pagedlist_eval),
+ ):
+ evaluated = []
+ entries = func(evaluated)
+ results = [(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
+ for v in get_downloaded_info_dicts(params, entries)]
+ self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))), f'Entries of {name} for {params}')
+ self.assertEqual(sorted(evaluated), expected_eval, f'Evaluation of {name} for {params}')
+
+ test_selection({}, INDICES)
+ test_selection({'playlistend': 20}, INDICES, True)
+ test_selection({'playlistend': 2}, INDICES[:2])
+ test_selection({'playliststart': 11}, [], True)
+ test_selection({'playliststart': 2}, INDICES[1:])
+ test_selection({'playlist_items': '2-4'}, INDICES[1:4])
test_selection({'playlist_items': '2,4'}, [2, 4])
- test_selection({'playlist_items': '10'}, [])
+ test_selection({'playlist_items': '20'}, [], True)
test_selection({'playlist_items': '0'}, [])
# Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
@@ -1025,15 +1066,37 @@ class TestYoutubeDL(unittest.TestCase):
# Tests for https://github.com/hypervideo/hypervideo/issues/720
# https://github.com/hypervideo/hypervideo/issues/302
- test_selection({'playlistreverse': True}, [4, 3, 2, 1])
- test_selection({'playliststart': 2, 'playlistreverse': True}, [4, 3, 2])
+ test_selection({'playlistreverse': True}, INDICES[::-1])
+ test_selection({'playliststart': 2, 'playlistreverse': True}, INDICES[:0:-1])
test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2])
test_selection({'playlist_items': '4,2'}, [4, 2])
+ # Tests for --playlist-items start:end:step
+ test_selection({'playlist_items': ':'}, INDICES, True)
+ test_selection({'playlist_items': '::1'}, INDICES, True)
+ test_selection({'playlist_items': '::-1'}, INDICES[::-1], True)
+ test_selection({'playlist_items': ':6'}, INDICES[:6])
+ test_selection({'playlist_items': ':-6'}, INDICES[:-5], True)
+ test_selection({'playlist_items': '-1:6:-2'}, INDICES[:4:-2], True)
+ test_selection({'playlist_items': '9:-6:-2'}, INDICES[8:3:-2], True)
+
+ test_selection({'playlist_items': '1:inf:2'}, INDICES[::2], True)
+ test_selection({'playlist_items': '-2:inf'}, INDICES[-2:], True)
+ test_selection({'playlist_items': ':inf:-1'}, [], True)
+ test_selection({'playlist_items': '0-2:2'}, [2])
+ test_selection({'playlist_items': '1-:2'}, INDICES[::2], True)
+ test_selection({'playlist_items': '0--2:2'}, INDICES[1:-1:2], True)
+
+ test_selection({'playlist_items': '10::3'}, [10], True)
+ test_selection({'playlist_items': '-1::3'}, [10], True)
+ test_selection({'playlist_items': '11::3'}, [], True)
+ test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True)
+ test_selection({'playlist_items': '-15::15'}, [], True)
+
def test_urlopen_no_file_protocol(self):
# see https://github.com/ytdl-org/youtube-dl/issues/8227
ydl = YDL()
- self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
+ self.assertRaises(urllib.error.URLError, ydl.urlopen, 'file:///etc/passwd')
def test_do_not_override_ie_key_in_url_transparent(self):
ydl = YDL()
@@ -1082,7 +1145,7 @@ class TestYoutubeDL(unittest.TestCase):
class _YDL(YDL):
def __init__(self, *args, **kwargs):
- super(_YDL, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
def trouble(self, s, tb=None):
pass
@@ -1119,7 +1182,7 @@ class TestYoutubeDL(unittest.TestCase):
def _entries(self):
for n in range(3):
- video_id = compat_str(n)
+ video_id = str(n)
yield {
'_type': 'url_transparent',
'ie_key': VideoIE.ie_key(),
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index 2ce0070..26922d6 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -1,15 +1,16 @@
#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
+# Allow direct execution
import os
-import re
import sys
-import tempfile
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import re
+import tempfile
+
from hypervideo_dl.utils import YoutubeDLCookieJar
@@ -20,7 +21,7 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
tf = tempfile.NamedTemporaryFile(delete=False)
try:
cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
- temp = tf.read().decode('utf-8')
+ temp = tf.read().decode()
self.assertTrue(re.search(
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
self.assertTrue(re.search(
diff --git a/test/test_aes.py b/test/test_aes.py
index 9d260b5..0f35bc2 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -1,30 +1,33 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import base64
+
from hypervideo_dl.aes import (
- aes_decrypt,
- aes_encrypt,
- aes_ecb_encrypt,
- aes_ecb_decrypt,
aes_cbc_decrypt,
aes_cbc_decrypt_bytes,
aes_cbc_encrypt,
aes_ctr_decrypt,
aes_ctr_encrypt,
+ aes_decrypt,
+ aes_decrypt_text,
+ aes_ecb_decrypt,
+ aes_ecb_encrypt,
+ aes_encrypt,
aes_gcm_decrypt_and_verify,
aes_gcm_decrypt_and_verify_bytes,
- aes_decrypt_text,
- BLOCK_SIZE_BYTES,
+ key_expansion,
+ pad_block,
)
-from hypervideo_dl.compat import compat_pycrypto_AES
+from hypervideo_dl.dependencies import Cryptodome_AES
from hypervideo_dl.utils import bytes_to_intlist, intlist_to_bytes
-import base64
# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
@@ -45,7 +48,7 @@ class TestAES(unittest.TestCase):
data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
- if compat_pycrypto_AES:
+ if Cryptodome_AES:
decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
@@ -75,32 +78,31 @@ class TestAES(unittest.TestCase):
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
- if compat_pycrypto_AES:
+ if Cryptodome_AES:
decrypted = aes_gcm_decrypt_and_verify_bytes(
data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_decrypt_text(self):
- password = intlist_to_bytes(self.key).decode('utf-8')
+ password = intlist_to_bytes(self.key).decode()
encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8])
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
- ).decode('utf-8')
+ ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 16))
self.assertEqual(decrypted, self.secret_msg)
- password = intlist_to_bytes(self.key).decode('utf-8')
+ password = intlist_to_bytes(self.key).decode()
encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8])
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
- ).decode('utf-8')
+ ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
def test_ecb_encrypt(self):
data = bytes_to_intlist(self.secret_msg)
- data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES)
- encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv))
+ encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
self.assertEqual(
encrypted,
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
@@ -110,6 +112,41 @@ class TestAES(unittest.TestCase):
decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+ def test_key_expansion(self):
+ key = '4f6bdaa39e2f8cb07f5e722d9edef314'
+
+ self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [
+ 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14,
+ 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21,
+ 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5,
+ 0x2D, 0xAD, 0xDE, 0x47, 0x6C, 0x5A, 0xAF, 0x86, 0x9F, 0xBA, 0x00, 0x72, 0x40, 0x93, 0x82, 0xA7,
+ 0xF9, 0xBE, 0x82, 0x4E, 0x95, 0xE4, 0x2D, 0xC8, 0x0A, 0x5E, 0x2D, 0xBA, 0x4A, 0xCD, 0xAF, 0x1D,
+ 0x54, 0xC7, 0x26, 0x98, 0xC1, 0x23, 0x0B, 0x50, 0xCB, 0x7D, 0x26, 0xEA, 0x81, 0xB0, 0x89, 0xF7,
+ 0x93, 0x60, 0x4E, 0x94, 0x52, 0x43, 0x45, 0xC4, 0x99, 0x3E, 0x63, 0x2E, 0x18, 0x8E, 0xEA, 0xD9,
+ 0xCA, 0xE7, 0x7B, 0x39, 0x98, 0xA4, 0x3E, 0xFD, 0x01, 0x9A, 0x5D, 0xD3, 0x19, 0x14, 0xB7, 0x0A,
+ 0xB0, 0x4E, 0x1C, 0xED, 0x28, 0xEA, 0x22, 0x10, 0x29, 0x70, 0x7F, 0xC3, 0x30, 0x64, 0xC8, 0xC9,
+ 0xE8, 0xA6, 0xC1, 0xE9, 0xC0, 0x4C, 0xE3, 0xF9, 0xE9, 0x3C, 0x9C, 0x3A, 0xD9, 0x58, 0x54, 0xF3,
+ 0xB4, 0x86, 0xCC, 0xDC, 0x74, 0xCA, 0x2F, 0x25, 0x9D, 0xF6, 0xB3, 0x1F, 0x44, 0xAE, 0xE7, 0xEC])
+
+ def test_pad_block(self):
+ block = [0x21, 0xA0, 0x43, 0xFF]
+
+ self.assertEqual(pad_block(block, 'pkcs7'),
+ block + [0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C])
+
+ self.assertEqual(pad_block(block, 'iso7816'),
+ block + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
+
+ self.assertEqual(pad_block(block, 'whitespace'),
+ block + [0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20])
+
+ self.assertEqual(pad_block(block, 'zero'),
+ block + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
+
+ block = list(range(16))
+ for mode in ('pkcs7', 'iso7816', 'whitespace', 'zero'):
+ self.assertEqual(pad_block(block, mode), block, mode)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index 9b490d0..034359b 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -1,14 +1,14 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import try_rm, is_download_test
+from test.helper import is_download_test, try_rm
from hypervideo_dl import YoutubeDL
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 74634cb..49653f1 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -1,22 +1,17 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-
# Allow direct execution
import os
import sys
import unittest
-import collections
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import gettestcases
+import collections
-from hypervideo_dl.extractor import (
- FacebookIE,
- gen_extractors,
- YoutubeIE,
-)
+from test.helper import gettestcases
+from hypervideo_dl.extractor import FacebookIE, YoutubeIE, gen_extractors
class TestAllURLsMatching(unittest.TestCase):
@@ -81,11 +76,11 @@ class TestAllURLsMatching(unittest.TestCase):
url = tc['url']
for ie in ies:
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
- self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
+ self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}')
else:
self.assertFalse(
ie.suitable(url),
- '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name']))
+ f'{type(ie).__name__} should not match URL {url!r} . That URL belongs to {tc["name"]}.')
def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
@@ -120,7 +115,7 @@ class TestAllURLsMatching(unittest.TestCase):
for (ie_name, ie_list) in name_accu.items():
self.assertEqual(
len(ie_list), 1,
- 'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
+ f'Multiple extractors with the same IE_NAME "{ie_name}" ({", ".join(ie_list)})')
if __name__ == '__main__':
diff --git a/test/test_cache.py b/test/test_cache.py
index 0776e92..f366a15 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -1,17 +1,15 @@
#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import shutil
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import shutil
+
from test.helper import FakeYDL
from hypervideo_dl.cache import Cache
diff --git a/test/test_compat.py b/test/test_compat.py
index 5f5d354..7a191c0 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -1,80 +1,44 @@
#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import struct
+import urllib.parse
+
+from hypervideo_dl import compat
from hypervideo_dl.compat import (
- compat_getenv,
- compat_setenv,
- compat_etree_Element,
compat_etree_fromstring,
compat_expanduser,
- compat_shlex_split,
- compat_str,
- compat_struct_unpack,
- compat_urllib_parse_quote,
- compat_urllib_parse_quote_plus,
compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode,
)
class TestCompat(unittest.TestCase):
- def test_compat_getenv(self):
- test_str = 'тест'
- compat_setenv('hypervideo_dl_COMPAT_GETENV', test_str)
- self.assertEqual(compat_getenv('hypervideo_dl_COMPAT_GETENV'), test_str)
-
- def test_compat_setenv(self):
- test_var = 'hypervideo_dl_COMPAT_SETENV'
- test_str = 'тест'
- compat_setenv(test_var, test_str)
- compat_getenv(test_var)
- self.assertEqual(compat_getenv(test_var), test_str)
+ def test_compat_passthrough(self):
+ with self.assertWarns(DeprecationWarning):
+ compat.compat_basestring
+
+ with self.assertWarns(DeprecationWarning):
+ compat.WINDOWS_VT_MODE
+
+ # TODO: Test submodule
+ # compat.asyncio.events # Must not raise error
def test_compat_expanduser(self):
old_home = os.environ.get('HOME')
- test_str = r'C:\Documents and Settings\тест\Application Data'
- compat_setenv('HOME', test_str)
- self.assertEqual(compat_expanduser('~'), test_str)
- compat_setenv('HOME', old_home or '')
-
- def test_all_present(self):
- import hypervideo_dl.compat
- all_names = hypervideo_dl.compat.__all__
- present_names = set(filter(
- lambda c: '_' in c and not c.startswith('_'),
- dir(hypervideo_dl.compat))) - set(['unicode_literals'])
- self.assertEqual(all_names, sorted(present_names))
-
- def test_compat_urllib_parse_quote(self):
- self.assertEqual(compat_urllib_parse_quote('abc def'), 'abc%20def')
- self.assertEqual(compat_urllib_parse_quote('/user/abc+def'), '/user/abc%2Bdef')
- self.assertEqual(compat_urllib_parse_quote('/user/abc+def', safe='+'), '%2Fuser%2Fabc+def')
- self.assertEqual(compat_urllib_parse_quote(''), '')
- self.assertEqual(compat_urllib_parse_quote('%'), '%25')
- self.assertEqual(compat_urllib_parse_quote('%', safe='%'), '%')
- self.assertEqual(compat_urllib_parse_quote('津波'), '%E6%B4%A5%E6%B3%A2')
- self.assertEqual(
- compat_urllib_parse_quote('''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
-%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''', safe='<>=":%/ \r\n'),
- '''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
-%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a''')
- self.assertEqual(
- compat_urllib_parse_quote('''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%25Things%''', safe='% '),
- '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%''')
-
- def test_compat_urllib_parse_quote_plus(self):
- self.assertEqual(compat_urllib_parse_quote_plus('abc def'), 'abc+def')
- self.assertEqual(compat_urllib_parse_quote_plus('/abc def'), '%2Fabc+def')
+ test_str = R'C:\Documents and Settings\тест\Application Data'
+ try:
+ os.environ['HOME'] = test_str
+ self.assertEqual(compat_expanduser('~'), test_str)
+ finally:
+ os.environ['HOME'] = old_home or ''
def test_compat_urllib_parse_unquote(self):
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
@@ -96,8 +60,8 @@ class TestCompat(unittest.TestCase):
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
def test_compat_urllib_parse_unquote_plus(self):
- self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
- self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
+ self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
+ self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')
def test_compat_urllib_parse_urlencode(self):
self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
@@ -109,17 +73,6 @@ class TestCompat(unittest.TestCase):
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
- def test_compat_shlex_split(self):
- self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
- self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
- self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
-
- def test_compat_etree_Element(self):
- try:
- compat_etree_Element.items
- except AttributeError:
- self.fail('compat_etree_Element is not a type')
-
def test_compat_etree_fromstring(self):
xml = '''
<root foo="bar" spam="中文">
@@ -128,12 +81,12 @@ class TestCompat(unittest.TestCase):
<foo><bar>spam</bar></foo>
</root>
'''
- doc = compat_etree_fromstring(xml.encode('utf-8'))
- self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
- self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
- self.assertTrue(isinstance(doc.find('normal').text, compat_str))
- self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
- self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
+ doc = compat_etree_fromstring(xml.encode())
+ self.assertTrue(isinstance(doc.attrib['foo'], str))
+ self.assertTrue(isinstance(doc.attrib['spam'], str))
+ self.assertTrue(isinstance(doc.find('normal').text, str))
+ self.assertTrue(isinstance(doc.find('chinese').text, str))
+ self.assertTrue(isinstance(doc.find('foo/bar').text, str))
def test_compat_etree_fromstring_doctype(self):
xml = '''<?xml version="1.0"?>
@@ -142,7 +95,7 @@ class TestCompat(unittest.TestCase):
compat_etree_fromstring(xml)
def test_struct_unpack(self):
- self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
+ self.assertEqual(struct.unpack('!B', b'\x00'), (0,))
if __name__ == '__main__':
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 053e45b..ab5dd02 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -3,27 +3,28 @@ from datetime import datetime, timezone
from hypervideo_dl import cookies
from hypervideo_dl.cookies import (
+ LenientSimpleCookie,
LinuxChromeCookieDecryptor,
MacChromeCookieDecryptor,
WindowsChromeCookieDecryptor,
- parse_safari_cookies,
- pbkdf2_sha1,
_get_linux_desktop_environment,
_LinuxDesktopEnvironment,
+ parse_safari_cookies,
+ pbkdf2_sha1,
)
class Logger:
- def debug(self, message):
+ def debug(self, message, *args, **kwargs):
print(f'[verbose] {message}')
- def info(self, message):
+ def info(self, message, *args, **kwargs):
print(message)
- def warning(self, message, only_once=False):
+ def warning(self, message, *args, **kwargs):
self.error(message)
- def error(self, message):
+ def error(self, message, *args, **kwargs):
raise Exception(message)
@@ -137,3 +138,163 @@ class TestCookies(unittest.TestCase):
def test_pbkdf2_sha1(self):
key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
+
+
+class TestLenientSimpleCookie(unittest.TestCase):
+ def _run_tests(self, *cases):
+ for message, raw_cookie, expected in cases:
+ cookie = LenientSimpleCookie(raw_cookie)
+
+ with self.subTest(message, expected=expected):
+ self.assertEqual(cookie.keys(), expected.keys(), message)
+
+ for key, expected_value in expected.items():
+ morsel = cookie[key]
+ if isinstance(expected_value, tuple):
+ expected_value, expected_attributes = expected_value
+ else:
+ expected_attributes = {}
+
+ attributes = {
+ key: value
+ for key, value in dict(morsel).items()
+ if value != ""
+ }
+ self.assertEqual(attributes, expected_attributes, message)
+
+ self.assertEqual(morsel.value, expected_value, message)
+
+ def test_parsing(self):
+ self._run_tests(
+ # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py
+ (
+ "Test basic cookie",
+ "chips=ahoy; vienna=finger",
+ {"chips": "ahoy", "vienna": "finger"},
+ ),
+ (
+ "Test quoted cookie",
+ 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
+ {"keebler": 'E=mc2; L="Loves"; fudge=\012;'},
+ ),
+ (
+ "Allow '=' in an unquoted value",
+ "keebler=E=mc2",
+ {"keebler": "E=mc2"},
+ ),
+ (
+ "Allow cookies with ':' in their name",
+ "key:term=value:term",
+ {"key:term": "value:term"},
+ ),
+ (
+ "Allow '[' and ']' in cookie values",
+ "a=b; c=[; d=r; f=h",
+ {"a": "b", "c": "[", "d": "r", "f": "h"},
+ ),
+ (
+ "Test basic cookie attributes",
+ 'Customer="WILE_E_COYOTE"; Version=1; Path=/acme',
+ {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})},
+ ),
+ (
+ "Test flag only cookie attributes",
+ 'Customer="WILE_E_COYOTE"; HttpOnly; Secure',
+ {"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})},
+ ),
+ (
+ "Test flag only attribute with values",
+ "eggs=scrambled; httponly=foo; secure=bar; Path=/bacon",
+ {"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})},
+ ),
+ (
+ "Test special case for 'expires' attribute, 4 digit year",
+ 'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT',
+ {"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})},
+ ),
+ (
+ "Test special case for 'expires' attribute, 2 digit year",
+ 'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT',
+ {"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})},
+ ),
+ (
+ "Test extra spaces in keys and values",
+ "eggs = scrambled ; secure ; path = bar ; foo=foo ",
+ {"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"},
+ ),
+ (
+ "Test quoted attributes",
+ 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"',
+ {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}
+ ),
+ # Our own tests that CPython passes
+ (
+ "Allow ';' in quoted value",
+ 'chips="a;hoy"; vienna=finger',
+ {"chips": "a;hoy", "vienna": "finger"},
+ ),
+ (
+ "Keep only the last set value",
+ "a=c; a=b",
+ {"a": "b"},
+ ),
+ )
+
+ def test_lenient_parsing(self):
+ self._run_tests(
+ (
+ "Ignore and try to skip invalid cookies",
+ 'chips={"ahoy;": 1}; vienna="finger;"',
+ {"vienna": "finger;"},
+ ),
+ (
+ "Ignore cookies without a name",
+ "a=b; unnamed; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Ignore '\"' cookie without name",
+ 'a=b; "; c=d',
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Skip all space separated values",
+ "x a=b c=d x; e=f",
+ {"a": "b", "c": "d", "e": "f"},
+ ),
+ (
+ "Skip all space separated values",
+ 'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x',
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Expect quote mending",
+ 'a=b; invalid="; c=d',
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Reset morsel after invalid to not capture attributes",
+ "a=b; invalid; Version=1; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Reset morsel after invalid to not capture attributes",
+ "a=b; $invalid; $Version=1; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Continue after non-flag attribute without value",
+ "a=b; path; Version=1; c=d",
+ {"a": "b", "c": "d"},
+ ),
+ (
+ "Allow cookie attributes with `$` prefix",
+ 'Customer="WILE_E_COYOTE"; $Version=1; $Secure; $Path=/acme',
+ {"Customer": ("WILE_E_COYOTE", {"version": "1", "secure": True, "path": "/acme"})},
+ ),
+ (
+ "Invalid Morsel keys should not result in an error",
+ "Key=Value; [Invalid]=Value; Another=Value",
+ {"Key": "Value", "Another": "Value"},
+ ),
+ )
diff --git a/test/test_download.py b/test/test_download.py
index 3cca13b..6f77343 100755
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -1,43 +1,41 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import collections
+import hashlib
+import http.client
+import json
+import socket
+import urllib.error
+
from test.helper import (
assertGreaterEqual,
expect_info_dict,
expect_warnings,
get_params,
gettestcases,
+ getwebpagetestcases,
is_download_test,
report_warning,
try_rm,
)
-
-import hashlib
-import io
-import json
-import socket
-
-import hypervideo_dl.YoutubeDL
-from hypervideo_dl.compat import (
- compat_http_client,
- compat_urllib_error,
- compat_HTTPError,
-)
+import hypervideo_dl.YoutubeDL # isort: split
+from hypervideo_dl.extractor import get_info_extractor
from hypervideo_dl.utils import (
DownloadError,
ExtractorError,
- format_bytes,
UnavailableVideoError,
+ format_bytes,
+ join_nonempty,
)
-from hypervideo_dl.extractor import get_info_extractor
RETRIES = 3
@@ -46,15 +44,15 @@ class YoutubeDL(hypervideo_dl.YoutubeDL):
def __init__(self, *args, **kwargs):
self.to_stderr = self.to_screen
self.processed_info_dicts = []
- super(YoutubeDL, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
- def report_warning(self, message):
+ def report_warning(self, message, *args, **kwargs):
# Don't accept warnings during tests
raise ExtractorError(message)
def process_info(self, info_dict):
self.processed_info_dicts.append(info_dict.copy())
- return super(YoutubeDL, self).process_info(info_dict)
+ return super().process_info(info_dict)
def _file_md5(fn):
@@ -62,7 +60,9 @@ def _file_md5(fn):
return hashlib.md5(f.read()).hexdigest()
-defs = gettestcases()
+normal_test_cases = gettestcases()
+webpage_test_cases = getwebpagetestcases()
+tests_counter = collections.defaultdict(collections.Counter)
@is_download_test
@@ -77,24 +77,13 @@ class TestDownload(unittest.TestCase):
def __str__(self):
"""Identify each test with the `add_ie` attribute, if available."""
+ cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie
+ return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:'
- def strclass(cls):
- """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
- return '%s.%s' % (cls.__module__, cls.__name__)
-
- add_ie = getattr(self, self._testMethodName).add_ie
- return '%s (%s)%s:' % (self._testMethodName,
- strclass(self.__class__),
- ' [%s]' % add_ie if add_ie else '')
-
- def setUp(self):
- self.defs = defs
# Dynamically generate tests
-
def generator(test_case, tname):
-
def test_template(self):
if self.COMPLETED_TESTS.get(tname):
return
@@ -107,33 +96,34 @@ def generator(test_case, tname):
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
+ self.skipTest(reason)
+
if not ie.working():
print_skipping('IE marked as not _WORKING')
- return
for tc in test_cases:
info_dict = tc.get('info_dict', {})
params = tc.get('params', {})
if not info_dict.get('id'):
- raise Exception('Test definition incorrect. \'id\' key is not present')
- elif not info_dict.get('ext'):
+ raise Exception(f'Test {tname} definition incorrect - "id" key is not present')
+ elif not info_dict.get('ext') and info_dict.get('_type', 'video') == 'video':
if params.get('skip_download') and params.get('ignore_no_formats_error'):
continue
- raise Exception('Test definition incorrect. The output file cannot be known. \'ext\' key is not present')
+ raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file')
if 'skip' in test_case:
print_skipping(test_case['skip'])
- return
+
for other_ie in other_ies:
if not other_ie.working():
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
- return
params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist')
- params.setdefault('playlistend', test_case.get('playlist_mincount'))
+ params.setdefault('playlistend', test_case.get(
+ 'playlist_mincount', test_case.get('playlist_count', -2) + 1))
params.setdefault('skip_download', True)
ydl = YoutubeDL(params, auto_init=False)
@@ -172,14 +162,16 @@ def generator(test_case, tname):
force_generic_extractor=params.get('force_generic_extractor', False))
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
- if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
+ if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine)
+ or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)):
+ err.msg = f'{getattr(err, "msg", err)} ({tname})'
raise
if try_num == RETRIES:
report_warning('%s failed due to network errors, skipping...' % tname)
return
- print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
+ print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
try_num += 1
else:
@@ -221,6 +213,8 @@ def generator(test_case, tname):
tc_res_dict = res_dict['entries'][tc_num]
# First, check test cases' data against extracted data alone
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
+ if tc_res_dict.get('_type', 'video') != 'video':
+ continue
# Now, check downloaded file consistency
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
@@ -245,7 +239,7 @@ def generator(test_case, tname):
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
- with io.open(info_json_fn, encoding='utf-8') as infof:
+ with open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
@@ -260,35 +254,43 @@ def generator(test_case, tname):
# And add them to TestDownload
-tests_counter = {}
-for test_case in defs:
- name = test_case['name']
- i = tests_counter.get(name, 0)
- tests_counter[name] = i + 1
- tname = f'test_{name}_{i}' if i else f'test_{name}'
- test_method = generator(test_case, tname)
- test_method.__name__ = str(tname)
- ie_list = test_case.get('add_ie')
- test_method.add_ie = ie_list and ','.join(ie_list)
- setattr(TestDownload, test_method.__name__, test_method)
- del test_method
+def inject_tests(test_cases, label=''):
+ for test_case in test_cases:
+ name = test_case['name']
+ tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_')
+ tests_counter[name][label] += 1
+
+ test_method = generator(test_case, tname)
+ test_method.__name__ = tname
+ test_method.add_ie = ','.join(test_case.get('add_ie', []))
+ setattr(TestDownload, test_method.__name__, test_method)
-def batch_generator(name, num_tests):
+inject_tests(normal_test_cases)
+# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction
+inject_tests(webpage_test_cases, 'webpage')
+
+
+def batch_generator(name):
def test_template(self):
- for i in range(num_tests):
- getattr(self, f'test_{name}_{i}' if i else f'test_{name}')()
+ for label, num_tests in tests_counter[name].items():
+ for i in range(num_tests):
+ test_name = join_nonempty('test', name, label, i, delim='_')
+ try:
+ getattr(self, test_name)()
+ except unittest.SkipTest:
+ print(f'Skipped {test_name}')
return test_template
-for name, num_tests in tests_counter.items():
- test_method = batch_generator(name, num_tests)
+for name in tests_counter:
+ test_method = batch_generator(name)
test_method.__name__ = f'test_{name}_all'
test_method.add_ie = ''
setattr(TestDownload, test_method.__name__, test_method)
- del test_method
+del test_method
if __name__ == '__main__':
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 81b7dee..3b65859 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -1,20 +1,21 @@
#!/usr/bin/env python3
-# coding: utf-8
-from __future__ import unicode_literals
# Allow direct execution
import os
-import re
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import http.server
+import re
+import threading
+
from test.helper import http_server_port, try_rm
from hypervideo_dl import YoutubeDL
-from hypervideo_dl.compat import compat_http_server
from hypervideo_dl.downloader.http import HttpFD
from hypervideo_dl.utils import encodeFilename
-import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -22,7 +23,7 @@ TEST_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_SIZE = 10 * 1024
-class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
@@ -66,7 +67,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
assert False
-class FakeLogger(object):
+class FakeLogger:
def debug(self, msg):
pass
@@ -79,7 +80,7 @@ class FakeLogger(object):
class TestHttpFD(unittest.TestCase):
def setUp(self):
- self.httpd = compat_http_server.HTTPServer(
+ self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
@@ -94,8 +95,8 @@ class TestHttpFD(unittest.TestCase):
try_rm(encodeFilename(filename))
self.assertTrue(downloader.real_download(filename, {
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
- }))
- self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+ }), ep)
+ self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
try_rm(encodeFilename(filename))
def download_all(self, params):
diff --git a/test/test_execution.py b/test/test_execution.py
index d9aa965..56a4b2e 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -1,53 +1,56 @@
#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
+# Allow direct execution
+import os
+import sys
import unittest
-import sys
-import os
-import subprocess
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from hypervideo_dl.utils import encodeArgument
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+import contextlib
+import subprocess
+from hypervideo_dl.utils import Popen
-try:
- _DEV_NULL = subprocess.DEVNULL
-except AttributeError:
- _DEV_NULL = open(os.devnull, 'wb')
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+LAZY_EXTRACTORS = 'hypervideo_dl/extractor/lazy_extractors.py'
class TestExecution(unittest.TestCase):
+ def run_hypervideo_dl(self, exe=(sys.executable, 'hypervideo_dl/__main__.py'), opts=('--version', )):
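+        # Popen.run is hypervideo's subprocess wrapper; it returns the tuple
+        # (stdout, stderr, returncode) for the completed process.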
+ stdout, stderr, returncode = Popen.run(
+ [*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ print(stderr, file=sys.stderr)
+ self.assertEqual(returncode, 0)
+ return stdout.strip(), stderr.strip()
+
+ def test_main_exec(self):
+ self.run_hypervideo_dl()
+
def test_import(self):
- subprocess.check_call([sys.executable, '-c', 'import hypervideo_dl'], cwd=rootDir)
+ self.run_hypervideo_dl(exe=(sys.executable, '-c', 'import hypervideo_dl'))
def test_module_exec(self):
- if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution
- subprocess.check_call([sys.executable, '-m', 'hypervideo_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
-
- def test_main_exec(self):
- subprocess.check_call([sys.executable, 'hypervideo_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ self.run_hypervideo_dl(exe=(sys.executable, '-m', 'hypervideo_dl'))
def test_cmdline_umlauts(self):
- p = subprocess.Popen(
- [sys.executable, 'hypervideo_dl/__main__.py', encodeArgument('ä'), '--version'],
- cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
- _, stderr = p.communicate()
+ _, stderr = self.run_hypervideo_dl(opts=('ä', '--version'))
self.assertFalse(stderr)
def test_lazy_extractors(self):
try:
- subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'hypervideo_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
- subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
+ subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
+ cwd=rootDir, stdout=subprocess.DEVNULL)
+ self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
+
+ _, stderr = self.run_hypervideo_dl(opts=('-s', 'test:'))
+ self.assertFalse(stderr)
+
+ subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
finally:
- try:
- os.remove('hypervideo_dl/extractor/lazy_extractors.py')
- except (IOError, OSError):
- pass
+ with contextlib.suppress(OSError):
+ os.remove(LAZY_EXTRACTORS)
if __name__ == '__main__':
diff --git a/test/test_http.py b/test/test_http.py
index a7656b0..71d6f1b 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -1,23 +1,25 @@
#!/usr/bin/env python3
-# coding: utf-8
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import http_server_port
-from hypervideo_dl import YoutubeDL
-from hypervideo_dl.compat import compat_http_server, compat_urllib_request
+
+import http.server
import ssl
import threading
+import urllib.request
+
+from test.helper import http_server_port
+from hypervideo_dl import YoutubeDL
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
@@ -32,17 +34,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
self.send_header('Content-Type', 'video/mp4')
self.end_headers()
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
- elif self.path == '/302':
- if sys.version_info[0] == 3:
- # XXX: Python 3 http server does not allow non-ASCII header values
- self.send_response(404)
- self.end_headers()
- return
-
- new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
- self.send_response(302)
- self.send_header(b'Location', new_url.encode('utf-8'))
- self.end_headers()
elif self.path == '/%E4%B8%AD%E6%96%87.html':
self.send_response(200)
self.send_header('Content-Type', 'text/html; charset=utf-8')
@@ -52,7 +43,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
assert False
-class FakeLogger(object):
+class FakeLogger:
def debug(self, msg):
pass
@@ -65,49 +56,84 @@ class FakeLogger(object):
class TestHTTP(unittest.TestCase):
def setUp(self):
- self.httpd = compat_http_server.HTTPServer(
+ self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
- def test_unicode_path_redirection(self):
- # XXX: Python 3 http server does not allow non-ASCII header values
- if sys.version_info[0] == 3:
- return
-
- ydl = YoutubeDL({'logger': FakeLogger()})
- r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
- self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
-
class TestHTTPS(unittest.TestCase):
def setUp(self):
certfn = os.path.join(TEST_DIR, 'testcert.pem')
- self.httpd = compat_http_server.HTTPServer(
+ self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
- self.httpd.socket = ssl.wrap_socket(
- self.httpd.socket, certfile=certfn, server_side=True)
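+        # ssl.wrap_socket() is deprecated (and removed in Python 3.12); an
+        # explicitly configured SSLContext is the supported replacement.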
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.load_cert_chain(certfn, None)
+ self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
def test_nocheckcertificate(self):
- if sys.version_info >= (2, 7, 9): # No certificate checking anyways
- ydl = YoutubeDL({'logger': FakeLogger()})
- self.assertRaises(
- Exception,
- ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
+ ydl = YoutubeDL({'logger': FakeLogger()})
+ self.assertRaises(
+ Exception,
+ ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
- self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+ self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+
+
+class TestClientCert(unittest.TestCase):
+ def setUp(self):
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
+ cacertfn = os.path.join(self.certdir, 'ca.crt')
+ self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
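+        # CERT_REQUIRED makes the server demand a client certificate during
+        # the handshake and reject any that does not verify against ca.crt.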
+ sslctx.verify_mode = ssl.CERT_REQUIRED
+ sslctx.load_verify_locations(cafile=cacertfn)
+ sslctx.load_cert_chain(certfn, None)
+ self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def _run_test(self, **params):
+ ydl = YoutubeDL({
+ 'logger': FakeLogger(),
+            # Disable client-side validation of the unacceptable self-signed testcert.pem.
+            # These tests exercise a server-side check, which is unaffected.
+ 'nocheckcertificate': True,
+ **params,
+ })
+ r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
+ self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+
+ def test_certificate_combined_nopass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt'))
+
+ def test_certificate_nocombined_nopass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'),
+ client_certificate_key=os.path.join(self.certdir, 'client.key'))
+
+ def test_certificate_combined_pass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
+ client_certificate_password='foobar')
+
+ def test_certificate_nocombined_pass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'),
+ client_certificate_key=os.path.join(self.certdir, 'clientencrypted.key'),
+ client_certificate_password='foobar')
def _build_proxy_handler(name):
- class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
proxy_name = name
def log_message(self, format, *args):
@@ -117,20 +143,20 @@ def _build_proxy_handler(name):
self.send_response(200)
self.send_header('Content-Type', 'text/plain; charset=utf-8')
self.end_headers()
- self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+ self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
return HTTPTestRequestHandler
class TestProxy(unittest.TestCase):
def setUp(self):
- self.proxy = compat_http_server.HTTPServer(
+ self.proxy = http.server.HTTPServer(
('127.0.0.1', 0), _build_proxy_handler('normal'))
self.port = http_server_port(self.proxy)
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
self.proxy_thread.daemon = True
self.proxy_thread.start()
- self.geo_proxy = compat_http_server.HTTPServer(
+ self.geo_proxy = http.server.HTTPServer(
('127.0.0.1', 0), _build_proxy_handler('geo'))
self.geo_port = http_server_port(self.geo_proxy)
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
@@ -138,26 +164,26 @@ class TestProxy(unittest.TestCase):
self.geo_proxy_thread.start()
def test_proxy(self):
- geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
+ geo_proxy = f'127.0.0.1:{self.geo_port}'
ydl = YoutubeDL({
- 'proxy': '127.0.0.1:{0}'.format(self.port),
+ 'proxy': f'127.0.0.1:{self.port}',
'geo_verification_proxy': geo_proxy,
})
url = 'http://foo.com/bar'
- response = ydl.urlopen(url).read().decode('utf-8')
- self.assertEqual(response, 'normal: {0}'.format(url))
+ response = ydl.urlopen(url).read().decode()
+ self.assertEqual(response, f'normal: {url}')
- req = compat_urllib_request.Request(url)
+ req = urllib.request.Request(url)
req.add_header('Ytdl-request-proxy', geo_proxy)
- response = ydl.urlopen(req).read().decode('utf-8')
- self.assertEqual(response, 'geo: {0}'.format(url))
+ response = ydl.urlopen(req).read().decode()
+ self.assertEqual(response, f'geo: {url}')
def test_proxy_with_idn(self):
ydl = YoutubeDL({
- 'proxy': '127.0.0.1:{0}'.format(self.port),
+ 'proxy': f'127.0.0.1:{self.port}',
})
url = 'http://中文.tw/'
- response = ydl.urlopen(url).read().decode('utf-8')
+ response = ydl.urlopen(url).read().decode()
# b'xn--fiq228c' is '中文'.encode('idna')
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
diff --git a/test/test_netrc.py b/test/test_netrc.py
index c7f5272..1d722ab 100644
--- a/test/test_netrc.py
+++ b/test/test_netrc.py
@@ -1,9 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
+#!/usr/bin/env python3
+# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
diff --git a/test/test_options.py b/test/test_options.py
deleted file mode 100644
index 0b2458e..0000000
--- a/test/test_options.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from hypervideo_dl.options import _hide_login_info
-
-
-class TestOptions(unittest.TestCase):
- def test_hide_login_info(self):
- self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']),
- ['-u', 'PRIVATE', '-p', 'PRIVATE'])
- self.assertEqual(_hide_login_info(['-u']), ['-u'])
- self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']),
- ['-u', 'PRIVATE', '-u', 'PRIVATE'])
- self.assertEqual(_hide_login_info(['--username=foo']),
- ['--username=PRIVATE'])
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/test/test_overwrites.py b/test/test_overwrites.py
index 9ad9bba..77fd100 100644
--- a/test/test_overwrites.py
+++ b/test/test_overwrites.py
@@ -1,18 +1,19 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
+# Allow direct execution
import os
-from os.path import join
-import subprocess
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import is_download_test, try_rm
+import subprocess
+
+from test.helper import is_download_test, try_rm
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-download_file = join(root_dir, 'test.webm')
+download_file = os.path.join(root_dir, 'test.webm')
@is_download_test
@@ -46,7 +47,7 @@ class TestOverwrites(unittest.TestCase):
self.assertTrue(os.path.getsize(download_file) > 1)
def tearDown(self):
- try_rm(join(root_dir, 'test.webm'))
+ try_rm(os.path.join(root_dir, 'test.webm'))
if __name__ == '__main__':
diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py
index 8f3b03a..61c35d7 100644
--- a/test/test_post_hooks.py
+++ b/test/test_post_hooks.py
@@ -1,20 +1,21 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-
+# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import get_params, try_rm, is_download_test
-import hypervideo_dl.YoutubeDL
+
+from test.helper import get_params, is_download_test, try_rm
+import hypervideo_dl.YoutubeDL # isort: split
from hypervideo_dl.utils import DownloadError
class YoutubeDL(hypervideo_dl.YoutubeDL):
def __init__(self, *args, **kwargs):
- super(YoutubeDL, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
self.to_stderr = self.to_screen
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index e0b8347..982c321 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -1,7 +1,5 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
-
# Allow direct execution
import os
import sys
@@ -9,6 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
from hypervideo_dl import YoutubeDL
from hypervideo_dl.compat import compat_shlex_quote
from hypervideo_dl.postprocessor import (
@@ -16,7 +15,8 @@ from hypervideo_dl.postprocessor import (
FFmpegThumbnailsConvertorPP,
MetadataFromFieldPP,
MetadataParserPP,
- ModifyChaptersPP
+ ModifyChaptersPP,
+ SponsorBlockPP,
)
@@ -77,11 +77,15 @@ class TestModifyChaptersPP(unittest.TestCase):
self._pp = ModifyChaptersPP(YoutubeDL())
@staticmethod
- def _sponsor_chapter(start, end, cat, remove=False):
- c = {'start_time': start, 'end_time': end, '_categories': [(cat, start, end)]}
- if remove:
- c['remove'] = True
- return c
+ def _sponsor_chapter(start, end, cat, remove=False, title=None):
+ if title is None:
+ title = SponsorBlockPP.CATEGORIES[cat]
+ return {
+ 'start_time': start,
+ 'end_time': end,
+ '_categories': [(cat, start, end, title)],
+ **({'remove': True} if remove else {}),
+ }
@staticmethod
def _chapter(start, end, title=None, remove=False):
@@ -131,6 +135,19 @@ class TestModifyChaptersPP(unittest.TestCase):
'c', '[SponsorBlock]: Filler Tangent', 'c'])
self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+ def test_remove_marked_arrange_sponsors_SponsorBlockChapters(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'chapter', title='sb c1'),
+ self._sponsor_chapter(15, 16, 'chapter', title='sb c2'),
+ self._sponsor_chapter(30, 40, 'preview'),
+ self._sponsor_chapter(50, 60, 'filler')]
+ expected = self._chapters(
+ [10, 15, 16, 20, 30, 40, 50, 60, 70],
+ ['c', '[SponsorBlock]: sb c1', '[SponsorBlock]: sb c1, sb c2', '[SponsorBlock]: sb c1',
+ 'c', '[SponsorBlock]: Preview/Recap',
+ 'c', '[SponsorBlock]: Filler Tangent', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self):
chapters = self._chapters([120], ['c']) + [
self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'),
@@ -174,7 +191,7 @@ class TestModifyChaptersPP(unittest.TestCase):
self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self):
- cuts = [self._sponsor_chapter(20, 50, 'selpromo', remove=True)]
+ cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)]
chapters = self._chapters([60], ['c']) + [
self._sponsor_chapter(10, 20, 'intro'),
self._sponsor_chapter(30, 40, 'sponsor'),
@@ -200,7 +217,7 @@ class TestModifyChaptersPP(unittest.TestCase):
self._sponsor_chapter(10, 20, 'sponsor'),
self._sponsor_chapter(20, 30, 'interaction', remove=True),
self._chapter(30, 40, remove=True),
- self._sponsor_chapter(40, 50, 'selpromo', remove=True),
+ self._sponsor_chapter(40, 50, 'selfpromo', remove=True),
self._sponsor_chapter(50, 60, 'interaction')]
expected = self._chapters([10, 20, 30, 40],
['c', '[SponsorBlock]: Sponsor',
@@ -283,7 +300,7 @@ class TestModifyChaptersPP(unittest.TestCase):
chapters = self._chapters([70], ['c']) + [
self._sponsor_chapter(10, 30, 'sponsor'),
self._sponsor_chapter(20, 50, 'interaction'),
- self._sponsor_chapter(30, 50, 'selpromo', remove=True),
+ self._sponsor_chapter(30, 50, 'selfpromo', remove=True),
self._sponsor_chapter(40, 60, 'sponsor'),
self._sponsor_chapter(50, 60, 'interaction')]
expected = self._chapters(
diff --git a/test/test_socks.py b/test/test_socks.py
index 2574e73..6651290 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -1,25 +1,18 @@
#!/usr/bin/env python3
-# coding: utf-8
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
import random
import subprocess
+import urllib.request
-from test.helper import (
- FakeYDL,
- get_params,
- is_download_test,
-)
-from hypervideo_dl.compat import (
- compat_str,
- compat_urllib_request,
-)
+from test.helper import FakeYDL, get_params, is_download_test
@is_download_test
@@ -41,7 +34,7 @@ class TestMultipleSocks(unittest.TestCase):
'proxy': params['primary_proxy']
})
self.assertEqual(
- ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'),
+ ydl.urlopen('http://yt-dl.org/ip').read().decode(),
params['primary_server_ip'])
def test_proxy_https(self):
@@ -52,7 +45,7 @@ class TestMultipleSocks(unittest.TestCase):
'proxy': params['primary_proxy']
})
self.assertEqual(
- ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'),
+ ydl.urlopen('https://yt-dl.org/ip').read().decode(),
params['primary_server_ip'])
def test_secondary_proxy_http(self):
@@ -60,10 +53,10 @@ class TestMultipleSocks(unittest.TestCase):
if params is None:
return
ydl = FakeYDL()
- req = compat_urllib_request.Request('http://yt-dl.org/ip')
+ req = urllib.request.Request('http://yt-dl.org/ip')
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
self.assertEqual(
- ydl.urlopen(req).read().decode('utf-8'),
+ ydl.urlopen(req).read().decode(),
params['secondary_server_ip'])
def test_secondary_proxy_https(self):
@@ -71,10 +64,10 @@ class TestMultipleSocks(unittest.TestCase):
if params is None:
return
ydl = FakeYDL()
- req = compat_urllib_request.Request('https://yt-dl.org/ip')
+ req = urllib.request.Request('https://yt-dl.org/ip')
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
self.assertEqual(
- ydl.urlopen(req).read().decode('utf-8'),
+ ydl.urlopen(req).read().decode(),
params['secondary_server_ip'])
@@ -105,16 +98,16 @@ class TestSocks(unittest.TestCase):
ydl = FakeYDL({
'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
})
- return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8')
+ return ydl.urlopen('http://yt-dl.org/ip').read().decode()
def test_socks4(self):
- self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
+ self.assertTrue(isinstance(self._get_ip('socks4'), str))
def test_socks4a(self):
- self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str))
+ self.assertTrue(isinstance(self._get_ip('socks4a'), str))
def test_socks5(self):
- self.assertTrue(isinstance(self._get_ip('socks5'), compat_str))
+ self.assertTrue(isinstance(self._get_ip('socks5'), str))
if __name__ == '__main__':
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 10fa0ca..7657d43 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -1,33 +1,32 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, md5, is_download_test
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from test.helper import FakeYDL, is_download_test, md5
from hypervideo_dl.extractor import (
- YoutubeIE,
- DailymotionIE,
- TedTalkIE,
- VimeoIE,
- WallaIE,
- CeskaTelevizeIE,
- LyndaIE,
NPOIE,
+ NRKTVIE,
PBSIE,
+ CeskaTelevizeIE,
ComedyCentralIE,
- NRKTVIE,
+ DailymotionIE,
+ DemocracynowIE,
+ LyndaIE,
RaiPlayIE,
- VikiIE,
- ThePlatformIE,
- ThePlatformFeedIE,
RTVEALaCartaIE,
- DemocracynowIE,
+ TedTalkIE,
+ ThePlatformFeedIE,
+ ThePlatformIE,
+ VikiIE,
+ VimeoIE,
+ WallaIE,
+ YoutubeIE,
)
@@ -40,6 +39,9 @@ class BaseTestSubtitles(unittest.TestCase):
self.DL = FakeYDL()
self.ie = self.IE()
self.DL.add_info_extractor(self.ie)
+ if not self.IE.working():
+ print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
+ self.skipTest('IE marked as not _WORKING')
def getInfoDict(self):
info_dict = self.DL.extract_info(self.url, download=False)
@@ -53,12 +55,27 @@ class BaseTestSubtitles(unittest.TestCase):
for sub_info in subtitles.values():
if sub_info.get('data') is None:
uf = self.DL.urlopen(sub_info['url'])
- sub_info['data'] = uf.read().decode('utf-8')
- return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
+ sub_info['data'] = uf.read().decode()
+ return {l: sub_info['data'] for l, sub_info in subtitles.items()}
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
+ # Available subtitles for QRS8MkLhQmM:
+ # Language formats
+ # ru vtt, ttml, srv3, srv2, srv1, json3
+ # fr vtt, ttml, srv3, srv2, srv1, json3
+ # en vtt, ttml, srv3, srv2, srv1, json3
+ # nl vtt, ttml, srv3, srv2, srv1, json3
+ # de vtt, ttml, srv3, srv2, srv1, json3
+ # ko vtt, ttml, srv3, srv2, srv1, json3
+ # it vtt, ttml, srv3, srv2, srv1, json3
+ # zh-Hant vtt, ttml, srv3, srv2, srv1, json3
+ # hi vtt, ttml, srv3, srv2, srv1, json3
+ # pt-BR vtt, ttml, srv3, srv2, srv1, json3
+ # es-MX vtt, ttml, srv3, srv2, srv1, json3
+ # ja vtt, ttml, srv3, srv2, srv1, json3
+ # pl vtt, ttml, srv3, srv2, srv1, json3
url = 'QRS8MkLhQmM'
IE = YoutubeIE
@@ -67,47 +84,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
- self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
- self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
+ self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
+ self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
- def test_youtube_subtitles_ttml_format(self):
+ def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
self.DL.params['writesubtitles'] = True
- self.DL.params['subtitlesformat'] = 'ttml'
+ self.DL.params['subtitlesformat'] = fmt
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')
+ self.assertEqual(md5(subtitles[lang]), md5_hash)
+
+ def test_youtube_subtitles_ttml_format(self):
+ self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
def test_youtube_subtitles_vtt_format(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitlesformat'] = 'vtt'
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
+ self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
- def test_youtube_automatic_captions(self):
- self.url = '8YoUxe5ncPo'
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertTrue(subtitles['it'] is not None)
+ def test_youtube_subtitles_json3_format(self):
+ self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
- def test_youtube_no_automatic_captions(self):
- self.url = 'QRS8MkLhQmM'
+ def _test_automatic_captions(self, url, lang):
+ self.url = url
self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslangs'] = [lang]
subtitles = self.getSubtitles()
- self.assertTrue(not subtitles)
+ self.assertTrue(subtitles[lang] is not None)
+ def test_youtube_automatic_captions(self):
+ # Available automatic captions for 8YoUxe5ncPo:
+ # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
+ # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
+ # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
+ # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
+ # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
+ # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
+ # mt, ms, mr, ug, ta, my, af, sw, is, am,
+ # *it*, iw, sv, ar,
+ # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
+ # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
+ # ky, sd
+ # ...
+ self._test_automatic_captions('8YoUxe5ncPo', 'it')
+
+ @unittest.skip('Video unavailable')
def test_youtube_translated_subtitles(self):
- # This video has a subtitles track, which can be translated
- self.url = 'i0ZabxXmH4Y'
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertTrue(subtitles['it'] is not None)
+ # This video has a subtitles track, which can be translated (#4555)
+ self._test_automatic_captions('Ky9eprVWzlI', 'it')
def test_youtube_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
- self.url = 'n5BB19UTcdA'
+ # Available automatic captions for 8YoUxe5ncPo:
+ # ...
+        # 8YoUxe5ncPo has automatic captions but no subtitles
+ self.url = '8YoUxe5ncPo'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@@ -139,6 +169,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TedTalkIE
@@ -163,13 +194,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
- self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
- self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
+ self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'})
+ self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
+ self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
- self.url = 'http://vimeo.com/56015672'
+ self.url = 'http://vimeo.com/68093876'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@@ -177,6 +208,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
@@ -186,7 +218,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['heb']))
+ self.assertEqual(set(subtitles.keys()), {'heb'})
self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')
def test_nosubtitles(self):
@@ -199,6 +231,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
@@ -208,7 +241,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['cs']))
+ self.assertEqual(set(subtitles.keys()), {'cs'})
self.assertTrue(len(subtitles['cs']) > 20000)
def test_nosubtitles(self):
@@ -221,6 +254,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
IE = LyndaIE
@@ -229,11 +263,12 @@ class TestLyndaSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(set(subtitles.keys()), {'en'})
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
@is_download_test
+@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
IE = NPOIE
@@ -242,23 +277,24 @@ class TestNPOSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['nl']))
+ self.assertEqual(set(subtitles.keys()), {'nl'})
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
@is_download_test
+@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
IE = ComedyCentralIE
def getInfoDict(self):
- return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
+ return super().getInfoDict()['entries'][0]
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(set(subtitles.keys()), {'en'})
self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
@@ -271,8 +307,8 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['no']))
- self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
+ self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
+ self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
@is_download_test
@@ -284,7 +320,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['it']))
+ self.assertEqual(set(subtitles.keys()), {'it'})
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
def test_subtitles_array_key(self):
@@ -292,11 +328,12 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['it']))
+ self.assertEqual(set(subtitles.keys()), {'it'})
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
@is_download_test
+@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
IE = VikiIE
@@ -305,7 +342,7 @@ class TestVikiSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(set(subtitles.keys()), {'en'})
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
@@ -320,11 +357,12 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(set(subtitles.keys()), {'en'})
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
@is_download_test
+@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
IE = ThePlatformFeedIE
@@ -333,7 +371,7 @@ class TestThePlatformFeedSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(set(subtitles.keys()), {'en'})
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
@@ -348,7 +386,7 @@ class TestRtveSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['es']))
+ self.assertEqual(set(subtitles.keys()), {'es'})
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
@@ -361,16 +399,16 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
def test_subtitles_in_page(self):
self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+ self.assertEqual(set(subtitles.keys()), {'en'})
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
@is_download_test
@@ -382,7 +420,7 @@ class TestPBSSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(set(subtitles.keys()), {'en'})
def test_subtitles_dfxp_format(self):
self.DL.params['writesubtitles'] = True
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
deleted file mode 100644
index 6c1b7ec..0000000
--- a/test/test_unicode_literals.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import io
-import re
-
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
-IGNORED_FILES = [
- 'setup.py', # http://bugs.python.org/issue13943
- 'conf.py',
- 'buildserver.py',
-]
-
-IGNORED_DIRS = [
- '.git',
- '.tox',
-]
-
-from test.helper import assertRegexpMatches
-
-
-class TestUnicodeLiterals(unittest.TestCase):
- def test_all_files(self):
- for dirpath, dirnames, filenames in os.walk(rootDir):
- for ignore_dir in IGNORED_DIRS:
- if ignore_dir in dirnames:
- # If we remove the directory from dirnames os.walk won't
- # recurse into it
- dirnames.remove(ignore_dir)
- for basename in filenames:
- if not basename.endswith('.py'):
- continue
- if basename in IGNORED_FILES:
- continue
-
- fn = os.path.join(dirpath, basename)
- with io.open(fn, encoding='utf-8') as inf:
- code = inf.read()
-
- if "'" not in code and '"' not in code:
- continue
- assertRegexpMatches(
- self,
- code,
- r'(?:(?:#.*?|\s*)\n)*from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
- 'unicode_literals import missing in %s' % fn)
-
- m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
- if m is not None:
- self.assertTrue(
- m is None,
- 'u present in %s, around %s' % (
- fn, code[m.start() - 10:m.end() + 10]))
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index 039900c..acb913a 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,89 +1,108 @@
#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
# Allow direct execution
import os
+import re
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-# Various small unit tests
+import contextlib
import io
import itertools
import json
import xml.etree.ElementTree
+from hypervideo_dl.compat import (
+ compat_etree_fromstring,
+ compat_HTMLParseError,
+ compat_os_name,
+)
from hypervideo_dl.utils import (
+ Config,
+ DateRange,
+ ExtractorError,
+ InAdvancePagedList,
+ LazyList,
+ OnDemandPagedList,
age_restricted,
args_to_str,
- encode_base_n,
+ base_url,
caesar,
clean_html,
clean_podcast_url,
- Config,
+ cli_bool_option,
+ cli_option,
+ cli_valueless_option,
date_from_str,
datetime_from_str,
- DateRange,
detect_exe_version,
determine_ext,
+ determine_file_encoding,
+ dfxp2srt,
dict_get,
+ encode_base_n,
encode_compat_str,
encodeFilename,
escape_rfc3986,
escape_url,
+ expand_path,
extract_attributes,
- ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
- format_bytes,
float_or_none,
- get_element_by_class,
+ format_bytes,
+ get_compatible_ext,
get_element_by_attribute,
- get_elements_by_class,
- get_elements_by_attribute,
- get_element_html_by_class,
+ get_element_by_class,
get_element_html_by_attribute,
- get_elements_html_by_class,
+ get_element_html_by_class,
+ get_element_text_and_html_by_tag,
+ get_elements_by_attribute,
+ get_elements_by_class,
get_elements_html_by_attribute,
+ get_elements_html_by_class,
get_elements_text_and_html_by_attribute,
- get_element_text_and_html_by_tag,
- InAdvancePagedList,
int_or_none,
intlist_to_bytes,
+ iri_to_uri,
is_html,
js_to_json,
limit_length,
+ locked_file,
+ lowercase_escape,
+ match_str,
merge_dicts,
mimetype2ext,
month_by_name,
multipart_encode,
ohdave_rsa_encrypt,
- OnDemandPagedList,
orderedSet,
parse_age_limit,
+ parse_bitrate,
+ parse_codecs,
+ parse_count,
+ parse_dfxp_time_expr,
parse_duration,
parse_filesize,
- parse_count,
parse_iso8601,
- parse_resolution,
- parse_bitrate,
parse_qs,
+ parse_resolution,
pkcs1pad,
+ prepend_extension,
read_batch_urls,
+ remove_end,
+ remove_quotes,
+ remove_start,
+ render_table,
+ replace_extension,
+ rot47,
sanitize_filename,
sanitize_path,
sanitize_url,
sanitized_Request,
- expand_path,
- prepend_extension,
- replace_extension,
- remove_start,
- remove_end,
- remove_quotes,
- rot47,
shell_quote,
smuggle_url,
str_to_int,
@@ -91,42 +110,23 @@ from hypervideo_dl.utils import (
strip_or_none,
subtitles_filename,
timeconvert,
+ traverse_obj,
unescapeHTML,
unified_strdate,
unified_timestamp,
unsmuggle_url,
+ update_url_query,
uppercase_escape,
- lowercase_escape,
url_basename,
url_or_none,
- base_url,
- urljoin,
urlencode_postdata,
+ urljoin,
urshift,
- update_url_query,
version_tuple,
- xpath_with_ns,
+ xpath_attr,
xpath_element,
xpath_text,
- xpath_attr,
- render_table,
- match_str,
- parse_dfxp_time_expr,
- dfxp2srt,
- cli_option,
- cli_valueless_option,
- cli_bool_option,
- parse_codecs,
- iri_to_uri,
- LazyList,
-)
-from hypervideo_dl.compat import (
- compat_chr,
- compat_etree_fromstring,
- compat_getenv,
- compat_HTMLParseError,
- compat_os_name,
- compat_setenv,
+ xpath_with_ns,
)
@@ -142,13 +142,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename('123'), '123')
- self.assertEqual('abc_de', sanitize_filename('abc/de'))
+ self.assertEqual('abc⧸de', sanitize_filename('abc/de'))
self.assertFalse('/' in sanitize_filename('abc/de///'))
- self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de'))
- self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|'))
- self.assertEqual('yes no', sanitize_filename('yes? no'))
- self.assertEqual('this - that', sanitize_filename('this: that'))
+ self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False))
+ self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False))
+ self.assertEqual('yes no', sanitize_filename('yes? no', is_id=False))
+ self.assertEqual('this - that', sanitize_filename('this: that', is_id=False))
self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
aumlaut = 'ä'
@@ -265,15 +265,22 @@ class TestUtil(unittest.TestCase):
def test_expand_path(self):
def env(var):
- return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
+ return f'%{var}%' if sys.platform == 'win32' else f'${var}'
- compat_setenv('hypervideo_dl_EXPATH_PATH', 'expanded')
+ os.environ['hypervideo_dl_EXPATH_PATH'] = 'expanded'
self.assertEqual(expand_path(env('hypervideo_dl_EXPATH_PATH')), 'expanded')
- self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
- self.assertEqual(expand_path('~'), compat_getenv('HOME'))
- self.assertEqual(
- expand_path('~/%s' % env('hypervideo_dl_EXPATH_PATH')),
- '%s/expanded' % compat_getenv('HOME'))
+
+ old_home = os.environ.get('HOME')
+ test_str = R'C:\Documents and Settings\тест\Application Data'
+ try:
+ os.environ['HOME'] = test_str
+ self.assertEqual(expand_path(env('HOME')), os.getenv('HOME'))
+ self.assertEqual(expand_path('~'), os.getenv('HOME'))
+ self.assertEqual(
+ expand_path('~/%s' % env('hypervideo_dl_EXPATH_PATH')),
+ '%s/expanded' % os.getenv('HOME'))
+ finally:
+ os.environ['HOME'] = old_home or ''
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
@@ -364,6 +371,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
self.assertEqual(unified_strdate('1968 12 10'), '19681210')
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
+ self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731')
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
self.assertEqual(
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
@@ -407,6 +415,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+ self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
+ self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
+ self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
+
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@@ -537,9 +549,6 @@ class TestUtil(unittest.TestCase):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
self.assertEqual(str_to_int(523), 523)
- # Python 3 has no long
- if sys.version_info < (3, 0):
- eval('self.assertEqual(str_to_int(123456L), 123456)')
self.assertEqual(str_to_int('noninteger'), None)
self.assertEqual(str_to_int([]), None)
@@ -559,6 +568,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+ self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/')
def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
@@ -668,8 +678,7 @@ class TestUtil(unittest.TestCase):
def get_page(pagenum):
firstid = pagenum * pagesize
upto = min(size, pagenum * pagesize + pagesize)
- for i in range(firstid, upto):
- yield i
+ yield from range(firstid, upto)
pl = OnDemandPagedList(get_page, pagesize)
got = pl.getslice(*sliceargs)
@@ -738,7 +747,7 @@ class TestUtil(unittest.TestCase):
multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
self.assertEqual(
- multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
+ multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
@@ -896,7 +905,7 @@ class TestUtil(unittest.TestCase):
'dynamic_range': 'HDR10',
})
self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
- 'vcodec': 'av01.0.12M.10',
+ 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0',
'acodec': 'none',
'dynamic_range': 'HDR10',
})
@@ -1091,6 +1100,12 @@ class TestUtil(unittest.TestCase):
on = js_to_json('[1,//{},\n2]')
self.assertEqual(json.loads(on), [1, 2])
+ on = js_to_json(R'"\^\$\#"')
+ self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
+
+ on = js_to_json('\'"\\""\'')
+ self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
+
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
@@ -1126,7 +1141,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
# "Narrow" Python builds don't support unicode code points outside BMP.
try:
- compat_chr(0x10000)
+ chr(0x10000)
supports_outside_bmp = True
except ValueError:
supports_outside_bmp = False
@@ -1399,7 +1414,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
- </tt>'''.encode('utf-8')
+ </tt>'''.encode()
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols
@@ -1417,14 +1432,14 @@ Line
'''
self.assertEqual(dfxp2srt(dfxp_data), srt_data)
- dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+ dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?>
<tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
<body>
<div xml:lang="en">
<p begin="0" end="1">The first line</p>
</div>
</body>
- </tt>'''.encode('utf-8')
+ </tt>'''
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line
@@ -1432,7 +1447,7 @@ The first line
'''
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
- dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
+ dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?>
<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
<head>
<styling>
@@ -1450,7 +1465,7 @@ The first line
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
</div>
</body>
-</tt>'''.encode('utf-8')
+</tt>'''
srt_data = '''1
00:00:02,080 --> 00:00:05,840
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
@@ -1670,6 +1685,9 @@ Line 1
self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), [])
self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), [])
+ self.assertEqual(list(get_elements_text_and_html_by_attribute(
+ 'class', 'foo', '<a class="foo">nice</a><span class="foo">nice</span>', tag='a')), [('nice', '<a class="foo">nice</a>')])
+
GET_ELEMENT_BY_TAG_TEST_STRING = '''
random text lorem ipsum</p>
<div>
@@ -1757,7 +1775,7 @@ Line 1
def test(ll, idx, val, cache):
self.assertEqual(ll[idx], val)
- self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache))
+ self.assertEqual(ll._cache, list(cache))
ll = LazyList(range(10))
test(ll, 0, 0, range(1))
@@ -1795,6 +1813,302 @@ Line 1
self.assertEqual(Config.hide_login_info(['--username=foo']),
['--username=PRIVATE'])
+ def test_locked_file(self):
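+        # Acquire the lock in each mode in turn (write, append, read) and
+        # check which other modes the held lock blocks.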
+ TEXT = 'test_locked_file\n'
+ FILE = 'test_locked_file.ytdl'
+ MODES = 'war' # Order is important
+
+ try:
+ for lock_mode in MODES:
+ with locked_file(FILE, lock_mode, False) as f:
+ if lock_mode == 'r':
+ self.assertEqual(f.read(), TEXT * 2, 'Wrong file content')
+ else:
+ f.write(TEXT)
+ for test_mode in MODES:
+ testing_write = test_mode != 'r'
+ try:
+ with locked_file(FILE, test_mode, False):
+ pass
+ except (BlockingIOError, PermissionError):
+ if not testing_write: # FIXME
+ print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})')
+ continue
+ self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}')
+ else:
+ self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}')
+ finally:
+ with contextlib.suppress(OSError):
+ os.remove(FILE)
+
+ def test_determine_file_encoding(self):
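+        # determine_file_encoding() returns (encoding, bom_length); a BOM takes
+        # precedence over a `# coding: ...` declaration, and (None, 0) means no
+        # encoding could be detected.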
+ self.assertEqual(determine_file_encoding(b''), (None, 0))
+ self.assertEqual(determine_file_encoding(b'--verbose -x --audio-format mkv\n'), (None, 0))
+
+ self.assertEqual(determine_file_encoding(b'\xef\xbb\xbf'), ('utf-8', 3))
+ self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4))
+ self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2))
+
+ self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2))
+
+ self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0))
+ self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0))
+
+ self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0))
+ self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0))
+
+ self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
+ self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
+
+ def test_get_compatible_ext(self):
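+        # get_compatible_ext() picks a container for the given codec/extension
+        # pairs, honoring `preferences` and falling back to mkv when the
+        # streams cannot share a more specific container.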
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm')
+
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'], preferences=['flv', 'mp4']), 'mp4')
+ self.assertEqual(get_compatible_ext(
+ vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv')
+
+ def test_traverse_obj(self):
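+        # traverse_obj(obj, *paths) walks nested dicts/iterables along each
+        # path; a path item may be a key/index, a tuple/list of keys (branching
+        # into several results), `...` (all values), a filter function taking
+        # (key, value), or a dict mapping output keys to sub-paths.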
+ _TEST_DATA = {
+ 100: 100,
+ 1.2: 1.2,
+ 'str': 'str',
+ 'None': None,
+ '...': ...,
+ 'urls': [
+ {'index': 0, 'url': 'https://www.example.com/0'},
+ {'index': 1, 'url': 'https://www.example.com/1'},
+ ],
+ 'data': (
+ {'index': 2},
+ {'index': 3},
+ ),
+ 'dict': {},
+ }
+
+ # Test base functionality
+ self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
+ msg='allow tuple path')
+ self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
+ msg='allow list path')
+ self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
+ msg='allow iterable path')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
+ msg='single items should be treated as a path')
+ self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
+ self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
+ self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
+
+ # Test Ellipsis behavior
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ...),
+ (item for item in _TEST_DATA.values() if item is not None),
+ msg='`...` should give all values except `None`')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(),
+ msg='`...` selection for dicts should select all values')
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='nested `...` queries should work')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
+ msg='`...` query result should be flattened')
+
+ # Test function as key
+ self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
+ [_TEST_DATA['urls']],
+ msg='function as query key should perform a filter based on (key, value)')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
+                              msg='exceptions in the query function should be caught')
+
+ # Test alternative paths
+ self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
+ msg='multiple `paths` should be treated as alternative paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
+ msg='alternatives should exit early')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
+ msg='alternatives should return `default` if exhausted')
+ self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100,
+ msg='alternatives should track their own branching return')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']),
+ msg='alternatives on empty objects should search further')
+
+ # Test branch and path nesting
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
+ msg='tuple as key should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
+ msg='list as key should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
+ msg='double nesting in path should be treated as paths')
+ self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
+ msg='do not fail early on branching')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+                              msg='triple nesting in path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='ellipsis as branch path start gets flattened')
+
+ # Test dictionary as key
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
+ msg='dict key should result in a dict with the same keys')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
+ {0: 'https://www.example.com/0'},
+ msg='dict key should allow paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
+ {0: ['https://www.example.com/0']},
+ msg='tuple in dict path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
+ {0: ['https://www.example.com/0']},
+ msg='double nesting in dict path should be treated as paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
+ {0: ['https://www.example.com/1', 'https://www.example.com/0']},
+ msg='triple nesting in dict path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
+ msg='remove `None` values when dict key')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...},
+ msg='do not remove `None` values if `default`')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
+ msg='do not remove empty values when dict key')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: {}},
+ msg='do not remove empty values when dict key and a default')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {0: []},
+ msg='if branch in dict key not successful, return `[]`')
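+ # e.g. traverse_obj(_TEST_DATA, {'first': ('urls', 0, 'url'), 'idx': ('data', 0, 'index')})
+ # -> {'first': 'https://www.example.com/0', 'idx': 2} (illustration; the dict keys are arbitrary)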
+
+ # Testing default parameter behavior
+ _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
+ msg='default value should be `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ...,
+ msg='chained fails should result in default')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
+ msg='should not short circuit on `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
+ msg='invalid dict key should result in `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
+ msg='`None` is a deliberate sentinel and should become `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
+ msg='`IndexError` should result in `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
+ msg='if branched but not successful return `default` if defined, not `[]`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None,
+ msg='if branched but not successful return `default` even if `default` is `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [],
+ msg='if branched but not successful return `[]`, not `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [],
+ msg='if branched but object is empty return `[]`, not `default`')
+
+ # Testing expected_type behavior
+ _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str), 'str',
+ msg='accept matching `expected_type` type')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
+ msg='reject non-matching `expected_type` type')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)), '0',
+ msg='transform type using type function')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
+ expected_type=lambda _: 1 / 0), None,
+ msg='wrap expected_type function in try_call')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str), ['str'],
+ msg='eliminate items that expected_type fails on')
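+ # e.g. traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=str) -> None,
+ # since 0 is not a str and no `default` is given (illustration only)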
+
+ # Test get_all behavior
+ _GET_ALL_DATA = {'key': [0, 1, 2]}
+ self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0,
+ msg='if not `get_all`, return only first matching value')
+ self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2],
+ msg='do not overflatten if not `get_all`')
+
+ # Test casesense behavior
+ _CASESENSE_DATA = {
+ 'KeY': 'value0',
+ 0: {
+ 'KeY': 'value1',
+ 0: {'KeY': 'value2'},
+ },
+ }
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
+ msg='dict keys should be case sensitive unless `casesense=False`')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
+ casesense=False), 'value0',
+ msg='allow non-matching key case if `casesense=False`')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
+ casesense=False), ['value1'],
+ msg='allow non-matching key case in branch if `casesense=False`')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
+ casesense=False), ['value2'],
+ msg='allow non-matching key case in branch path if `casesense=False`')
+
+ # Test traverse_string behavior
+ _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
+ msg='do not traverse into string if not `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
+ traverse_string=True), 's',
+ msg='traverse into string if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
+ traverse_string=True), '.',
+ msg='traverse into converted data if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...),
+ traverse_string=True), list('str'),
+ msg='`...` branching into string should result in list')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
+ traverse_string=True), ['s', 'r'],
+ msg='branching into string should result in list')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
+ traverse_string=True), list('str'),
+ msg='function branching into string should result in list')
+
+ # Test is_user_input behavior
+ _IS_USER_INPUT_DATA = {'range8': list(range(8))}
+ self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
+ is_user_input=True), 3,
+ msg='allow for string indexing if `is_user_input`')
+ self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
+ is_user_input=True), tuple(range(8))[3:],
+ msg='allow for string slice if `is_user_input`')
+ self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
+ is_user_input=True), tuple(range(8))[:4:2],
+ msg='allow step in string slice if `is_user_input`')
+ self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
+ is_user_input=True), range(8),
+ msg='`:` should be treated as `...` if `is_user_input`')
+ with self.assertRaises(TypeError, msg='too many params should result in error'):
+ traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)
+
+ # Test re.Match as input obj
+ mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
+ self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
+ msg='`...` on a `re.Match` should give its `groups()`')
+ self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
+ msg='function on a `re.Match` should be passed (group number, value) pairs, numbered from 0')
+ self.assertEqual(traverse_obj(mobj, 'group'), '3',
+ msg='str key on a `re.Match` should give group with that name')
+ self.assertEqual(traverse_obj(mobj, 2), '3',
+ msg='int key on a `re.Match` should give the group at that index')
+ self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
+ msg='str key on a `re.Match` should respect casesense')
+ self.assertEqual(traverse_obj(mobj, 'fail'), None,
+ msg='failing str key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
+ msg='failing str key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, 8), None,
+ msg='failing int key on a `re.Match` should return `default`')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
index 98c6d70..de0cb8e 100644
--- a/test/test_verbose_output.py
+++ b/test/test_verbose_output.py
@@ -1,15 +1,15 @@
#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
+# Allow direct execution
+import os
+import sys
import unittest
-import sys
-import os
-import subprocess
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import subprocess
+
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -17,7 +17,8 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_arg(self):
outp = subprocess.Popen(
[
- sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ sys.executable, 'hypervideo_dl/__main__.py',
+ '-v', '--ignore-config',
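+ # --ignore-config: keep user/system config files from affecting this test's output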
'--username', 'johnsmith@gmail.com',
'--password', 'my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -30,7 +31,8 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_shortarg(self):
outp = subprocess.Popen(
[
- sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ sys.executable, 'hypervideo_dl/__main__.py',
+ '-v', '--ignore-config',
'-u', 'johnsmith@gmail.com',
'-p', 'my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -43,7 +45,8 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_eq(self):
outp = subprocess.Popen(
[
- sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ sys.executable, 'hypervideo_dl/__main__.py',
+ '-v', '--ignore-config',
'--username=johnsmith@gmail.com',
'--password=my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -56,7 +59,8 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_shortarg_eq(self):
outp = subprocess.Popen(
[
- sys.executable, 'hypervideo_dl/__main__.py', '-v',
+ sys.executable, 'hypervideo_dl/__main__.py',
+ '-v', '--ignore-config',
'-u=johnsmith@gmail.com',
'-p=my_secret_password',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
deleted file mode 100644
index 6f6c7ab..0000000
--- a/test/test_write_annotations.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import get_params, try_rm
-
-
-import io
-
-import xml.etree.ElementTree
-
-import hypervideo_dl.YoutubeDL
-import hypervideo_dl.extractor
-
-
-class YoutubeDL(hypervideo_dl.YoutubeDL):
- def __init__(self, *args, **kwargs):
- super(YoutubeDL, self).__init__(*args, **kwargs)
- self.to_stderr = self.to_screen
-
-
-params = get_params({
- 'writeannotations': True,
- 'skip_download': True,
- 'writeinfojson': False,
- 'format': 'flv',
-})
-
-
-TEST_ID = 'gr51aVj-mLg'
-ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
-EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
-
-
-class TestAnnotations(unittest.TestCase):
- def setUp(self):
- # Clear old files
- self.tearDown()
-
- def test_info_json(self):
- expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text.
- ie = hypervideo_dl.extractor.YoutubeIE()
- ydl = YoutubeDL(params)
- ydl.add_info_extractor(ie)
- ydl.download([TEST_ID])
- self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
- annoxml = None
- with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
- annoxml = xml.etree.ElementTree.parse(annof)
- self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
- root = annoxml.getroot()
- self.assertEqual(root.tag, 'document')
- annotationsTag = root.find('annotations')
- self.assertEqual(annotationsTag.tag, 'annotations')
- annotations = annotationsTag.findall('annotation')
-
- # Not all the annotations have TEXT children and the annotations are returned unsorted.
- for a in annotations:
- self.assertEqual(a.tag, 'annotation')
- if a.get('type') == 'text':
- textTag = a.find('TEXT')
- text = textTag.text
- self.assertTrue(text in expected) # assertIn only added in python 2.7
- # remove the first occurrence, there could be more than one annotation with the same text
- expected.remove(text)
- # We should have seen (and removed) all the expected annotation texts.
- self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
-
- def tearDown(self):
- try_rm(ANNOTATIONS_FILE)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index b94b733..50f42d1 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -1,18 +1,16 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, is_download_test
-from hypervideo_dl.extractor import (
- YoutubeIE,
- YoutubeTabIE,
-)
+from test.helper import FakeYDL, is_download_test
+from hypervideo_dl.extractor import YoutubeIE, YoutubeTabIE
+from hypervideo_dl.utils import ExtractorError
@is_download_test
@@ -56,6 +54,18 @@ class TestYoutubeLists(unittest.TestCase):
self.assertEqual(video['duration'], 10)
self.assertEqual(video['uploader'], 'Philipp Hagemeister')
+ def test_youtube_channel_no_uploads(self):
+ dl = FakeYDL()
+ dl.params['extract_flat'] = True
+ ie = YoutubeTabIE(dl)
+ # no uploads
+ with self.assertRaisesRegex(ExtractorError, r'no uploads'):
+ ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA')
+
+ # no uploads and no UCID given
+ with self.assertRaisesRegex(ExtractorError, r'no uploads'):
+ ie.extract('https://www.youtube.com/news')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py
index 4571cc1..2889795 100644
--- a/test/test_youtube_misc.py
+++ b/test/test_youtube_misc.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python3
-from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
diff --git a/test/testdata/certificate/ca.crt b/test/testdata/certificate/ca.crt
new file mode 100644
index 0000000..ddf7be7
--- /dev/null
+++ b/test/testdata/certificate/ca.crt
@@ -0,0 +1,10 @@
+-----BEGIN CERTIFICATE-----
+MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw
+FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz
+MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D
+AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU
+KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F
+iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud
+EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2
+m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1
+-----END CERTIFICATE-----
diff --git a/test/testdata/certificate/ca.key b/test/testdata/certificate/ca.key
new file mode 100644
index 0000000..38920d5
--- /dev/null
+++ b/test/testdata/certificate/ca.key
@@ -0,0 +1,5 @@
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49
+AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH
+YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg==
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/ca.srl b/test/testdata/certificate/ca.srl
new file mode 100644
index 0000000..de2d1ea
--- /dev/null
+++ b/test/testdata/certificate/ca.srl
@@ -0,0 +1 @@
+4A260C33C4D34612646E6321E1E767DF1A95EF0B
diff --git a/test/testdata/certificate/client.crt b/test/testdata/certificate/client.crt
new file mode 100644
index 0000000..874622f
--- /dev/null
+++ b/test/testdata/certificate/client.crt
@@ -0,0 +1,9 @@
+-----BEGIN CERTIFICATE-----
+MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
+A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
+FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
+BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
+XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
+aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
+D0dB8M1kJw==
+-----END CERTIFICATE-----
diff --git a/test/testdata/certificate/client.csr b/test/testdata/certificate/client.csr
new file mode 100644
index 0000000..2d5d7a5
--- /dev/null
+++ b/test/testdata/certificate/client.csr
@@ -0,0 +1,7 @@
+-----BEGIN CERTIFICATE REQUEST-----
+MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG
+SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq
+3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA
+1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq
+6kiYq8Oxx6ZMoI+11k75/Kip1g==
+-----END CERTIFICATE REQUEST-----
diff --git a/test/testdata/certificate/client.key b/test/testdata/certificate/client.key
new file mode 100644
index 0000000..e47389b
--- /dev/null
+++ b/test/testdata/certificate/client.key
@@ -0,0 +1,5 @@
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49
+AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird
+m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw==
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/clientencrypted.key b/test/testdata/certificate/clientencrypted.key
new file mode 100644
index 0000000..0baee37
--- /dev/null
+++ b/test/testdata/certificate/clientencrypted.key
@@ -0,0 +1,8 @@
+-----BEGIN EC PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35
+
+96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS
+rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn
+IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c=
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/clientwithencryptedkey.crt b/test/testdata/certificate/clientwithencryptedkey.crt
new file mode 100644
index 0000000..f357e4c
--- /dev/null
+++ b/test/testdata/certificate/clientwithencryptedkey.crt
@@ -0,0 +1,17 @@
+-----BEGIN CERTIFICATE-----
+MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
+A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
+FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
+BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
+XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
+aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
+D0dB8M1kJw==
+-----END CERTIFICATE-----
+-----BEGIN EC PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35
+
+96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS
+rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn
+IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c=
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/clientwithkey.crt b/test/testdata/certificate/clientwithkey.crt
new file mode 100644
index 0000000..942f6e2
--- /dev/null
+++ b/test/testdata/certificate/clientwithkey.crt
@@ -0,0 +1,14 @@
+-----BEGIN CERTIFICATE-----
+MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG
+A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow
+FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
+BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS
+XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD
+aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY
+D0dB8M1kJw==
+-----END CERTIFICATE-----
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49
+AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird
+m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw==
+-----END EC PRIVATE KEY-----
diff --git a/test/testdata/certificate/instructions.md b/test/testdata/certificate/instructions.md
new file mode 100644
index 0000000..b0e3fbd
--- /dev/null
+++ b/test/testdata/certificate/instructions.md
@@ -0,0 +1,19 @@
+# Generate certificates for client cert tests
+
+## CA
+```sh
+openssl ecparam -name prime256v1 -genkey -noout -out ca.key
+openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest"
+```
+
+## Client
+```sh
+openssl ecparam -name prime256v1 -genkey -noout -out client.key
+openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256
+openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2"
+openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 6027 -sha256
+cp client.crt clientwithkey.crt
+cp client.crt clientwithencryptedkey.crt
+cat client.key >> clientwithkey.crt
+cat clientencrypted.key >> clientwithencryptedkey.crt
+```
\ No newline at end of file
diff --git a/test/testdata/ism/ec-3_test.Manifest b/test/testdata/ism/ec-3_test.Manifest
new file mode 100644
index 0000000..45f95de
--- /dev/null
+++ b/test/testdata/ism/ec-3_test.Manifest
@@ -0,0 +1 @@
+<?xml version="1.0" encoding="utf-8"?><!--Transformed by VSMT using XSL stylesheet for rule Identity--><!-- Created with Unified Streaming Platform (version=1.10.12-18737) --><SmoothStreamingMedia MajorVersion="2" MinorVersion="0" TimeScale="10000000" Duration="370000000"><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="127802" CodecPrivateData="1190" SamplingRate="48000" Channels="2" BitsPerSample="16" PacketSize="4" AudioTag="255" FourCC="AACL" /><c t="0" d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="7253333" /></StreamIndex><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu_1" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu_1={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="224000" CodecPrivateData="00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00" FourCCData="0700200F00" SamplingRate="48000" Channels="6" BitsPerSample="16" PacketSize="896" AudioTag="65534" FourCC="EC-3" /><c t="0" d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="8320000" /></StreamIndex><StreamIndex Type="video" QualityLevels="8" TimeScale="10000000" Language="deu" Name="video_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(video_deu={start time})?noStreamProfile=1" MaxWidth="1920" MaxHeight="1080" DisplayWidth="1920" DisplayHeight="1080"><QualityLevel Index="0" Bitrate="23909" CodecPrivateData="000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8" MaxWidth="384" MaxHeight="216" FourCC="AVC1" /><QualityLevel Index="1" Bitrate="403188" CodecPrivateData="00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2" MaxWidth="400" MaxHeight="224" FourCC="AVC1" /><QualityLevel Index="2" Bitrate="680365" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="3" Bitrate="1253465" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="4" Bitrate="2121558" CodecPrivateData="00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80" MaxWidth="768" MaxHeight="432" FourCC="AVC1" /><QualityLevel Index="5" Bitrate="3275545" CodecPrivateData="00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80" MaxWidth="1280" MaxHeight="720" FourCC="AVC1" /><QualityLevel Index="6" Bitrate="5300196" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><QualityLevel Index="7" Bitrate="8079312" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><c t="0" d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="10000000" /></StreamIndex></SmoothStreamingMedia>
\ No newline at end of file
diff --git a/test/testdata/m3u8/pluzz_francetv_11507.m3u8 b/test/testdata/m3u8/pluzz_francetv_11507.m3u8
deleted file mode 100644
index 0809f5a..0000000
--- a/test/testdata/m3u8/pluzz_francetv_11507.m3u8
+++ /dev/null
@@ -1,14 +0,0 @@
-#EXTM3U
- #EXT-X-VERSION:5
- #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
- #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
-#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
-http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
-#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
-http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
-#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
-http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
-#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
-http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
-#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
-http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0
diff --git a/test/testdata/m3u8/teamcoco_11995.m3u8 b/test/testdata/m3u8/teamcoco_11995.m3u8
deleted file mode 100644
index a6e4216..0000000
--- a/test/testdata/m3u8/teamcoco_11995.m3u8
+++ /dev/null
@@ -1,16 +0,0 @@
-#EXTM3U
-#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
-#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
-#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
-#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
-#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
-hls/CONAN_020217_Highlight_show-2m_v4.m3u8
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
-hls/CONAN_020217_Highlight_show-1m_v4.m3u8
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
-hls/CONAN_020217_Highlight_show-400k_v4.m3u8
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
-hls/CONAN_020217_Highlight_show-400k_v4.m3u8
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
-hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8
diff --git a/test/testdata/m3u8/ted_18923.m3u8 b/test/testdata/m3u8/ted_18923.m3u8
deleted file mode 100644
index 52a2711..0000000
--- a/test/testdata/m3u8/ted_18923.m3u8
+++ /dev/null
@@ -1,28 +0,0 @@
-#EXTM3U
-#EXT-X-VERSION:4
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
-/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
-/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
-/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
-/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
-/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
-/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
-/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
-#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
-/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
-
-#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
-#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
-#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
-#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
-#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
-#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
-#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
-
-#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
diff --git a/test/testdata/m3u8/toggle_mobile_12211.m3u8 b/test/testdata/m3u8/toggle_mobile_12211.m3u8
deleted file mode 100644
index 69604e6..0000000
--- a/test/testdata/m3u8/toggle_mobile_12211.m3u8
+++ /dev/null
@@ -1,13 +0,0 @@
-#EXTM3U
-#EXT-X-VERSION:4
-#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
-#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
-
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
-http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
-http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
-http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
-http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8
diff --git a/test/testdata/m3u8/twitch_vod.m3u8 b/test/testdata/m3u8/twitch_vod.m3u8
deleted file mode 100644
index 7617277..0000000
--- a/test/testdata/m3u8/twitch_vod.m3u8
+++ /dev/null
@@ -1,20 +0,0 @@
-#EXTM3U
-#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
-#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
-https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
-#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
-https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
-#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
-https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
-#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
-https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
-#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
-https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
-#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
-https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8
diff --git a/test/testdata/m3u8/vidio.m3u8 b/test/testdata/m3u8/vidio.m3u8
deleted file mode 100644
index 89c2444..0000000
--- a/test/testdata/m3u8/vidio.m3u8
+++ /dev/null
@@ -1,10 +0,0 @@
-#EXTM3U
-
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
-https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
-
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
-https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
-
-#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
-https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 4a31eb2..0000000
--- a/tox.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-[tox]
-envlist = py26,py27,py33,py34,py35
-
-# Needed?
-[testenv]
-deps =
- nose
- coverage
-# We need a valid $HOME for test_compat_expanduser
-passenv = HOME
-defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
- --exclude test_subtitles.py --exclude test_write_annotations.py
- --exclude test_youtube_lists.py --exclude test_socks.py
-commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=hypervideo_dl --cover-html
- # test.test_download:TestDownload.test_NowVideo