author    Jesús <heckyel@hyperbola.info>  2021-10-18 15:24:21 -0500
committer Jesús <heckyel@hyperbola.info>  2021-10-18 15:24:21 -0500
commit    5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e (patch)
tree      65209bc739db35e31f1c9b5b868eb5df4fe12ae3
parent    27fe903c511691c078942bef5ee9a05a43b15c8f (diff)
download  hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.lz
          hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.xz
          hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.zip
update from upstream
-rw-r--r--  AUTHORS | 249
-rw-r--r--  CONTRIBUTORS | 127
-rw-r--r--  Changelog.md | 1237
-rw-r--r--  MANIFEST.in | 12
-rw-r--r--  Makefile | 119
-rwxr-xr-x  bin/hypervideo | 2
-rwxr-xr-x  devscripts/bash-completion.py | 4
-rw-r--r--  devscripts/buildserver.py | 2
-rw-r--r--  devscripts/check-porn.py | 2
-rwxr-xr-x  devscripts/fish-completion.py | 5
-rw-r--r--  devscripts/generate_aes_testdata.py | 1
-rw-r--r--  devscripts/lazy_load_template.py | 26
-rw-r--r--  devscripts/logo.ico | bin 0 -> 41043 bytes
-rwxr-xr-x  devscripts/make_contributing.py | 7
-rw-r--r--  devscripts/make_lazy_extractors.py | 30
-rwxr-xr-x  devscripts/make_readme.py | 5
-rw-r--r--  devscripts/make_supportedsites.py | 2
-rw-r--r--  devscripts/prepare_manpage.py | 1
-rw-r--r--  devscripts/run_tests.bat | 21
-rwxr-xr-x  devscripts/run_tests.sh | 36
-rw-r--r--  devscripts/zsh-completion.in | 2
-rwxr-xr-x  devscripts/zsh-completion.py | 4
-rwxr-xr-x  hypervideo_dl/YoutubeDL.py | 2739
-rw-r--r--  hypervideo_dl/__init__.py | 479
-rwxr-xr-x  hypervideo_dl/__main__.py | 2
-rw-r--r--  hypervideo_dl/aes.py | 265
-rw-r--r--  hypervideo_dl/cache.py | 2
-rw-r--r--  hypervideo_dl/compat.py | 3048
-rw-r--r--  hypervideo_dl/cookies.py | 745
-rw-r--r--  hypervideo_dl/downloader/__init__.py | 105
-rw-r--r--  hypervideo_dl/downloader/common.py | 171
-rw-r--r--  hypervideo_dl/downloader/dash.py | 88
-rw-r--r--  hypervideo_dl/downloader/external.py | 258
-rw-r--r--  hypervideo_dl/downloader/f4m.py | 19
-rw-r--r--  hypervideo_dl/downloader/fragment.py | 255
-rw-r--r--  hypervideo_dl/downloader/hls.py | 285
-rw-r--r--  hypervideo_dl/downloader/http.py | 37
-rw-r--r--  hypervideo_dl/downloader/ism.py | 58
-rw-r--r--  hypervideo_dl/downloader/mhtml.py | 202
-rw-r--r--  hypervideo_dl/downloader/niconico.py | 57
-rw-r--r--  hypervideo_dl/downloader/rtmp.py | 18
-rw-r--r--  hypervideo_dl/downloader/rtsp.py | 4
-rw-r--r--  hypervideo_dl/downloader/websocket.py | 59
-rw-r--r--  hypervideo_dl/downloader/youtube_live_chat.py | 236
-rw-r--r--  hypervideo_dl/extractor/__init__.py | 9
-rw-r--r--  hypervideo_dl/extractor/abc.py | 104
-rw-r--r--  hypervideo_dl/extractor/abcnews.py | 3
-rw-r--r--  hypervideo_dl/extractor/abcotvs.py | 3
-rw-r--r--  hypervideo_dl/extractor/acast.py | 3
-rw-r--r--  hypervideo_dl/extractor/adobepass.py | 155
-rw-r--r--  hypervideo_dl/extractor/adobetv.py | 8
-rw-r--r--  hypervideo_dl/extractor/adultswim.py | 3
-rw-r--r--  hypervideo_dl/extractor/aenetworks.py | 11
-rw-r--r--  hypervideo_dl/extractor/afreecatv.py | 23
-rw-r--r--  hypervideo_dl/extractor/aljazeera.py | 5
-rw-r--r--  hypervideo_dl/extractor/alura.py | 179
-rw-r--r--  hypervideo_dl/extractor/amcnetworks.py | 69
-rw-r--r--  hypervideo_dl/extractor/americastestkitchen.py | 5
-rw-r--r--  hypervideo_dl/extractor/animelab.py | 285
-rw-r--r--  hypervideo_dl/extractor/anvato.py | 25
-rw-r--r--  hypervideo_dl/extractor/anvato_token_generator/__init__.py | 7
-rw-r--r--  hypervideo_dl/extractor/anvato_token_generator/common.py | 6
-rw-r--r--  hypervideo_dl/extractor/anvato_token_generator/nfl.py | 30
-rw-r--r--  hypervideo_dl/extractor/aol.py | 9
-rw-r--r--  hypervideo_dl/extractor/apa.py | 2
-rw-r--r--  hypervideo_dl/extractor/aparat.py | 3
-rw-r--r--  hypervideo_dl/extractor/appleconnect.py | 13
-rw-r--r--  hypervideo_dl/extractor/appletrailers.py | 2
-rw-r--r--  hypervideo_dl/extractor/archiveorg.py | 427
-rw-r--r--  hypervideo_dl/extractor/arcpublishing.py | 11
-rw-r--r--  hypervideo_dl/extractor/ard.py | 181
-rw-r--r--  hypervideo_dl/extractor/arkena.py | 6
-rw-r--r--  hypervideo_dl/extractor/arte.py | 15
-rw-r--r--  hypervideo_dl/extractor/asiancrush.py | 4
-rw-r--r--  hypervideo_dl/extractor/atresplayer.py | 13
-rw-r--r--  hypervideo_dl/extractor/atvat.py | 125
-rw-r--r--  hypervideo_dl/extractor/audius.py | 274
-rw-r--r--  hypervideo_dl/extractor/awaan.py | 7
-rw-r--r--  hypervideo_dl/extractor/azmedien.py | 3
-rw-r--r--  hypervideo_dl/extractor/baidu.py | 3
-rw-r--r--  hypervideo_dl/extractor/bandcamp.py | 48
-rw-r--r--  hypervideo_dl/extractor/bannedvideo.py | 158
-rw-r--r--  hypervideo_dl/extractor/bbc.py | 9
-rw-r--r--  hypervideo_dl/extractor/beatport.py | 4
-rw-r--r--  hypervideo_dl/extractor/beeg.py | 4
-rw-r--r--  hypervideo_dl/extractor/behindkink.py | 3
-rw-r--r--  hypervideo_dl/extractor/bellmedia.py | 3
-rw-r--r--  hypervideo_dl/extractor/bet.py | 2
-rw-r--r--  hypervideo_dl/extractor/bilibili.py | 468
-rw-r--r--  hypervideo_dl/extractor/bitchute.py | 32
-rw-r--r--  hypervideo_dl/extractor/bitwave.py | 61
-rw-r--r--  hypervideo_dl/extractor/blackboardcollaborate.py | 67
-rw-r--r--  hypervideo_dl/extractor/blinkx.py | 86
-rw-r--r--  hypervideo_dl/extractor/bokecc.py | 5
-rw-r--r--  hypervideo_dl/extractor/bongacams.py | 3
-rw-r--r--  hypervideo_dl/extractor/box.py | 3
-rw-r--r--  hypervideo_dl/extractor/bpb.py | 2
-rw-r--r--  hypervideo_dl/extractor/br.py | 5
-rw-r--r--  hypervideo_dl/extractor/bravotv.py | 38
-rw-r--r--  hypervideo_dl/extractor/breakcom.py | 3
-rw-r--r--  hypervideo_dl/extractor/brightcove.py | 33
-rw-r--r--  hypervideo_dl/extractor/byutv.py | 17
-rw-r--r--  hypervideo_dl/extractor/c56.py | 3
-rw-r--r--  hypervideo_dl/extractor/cam4.py | 32
-rw-r--r--  hypervideo_dl/extractor/cammodels.py | 2
-rw-r--r--  hypervideo_dl/extractor/canalplus.py | 5
-rw-r--r--  hypervideo_dl/extractor/canvas.py | 83
-rw-r--r--  hypervideo_dl/extractor/cbc.py | 476
-rw-r--r--  hypervideo_dl/extractor/cbs.py | 134
-rw-r--r--  hypervideo_dl/extractor/cbsinteractive.py | 3
-rw-r--r--  hypervideo_dl/extractor/cbssports.py | 3
-rw-r--r--  hypervideo_dl/extractor/ccma.py | 3
-rw-r--r--  hypervideo_dl/extractor/cctv.py | 2
-rw-r--r--  hypervideo_dl/extractor/cda.py | 44
-rw-r--r--  hypervideo_dl/extractor/ceskatelevize.py | 5
-rw-r--r--  hypervideo_dl/extractor/cgtn.py | 64
-rw-r--r--  hypervideo_dl/extractor/channel9.py | 8
-rw-r--r--  hypervideo_dl/extractor/chilloutzone.py | 3
-rw-r--r--  hypervideo_dl/extractor/chingari.py | 209
-rw-r--r--  hypervideo_dl/extractor/cinemax.py | 3
-rw-r--r--  hypervideo_dl/extractor/ciscolive.py | 7
-rw-r--r--  hypervideo_dl/extractor/ciscowebex.py | 90
-rw-r--r--  hypervideo_dl/extractor/cjsw.py | 3
-rw-r--r--  hypervideo_dl/extractor/clyp.py | 7
-rw-r--r--  hypervideo_dl/extractor/cmt.py | 6
-rw-r--r--  hypervideo_dl/extractor/cnbc.py | 3
-rw-r--r--  hypervideo_dl/extractor/cnn.py | 3
-rw-r--r--  hypervideo_dl/extractor/comedycentral.py | 5
-rw-r--r--  hypervideo_dl/extractor/common.py | 1653
-rw-r--r--  hypervideo_dl/extractor/commonmistakes.py | 4
-rw-r--r--  hypervideo_dl/extractor/commonprotocols.py | 14
-rw-r--r--  hypervideo_dl/extractor/condenast.py | 2
-rw-r--r--  hypervideo_dl/extractor/corus.py | 5
-rw-r--r--  hypervideo_dl/extractor/coub.py | 6
-rw-r--r--  hypervideo_dl/extractor/crackle.py | 279
-rw-r--r--  hypervideo_dl/extractor/crunchyroll.py | 133
-rw-r--r--  hypervideo_dl/extractor/cultureunplugged.py | 3
-rw-r--r--  hypervideo_dl/extractor/curiositystream.py | 22
-rw-r--r--  hypervideo_dl/extractor/dailymotion.py | 10
-rw-r--r--  hypervideo_dl/extractor/damtomo.py | 113
-rw-r--r--  hypervideo_dl/extractor/daum.py | 9
-rw-r--r--  hypervideo_dl/extractor/dbtv.py | 2
-rw-r--r--  hypervideo_dl/extractor/deezer.py | 127
-rw-r--r--  hypervideo_dl/extractor/dfb.py | 3
-rw-r--r--  hypervideo_dl/extractor/digiteka.py | 2
-rw-r--r--  hypervideo_dl/extractor/discovery.py | 3
-rw-r--r--  hypervideo_dl/extractor/discoverynetworks.py | 3
-rw-r--r--  hypervideo_dl/extractor/discoveryplusindia.py | 98
-rw-r--r--  hypervideo_dl/extractor/disney.py | 5
-rw-r--r--  hypervideo_dl/extractor/dispeak.py | 2
-rw-r--r--  hypervideo_dl/extractor/dlive.py | 3
-rw-r--r--  hypervideo_dl/extractor/doodstream.py | 71
-rw-r--r--  hypervideo_dl/extractor/dplay.py | 112
-rw-r--r--  hypervideo_dl/extractor/drbonanza.py | 3
-rw-r--r--  hypervideo_dl/extractor/dropbox.py | 4
-rw-r--r--  hypervideo_dl/extractor/drtuber.py | 2
-rw-r--r--  hypervideo_dl/extractor/drtv.py | 4
-rw-r--r--  hypervideo_dl/extractor/dtube.py | 3
-rw-r--r--  hypervideo_dl/extractor/duboku.py | 242
-rw-r--r--  hypervideo_dl/extractor/dw.py | 14
-rw-r--r--  hypervideo_dl/extractor/eagleplatform.py | 2
-rw-r--r--  hypervideo_dl/extractor/egghead.py | 19
-rw-r--r--  hypervideo_dl/extractor/eighttracks.py | 20
-rw-r--r--  hypervideo_dl/extractor/einthusan.py | 3
-rw-r--r--  hypervideo_dl/extractor/elonet.py | 89
-rw-r--r--  hypervideo_dl/extractor/epicon.py | 119
-rw-r--r--  hypervideo_dl/extractor/eporner.py | 3
-rw-r--r--  hypervideo_dl/extractor/eroprofile.py | 39
-rw-r--r--  hypervideo_dl/extractor/espn.py | 2
-rw-r--r--  hypervideo_dl/extractor/europa.py | 4
-rw-r--r--  hypervideo_dl/extractor/euscreen.py | 64
-rw-r--r--  hypervideo_dl/extractor/everyonesmixtape.py | 76
-rw-r--r--  hypervideo_dl/extractor/extractors.py | 320
-rw-r--r--  hypervideo_dl/extractor/facebook.py | 151
-rw-r--r--  hypervideo_dl/extractor/fancode.py | 187
-rw-r--r--  hypervideo_dl/extractor/fc2.py | 3
-rw-r--r--  hypervideo_dl/extractor/filmmodu.py | 74
-rw-r--r--  hypervideo_dl/extractor/filmweb.py | 3
-rw-r--r--  hypervideo_dl/extractor/firsttv.py | 2
-rw-r--r--  hypervideo_dl/extractor/fivetv.py | 3
-rw-r--r--  hypervideo_dl/extractor/flickr.py | 2
-rw-r--r--  hypervideo_dl/extractor/fourtube.py | 4
-rw-r--r--  hypervideo_dl/extractor/foxnews.py | 2
-rw-r--r--  hypervideo_dl/extractor/francetv.py | 385
-rw-r--r--  hypervideo_dl/extractor/frontendmasters.py | 2
-rw-r--r--  hypervideo_dl/extractor/funimation.py | 408
-rw-r--r--  hypervideo_dl/extractor/funk.py | 3
-rw-r--r--  hypervideo_dl/extractor/fxnetworks.py | 77
-rw-r--r--  hypervideo_dl/extractor/gab.py | 64
-rw-r--r--  hypervideo_dl/extractor/gaia.py | 3
-rw-r--r--  hypervideo_dl/extractor/gamestar.py | 3
-rw-r--r--  hypervideo_dl/extractor/gaskrank.py | 2
-rw-r--r--  hypervideo_dl/extractor/gazeta.py | 3
-rw-r--r--  hypervideo_dl/extractor/gdcvault.py | 2
-rw-r--r--  hypervideo_dl/extractor/gedidigital.py | 57
-rw-r--r--  hypervideo_dl/extractor/generic.py | 320
-rw-r--r--  hypervideo_dl/extractor/gettr.py | 110
-rw-r--r--  hypervideo_dl/extractor/giantbomb.py | 3
-rw-r--r--  hypervideo_dl/extractor/globo.py | 157
-rw-r--r--  hypervideo_dl/extractor/go.py | 19
-rw-r--r--  hypervideo_dl/extractor/godtube.py | 3
-rw-r--r--  hypervideo_dl/extractor/googledrive.py | 4
-rw-r--r--  hypervideo_dl/extractor/googlepodcasts.py | 3
-rw-r--r--  hypervideo_dl/extractor/googlesearch.py | 28
-rw-r--r--  hypervideo_dl/extractor/gopro.py | 110
-rw-r--r--  hypervideo_dl/extractor/gotostage.py | 73
-rw-r--r--  hypervideo_dl/extractor/gronkh.py | 43
-rw-r--r--  hypervideo_dl/extractor/hearthisat.py | 90
-rw-r--r--  hypervideo_dl/extractor/hidive.py | 100
-rw-r--r--  hypervideo_dl/extractor/hotstar.py | 303
-rw-r--r--  hypervideo_dl/extractor/hrfensehen.py | 102
-rw-r--r--  hypervideo_dl/extractor/hrti.py | 5
-rw-r--r--  hypervideo_dl/extractor/hungama.py | 58
-rw-r--r--  hypervideo_dl/extractor/ichinanalive.py | 167
-rw-r--r--  hypervideo_dl/extractor/ign.py | 2
-rw-r--r--  hypervideo_dl/extractor/imggaming.py | 5
-rw-r--r--  hypervideo_dl/extractor/imgur.py | 2
-rw-r--r--  hypervideo_dl/extractor/instagram.py | 35
-rw-r--r--  hypervideo_dl/extractor/internetvideoarchive.py | 7
-rw-r--r--  hypervideo_dl/extractor/iprima.py | 2
-rw-r--r--  hypervideo_dl/extractor/iqiyi.py | 2
-rw-r--r--  hypervideo_dl/extractor/itv.py | 173
-rw-r--r--  hypervideo_dl/extractor/ivi.py | 33
-rw-r--r--  hypervideo_dl/extractor/ivideon.py | 3
-rw-r--r--  hypervideo_dl/extractor/iwara.py | 24
-rw-r--r--  hypervideo_dl/extractor/jeuxvideo.py | 3
-rw-r--r--  hypervideo_dl/extractor/joj.py | 216
-rw-r--r--  hypervideo_dl/extractor/jove.py | 3
-rw-r--r--  hypervideo_dl/extractor/jwplatform.py | 11
-rw-r--r--  hypervideo_dl/extractor/kakao.py | 121
-rw-r--r--  hypervideo_dl/extractor/kaltura.py | 4
-rw-r--r--  hypervideo_dl/extractor/kanalplay.py | 96
-rw-r--r--  hypervideo_dl/extractor/keezmovies.py | 4
-rw-r--r--  hypervideo_dl/extractor/kinja.py | 2
-rw-r--r--  hypervideo_dl/extractor/koo.py | 116
-rw-r--r--  hypervideo_dl/extractor/kusi.py | 3
-rw-r--r--  hypervideo_dl/extractor/kuwo.py | 2
-rw-r--r--  hypervideo_dl/extractor/la7.py | 174
-rw-r--r--  hypervideo_dl/extractor/lbry.py | 30
-rw-r--r--  hypervideo_dl/extractor/lecturio.py | 4
-rw-r--r--  hypervideo_dl/extractor/leeco.py | 2
-rw-r--r--  hypervideo_dl/extractor/lego.py | 3
-rw-r--r--  hypervideo_dl/extractor/libsyn.py | 3
-rw-r--r--  hypervideo_dl/extractor/lifenews.py | 2
-rw-r--r--  hypervideo_dl/extractor/limelight.py | 11
-rw-r--r--  hypervideo_dl/extractor/line.py | 10
-rw-r--r--  hypervideo_dl/extractor/linkedin.py | 32
-rw-r--r--  hypervideo_dl/extractor/linuxacademy.py | 21
-rw-r--r--  hypervideo_dl/extractor/litv.py | 2
-rw-r--r--  hypervideo_dl/extractor/livestream.py | 8
-rw-r--r--  hypervideo_dl/extractor/lnkgo.py | 3
-rw-r--r--  hypervideo_dl/extractor/localnews8.py | 3
-rw-r--r--  hypervideo_dl/extractor/lovehomeporn.py | 3
-rw-r--r--  hypervideo_dl/extractor/lrt.py | 3
-rw-r--r--  hypervideo_dl/extractor/lynda.py | 6
-rw-r--r--  hypervideo_dl/extractor/magentamusik360.py | 61
-rw-r--r--  hypervideo_dl/extractor/mailru.py | 25
-rw-r--r--  hypervideo_dl/extractor/manoto.py | 138
-rw-r--r--  hypervideo_dl/extractor/massengeschmacktv.py | 2
-rw-r--r--  hypervideo_dl/extractor/mdr.py | 4
-rw-r--r--  hypervideo_dl/extractor/medaltv.py | 4
-rw-r--r--  hypervideo_dl/extractor/mediaite.py | 93
-rw-r--r--  hypervideo_dl/extractor/mediaklikk.py | 104
-rw-r--r--  hypervideo_dl/extractor/mediaset.py | 116
-rw-r--r--  hypervideo_dl/extractor/mediasite.py | 69
-rw-r--r--  hypervideo_dl/extractor/metacafe.py | 4
-rw-r--r--  hypervideo_dl/extractor/metacritic.py | 2
-rw-r--r--  hypervideo_dl/extractor/mgoon.py | 3
-rw-r--r--  hypervideo_dl/extractor/microsoftvirtualacademy.py | 4
-rw-r--r--  hypervideo_dl/extractor/mildom.py | 258
-rw-r--r--  hypervideo_dl/extractor/minoto.py | 5
-rw-r--r--  hypervideo_dl/extractor/mirrativ.py | 134
-rw-r--r--  hypervideo_dl/extractor/mit.py | 2
-rw-r--r--  hypervideo_dl/extractor/mixcloud.py | 7
-rw-r--r--  hypervideo_dl/extractor/moevideo.py | 3
-rw-r--r--  hypervideo_dl/extractor/mojvideo.py | 3
-rw-r--r--  hypervideo_dl/extractor/morningstar.py | 3
-rw-r--r--  hypervideo_dl/extractor/motherless.py | 30
-rw-r--r--  hypervideo_dl/extractor/moviezine.py | 3
-rw-r--r--  hypervideo_dl/extractor/msn.py | 4
-rw-r--r--  hypervideo_dl/extractor/mtv.py | 188
-rw-r--r--  hypervideo_dl/extractor/muenchentv.py | 2
-rw-r--r--  hypervideo_dl/extractor/musescore.py | 67
-rw-r--r--  hypervideo_dl/extractor/mxplayer.py | 222
-rw-r--r--  hypervideo_dl/extractor/mychannels.py | 3
-rw-r--r--  hypervideo_dl/extractor/myspace.py | 16
-rw-r--r--  hypervideo_dl/extractor/myvideoge.py | 56
-rw-r--r--  hypervideo_dl/extractor/n1.py | 136
-rw-r--r--  hypervideo_dl/extractor/naver.py | 85
-rw-r--r--  hypervideo_dl/extractor/nba.py | 13
-rw-r--r--  hypervideo_dl/extractor/nbc.py | 161
-rw-r--r--  hypervideo_dl/extractor/ndr.py | 179
-rw-r--r--  hypervideo_dl/extractor/nebula.py | 238
-rw-r--r--  hypervideo_dl/extractor/neteasemusic.py | 2
-rw-r--r--  hypervideo_dl/extractor/netzkino.py | 50
-rw-r--r--  hypervideo_dl/extractor/newgrounds.py | 217
-rw-r--r--  hypervideo_dl/extractor/nexx.py | 2
-rw-r--r--  hypervideo_dl/extractor/nfhsnetwork.py | 144
-rw-r--r--  hypervideo_dl/extractor/nhk.py | 5
-rw-r--r--  hypervideo_dl/extractor/nhl.py | 3
-rw-r--r--  hypervideo_dl/extractor/nick.py | 85
-rw-r--r--  hypervideo_dl/extractor/niconico.py | 660
-rw-r--r--  hypervideo_dl/extractor/ninecninemedia.py | 9
-rw-r--r--  hypervideo_dl/extractor/ninenow.py | 58
-rw-r--r--  hypervideo_dl/extractor/nitter.py | 228
-rw-r--r--  hypervideo_dl/extractor/noco.py | 235
-rw-r--r--  hypervideo_dl/extractor/nova.py | 4
-rw-r--r--  hypervideo_dl/extractor/novaplay.py | 63
-rw-r--r--  hypervideo_dl/extractor/npo.py | 7
-rw-r--r--  hypervideo_dl/extractor/nrk.py | 8
-rw-r--r--  hypervideo_dl/extractor/ntvde.py | 2
-rw-r--r--  hypervideo_dl/extractor/nuvid.py | 86
-rw-r--r--  hypervideo_dl/extractor/nytimes.py | 10
-rw-r--r--  hypervideo_dl/extractor/nzherald.py | 98
-rw-r--r--  hypervideo_dl/extractor/odnoklassniki.py | 5
-rw-r--r--  hypervideo_dl/extractor/olympics.py | 56
-rw-r--r--  hypervideo_dl/extractor/on24.py | 91
-rw-r--r--  hypervideo_dl/extractor/ondemandkorea.py | 38
-rw-r--r--  hypervideo_dl/extractor/onet.py | 4
-rw-r--r--  hypervideo_dl/extractor/ooyala.py | 5
-rw-r--r--  hypervideo_dl/extractor/openload.py | 3
-rw-r--r--  hypervideo_dl/extractor/openrec.py | 126
-rw-r--r--  hypervideo_dl/extractor/ora.py | 2
-rw-r--r--  hypervideo_dl/extractor/orf.py | 5
-rw-r--r--  hypervideo_dl/extractor/packtpub.py | 5
-rw-r--r--  hypervideo_dl/extractor/palcomp3.py | 7
-rw-r--r--  hypervideo_dl/extractor/pandoratv.py | 7
-rw-r--r--  hypervideo_dl/extractor/paramountplus.py | 145
-rw-r--r--  hypervideo_dl/extractor/parliamentliveuk.py | 76
-rw-r--r--  hypervideo_dl/extractor/parlview.py | 68
-rw-r--r--  hypervideo_dl/extractor/patreon.py | 86
-rw-r--r--  hypervideo_dl/extractor/pbs.py | 33
-rw-r--r--  hypervideo_dl/extractor/peertube.py | 818
-rw-r--r--  hypervideo_dl/extractor/peloton.py | 222
-rw-r--r--  hypervideo_dl/extractor/performgroup.py | 3
-rw-r--r--  hypervideo_dl/extractor/periscope.py | 8
-rw-r--r--  hypervideo_dl/extractor/philharmoniedeparis.py | 2
-rw-r--r--  hypervideo_dl/extractor/photobucket.py | 3
-rw-r--r--  hypervideo_dl/extractor/piksel.py | 2
-rw-r--r--  hypervideo_dl/extractor/pinterest.py | 6
-rw-r--r--  hypervideo_dl/extractor/pladform.py | 4
-rw-r--r--  hypervideo_dl/extractor/playfm.py | 3
-rw-r--r--  hypervideo_dl/extractor/playplustv.py | 3
-rw-r--r--  hypervideo_dl/extractor/playtvak.py | 2
-rw-r--r--  hypervideo_dl/extractor/playwire.py | 3
-rw-r--r--  hypervideo_dl/extractor/pluralsight.py | 9
-rw-r--r--  hypervideo_dl/extractor/plutotv.py | 184
-rw-r--r--  hypervideo_dl/extractor/podomatic.py | 3
-rw-r--r--  hypervideo_dl/extractor/pokemon.py | 73
-rw-r--r--  hypervideo_dl/extractor/polskieradio.py | 47
-rw-r--r--  hypervideo_dl/extractor/popcorntimes.py | 3
-rw-r--r--  hypervideo_dl/extractor/popcorntv.py | 3
-rw-r--r--  hypervideo_dl/extractor/porncom.py | 2
-rw-r--r--  hypervideo_dl/extractor/pornflip.py | 82
-rw-r--r--  hypervideo_dl/extractor/pornhd.py | 3
-rw-r--r--  hypervideo_dl/extractor/pornhub.py | 126
-rw-r--r--  hypervideo_dl/extractor/pornovoisines.py | 3
-rw-r--r--  hypervideo_dl/extractor/pornoxo.py | 3
-rw-r--r--  hypervideo_dl/extractor/presstv.py | 3
-rw-r--r--  hypervideo_dl/extractor/projectveritas.py | 55
-rw-r--r--  hypervideo_dl/extractor/prosiebensat1.py | 4
-rw-r--r--  hypervideo_dl/extractor/pyvideo.py | 2
-rw-r--r--  hypervideo_dl/extractor/qqmusic.py | 2
-rw-r--r--  hypervideo_dl/extractor/radiko.py | 234
-rw-r--r--  hypervideo_dl/extractor/radiocanada.py | 3
-rw-r--r--  hypervideo_dl/extractor/radiofrance.py | 4
-rw-r--r--  hypervideo_dl/extractor/radlive.py | 179
-rw-r--r--  hypervideo_dl/extractor/rai.py | 146
-rw-r--r--  hypervideo_dl/extractor/raywenderlich.py | 2
-rw-r--r--  hypervideo_dl/extractor/rbmaradio.py | 3
-rw-r--r--  hypervideo_dl/extractor/rcs.py | 427
-rw-r--r--  hypervideo_dl/extractor/rcti.py | 354
-rw-r--r--  hypervideo_dl/extractor/redbulltv.py | 5
-rw-r--r--  hypervideo_dl/extractor/reddit.py | 32
-rw-r--r--  hypervideo_dl/extractor/redtube.py | 3
-rw-r--r--  hypervideo_dl/extractor/rice.py | 2
-rw-r--r--  hypervideo_dl/extractor/rmcdecouverte.py | 29
-rw-r--r--  hypervideo_dl/extractor/roosterteeth.py | 26
-rw-r--r--  hypervideo_dl/extractor/roxwel.py | 3
-rw-r--r--  hypervideo_dl/extractor/rtbf.py | 4
-rw-r--r--  hypervideo_dl/extractor/rtl2.py | 4
-rw-r--r--  hypervideo_dl/extractor/rtp.py | 88
-rw-r--r--  hypervideo_dl/extractor/rts.py | 2
-rw-r--r--  hypervideo_dl/extractor/rtve.py | 3
-rw-r--r--  hypervideo_dl/extractor/rumble.py | 47
-rw-r--r--  hypervideo_dl/extractor/rutube.py | 11
-rw-r--r--  hypervideo_dl/extractor/rutv.py | 8
-rw-r--r--  hypervideo_dl/extractor/ruutu.py | 6
-rw-r--r--  hypervideo_dl/extractor/safari.py | 11
-rw-r--r--  hypervideo_dl/extractor/saitosan.py | 78
-rw-r--r--  hypervideo_dl/extractor/sapo.py | 2
-rw-r--r--  hypervideo_dl/extractor/savefrom.py | 3
-rw-r--r--  hypervideo_dl/extractor/scrippsnetworks.py | 5
-rw-r--r--  hypervideo_dl/extractor/seeker.py | 2
-rw-r--r--  hypervideo_dl/extractor/senateisvp.py | 2
-rw-r--r--  hypervideo_dl/extractor/sendtonews.py | 4
-rw-r--r--  hypervideo_dl/extractor/sevenplus.py | 48
-rw-r--r--  hypervideo_dl/extractor/seznamzpravy.py | 4
-rw-r--r--  hypervideo_dl/extractor/shahid.py | 6
-rw-r--r--  hypervideo_dl/extractor/shemaroome.py | 104
-rw-r--r--  hypervideo_dl/extractor/simplecast.py | 2
-rw-r--r--  hypervideo_dl/extractor/sina.py | 9
-rw-r--r--  hypervideo_dl/extractor/sixplay.py | 8
-rw-r--r--  hypervideo_dl/extractor/skynewsau.py | 46
-rw-r--r--  hypervideo_dl/extractor/slideshare.py | 3
-rw-r--r--  hypervideo_dl/extractor/snotr.py | 3
-rw-r--r--  hypervideo_dl/extractor/sohu.py | 4
-rw-r--r--  hypervideo_dl/extractor/sonyliv.py | 72
-rw-r--r--  hypervideo_dl/extractor/soundcloud.py | 279
-rw-r--r--  hypervideo_dl/extractor/soundgasm.py | 2
-rw-r--r--  hypervideo_dl/extractor/southpark.py | 64
-rw-r--r--  hypervideo_dl/extractor/sovietscloset.py | 221
-rw-r--r--  hypervideo_dl/extractor/spankbang.py | 32
-rw-r--r--  hypervideo_dl/extractor/spankwire.py | 2
-rw-r--r--  hypervideo_dl/extractor/spiegeltv.py | 17
-rw-r--r--  hypervideo_dl/extractor/sport5.py | 3
-rw-r--r--  hypervideo_dl/extractor/sportdeutschland.py | 11
-rw-r--r--  hypervideo_dl/extractor/springboardplatform.py | 2
-rw-r--r--  hypervideo_dl/extractor/srgssr.py | 19
-rw-r--r--  hypervideo_dl/extractor/stanfordoc.py | 2
-rw-r--r--  hypervideo_dl/extractor/startv.py | 103
-rw-r--r--  hypervideo_dl/extractor/steam.py | 4
-rw-r--r--  hypervideo_dl/extractor/streamable.py | 8
-rw-r--r--  hypervideo_dl/extractor/streamanity.py | 51
-rw-r--r--  hypervideo_dl/extractor/streamcloud.py | 4
-rw-r--r--  hypervideo_dl/extractor/stv.py | 3
-rw-r--r--  hypervideo_dl/extractor/svt.py | 10
-rw-r--r--  hypervideo_dl/extractor/tagesschau.py | 4
-rw-r--r--  hypervideo_dl/extractor/tastytrade.py | 43
-rw-r--r--  hypervideo_dl/extractor/tbs.py | 11
-rw-r--r--  hypervideo_dl/extractor/teachable.py | 4
-rw-r--r--  hypervideo_dl/extractor/teachertube.py | 2
-rw-r--r--  hypervideo_dl/extractor/techtalks.py | 2
-rw-r--r--  hypervideo_dl/extractor/tele13.py | 2
-rw-r--r--  hypervideo_dl/extractor/tele5.py | 4
-rw-r--r--  hypervideo_dl/extractor/telemb.py | 4
-rw-r--r--  hypervideo_dl/extractor/telemundo.py | 58
-rw-r--r--  hypervideo_dl/extractor/tennistv.py | 10
-rw-r--r--  hypervideo_dl/extractor/tenplay.py | 88
-rw-r--r--  hypervideo_dl/extractor/testurl.py | 2
-rw-r--r--  hypervideo_dl/extractor/tf1.py | 3
-rw-r--r--  hypervideo_dl/extractor/theplatform.py | 13
-rw-r--r--  hypervideo_dl/extractor/theta.py | 87
-rw-r--r--  hypervideo_dl/extractor/theweatherchannel.py | 3
-rw-r--r--  hypervideo_dl/extractor/thisav.py | 3
-rw-r--r--  hypervideo_dl/extractor/threeqsdn.py | 24
-rw-r--r--  hypervideo_dl/extractor/tiktok.py | 602
-rw-r--r--  hypervideo_dl/extractor/tinypic.py | 2
-rw-r--r--  hypervideo_dl/extractor/tmz.py | 240
-rw-r--r--  hypervideo_dl/extractor/tnaflix.py | 2
-rw-r--r--  hypervideo_dl/extractor/toggle.py | 10
-rw-r--r--  hypervideo_dl/extractor/tokentube.py | 152
-rw-r--r--  hypervideo_dl/extractor/toongoggles.py | 3
-rw-r--r--  hypervideo_dl/extractor/toutv.py | 2
-rw-r--r--  hypervideo_dl/extractor/traileraddict.py | 2
-rw-r--r--  hypervideo_dl/extractor/trovo.py | 73
-rw-r--r--  hypervideo_dl/extractor/trutv.py | 3
-rw-r--r--  hypervideo_dl/extractor/tubitv.py | 43
-rw-r--r--  hypervideo_dl/extractor/tumblr.py | 3
-rw-r--r--  hypervideo_dl/extractor/turbo.py | 2
-rw-r--r--  hypervideo_dl/extractor/turner.py | 7
-rw-r--r--  hypervideo_dl/extractor/tv2.py | 136
-rw-r--r--  hypervideo_dl/extractor/tv2hu.py | 132
-rw-r--r--  hypervideo_dl/extractor/tv4.py | 31
-rw-r--r--  hypervideo_dl/extractor/tv5mondeplus.py | 40
-rw-r--r--  hypervideo_dl/extractor/tv5unis.py | 3
-rw-r--r--  hypervideo_dl/extractor/tver.py | 3
-rw-r--r--  hypervideo_dl/extractor/tvigle.py | 3
-rw-r--r--  hypervideo_dl/extractor/tvland.py | 7
-rw-r--r--  hypervideo_dl/extractor/tvnow.py | 172
-rw-r--r--  hypervideo_dl/extractor/tvp.py | 2
-rw-r--r--  hypervideo_dl/extractor/tvplay.py | 48
-rw-r--r--  hypervideo_dl/extractor/twentyfourvideo.py | 3
-rw-r--r--  hypervideo_dl/extractor/twentythreevideo.py | 3
-rw-r--r--  hypervideo_dl/extractor/twitcasting.py | 111
-rw-r--r--  hypervideo_dl/extractor/twitch.py | 10
-rw-r--r--  hypervideo_dl/extractor/twitter.py | 46
-rw-r--r--  hypervideo_dl/extractor/udemy.py | 2
-rw-r--r--  hypervideo_dl/extractor/ukcolumn.py | 72
-rw-r--r--  hypervideo_dl/extractor/umg.py | 10
-rw-r--r--  hypervideo_dl/extractor/unistra.py | 2
-rw-r--r--  hypervideo_dl/extractor/uol.py | 1
-rw-r--r--  hypervideo_dl/extractor/uplynk.py | 5
-rw-r--r--  hypervideo_dl/extractor/urort.py | 2
-rw-r--r--  hypervideo_dl/extractor/urplay.py | 17
-rw-r--r--  hypervideo_dl/extractor/usanetwork.py | 2
-rw-r--r--  hypervideo_dl/extractor/ustream.py | 4
-rw-r--r--  hypervideo_dl/extractor/ustudio.py | 5
-rw-r--r--  hypervideo_dl/extractor/utreon.py | 85
-rw-r--r--  hypervideo_dl/extractor/varzesh3.py | 7
-rw-r--r--  hypervideo_dl/extractor/veo.py | 74
-rw-r--r--  hypervideo_dl/extractor/vesti.py | 2
-rw-r--r--  hypervideo_dl/extractor/vevo.py | 140
-rw-r--r--  hypervideo_dl/extractor/vgtv.py | 2
-rw-r--r--  hypervideo_dl/extractor/vh1.py | 27
-rw-r--r--  hypervideo_dl/extractor/vice.py | 6
-rw-r--r--  hypervideo_dl/extractor/viddler.py | 3
-rw-r--r--  hypervideo_dl/extractor/videa.py | 54
-rw-r--r--  hypervideo_dl/extractor/videomore.py | 12
-rw-r--r--  hypervideo_dl/extractor/vidio.py | 234
-rw-r--r--  hypervideo_dl/extractor/vidzi.py | 68
-rw-r--r--  hypervideo_dl/extractor/vier.py | 4
-rw-r--r--  hypervideo_dl/extractor/viewlift.py | 6
-rw-r--r--  hypervideo_dl/extractor/viidea.py | 2
-rw-r--r--  hypervideo_dl/extractor/viki.py | 328
-rw-r--r--  hypervideo_dl/extractor/vimeo.py | 368
-rw-r--r--  hypervideo_dl/extractor/vine.py | 4
-rw-r--r--  hypervideo_dl/extractor/viu.py | 151
-rw-r--r--  hypervideo_dl/extractor/vk.py | 4
-rw-r--r--  hypervideo_dl/extractor/vlive.py | 77
-rw-r--r--  hypervideo_dl/extractor/voicy.py | 147
-rw-r--r--  hypervideo_dl/extractor/voot.py | 58
-rw-r--r--  hypervideo_dl/extractor/vrt.py | 11
-rw-r--r--  hypervideo_dl/extractor/vrv.py | 3
-rw-r--r--  hypervideo_dl/extractor/vube.py | 10
-rw-r--r--  hypervideo_dl/extractor/vupload.py | 51
-rw-r--r--  hypervideo_dl/extractor/vvvvid.py | 4
-rw-r--r--  hypervideo_dl/extractor/vzaar.py | 2
-rw-r--r--  hypervideo_dl/extractor/wakanim.py | 14
-rw-r--r--  hypervideo_dl/extractor/walla.py | 2
-rw-r--r--  hypervideo_dl/extractor/wat.py | 16
-rw-r--r--  hypervideo_dl/extractor/watchbox.py | 3
-rw-r--r--  hypervideo_dl/extractor/watchindianporn.py | 2
-rw-r--r--  hypervideo_dl/extractor/wdr.py | 17
-rw-r--r--  hypervideo_dl/extractor/whowatch.py | 99
-rw-r--r--  hypervideo_dl/extractor/wimtv.py | 163
-rw-r--r--  hypervideo_dl/extractor/wistia.py | 2
-rw-r--r--  hypervideo_dl/extractor/xboxclips.py | 7
-rw-r--r--  hypervideo_dl/extractor/xfileshare.py | 2
-rw-r--r--  hypervideo_dl/extractor/xhamster.py | 9
-rw-r--r--  hypervideo_dl/extractor/ximalaya.py | 2
-rw-r--r--  hypervideo_dl/extractor/xnxx.py | 2
-rw-r--r--  hypervideo_dl/extractor/xstream.py | 4
-rw-r--r--  hypervideo_dl/extractor/xtube.py | 18
-rw-r--r--  hypervideo_dl/extractor/xxxymovies.py | 3
-rw-r--r--  hypervideo_dl/extractor/yahoo.py | 63
-rw-r--r--  hypervideo_dl/extractor/yandexdisk.py | 3
-rw-r--r--  hypervideo_dl/extractor/yandexmusic.py | 13
-rw-r--r--  hypervideo_dl/extractor/yandexvideo.py | 88
-rw-r--r--  hypervideo_dl/extractor/youjizz.py | 3
-rw-r--r--  hypervideo_dl/extractor/youku.py | 2
-rw-r--r--  hypervideo_dl/extractor/youporn.py | 2
-rw-r--r--  hypervideo_dl/extractor/youtube.py | 3600
-rw-r--r--  hypervideo_dl/extractor/zapiks.py | 2
-rw-r--r--  hypervideo_dl/extractor/zaq1.py | 101
-rw-r--r--  hypervideo_dl/extractor/zattoo.py | 6
-rw-r--r--  hypervideo_dl/extractor/zdf.py | 42
-rw-r--r--  hypervideo_dl/extractor/zee5.py | 244
-rw-r--r--  hypervideo_dl/extractor/zingmp3.py | 5
-rw-r--r--  hypervideo_dl/extractor/zoom.py | 15
-rw-r--r--  hypervideo_dl/extractor/zype.py | 7
-rw-r--r--  hypervideo_dl/minicurses.py | 109
-rw-r--r--  hypervideo_dl/options.py | 1234
-rw-r--r--  hypervideo_dl/postprocessor/__init__.py | 43
-rw-r--r--  hypervideo_dl/postprocessor/common.py | 125
-rw-r--r--  hypervideo_dl/postprocessor/embedthumbnail.py | 279
-rw-r--r--  hypervideo_dl/postprocessor/exec.py | 42
-rw-r--r--  hypervideo_dl/postprocessor/ffmpeg.py | 732
-rw-r--r--  hypervideo_dl/postprocessor/metadataparser.py | 116
-rw-r--r--  hypervideo_dl/postprocessor/modify_chapters.py | 336
-rw-r--r--  hypervideo_dl/postprocessor/movefilesafterdownload.py | 54
-rw-r--r--  hypervideo_dl/postprocessor/sponskrub.py | 96
-rw-r--r--  hypervideo_dl/postprocessor/sponsorblock.py | 96
-rw-r--r--  hypervideo_dl/postprocessor/xattrpp.py | 13
-rw-r--r--  hypervideo_dl/utils.py | 1053
-rw-r--r--  hypervideo_dl/version.py | 2
-rw-r--r--  hypervideo_dl/webvtt.py | 402
-rw-r--r--  pytest.ini | 4
-rw-r--r--  requirements.txt | 3
-rw-r--r--  setup.cfg | 4
-rw-r--r--  setup.py | 147
-rw-r--r--  test/helper.py | 17
-rw-r--r--  test/parameters.json | 74
-rw-r--r--  test/test_InfoExtractor.py | 973
-rw-r--r--  test/test_YoutubeDL.py | 401
-rw-r--r--  test/test_YoutubeDLCookieJar.py | 2
-rw-r--r--  test/test_aes.py | 52
-rw-r--r--  test/test_age_restriction.py | 6
-rw-r--r--  test/test_all_urls.py | 12
-rw-r--r--  test/test_cache.py | 2
-rw-r--r--  test/test_compat.py | 31
-rw-r--r--  test/test_cookies.py | 107
-rw-r--r--  test/test_download.py | 54
-rw-r--r--  test/test_downloader_http.py | 2
-rw-r--r--  test/test_execution.py | 2
-rw-r--r--  test/test_http.py | 2
-rw-r--r--  test/test_overwrites.py | 53
-rw-r--r--  test/test_post_hooks.py | 69
-rw-r--r--  test/test_postprocessors.py | 555
-rw-r--r--  test/test_socks.py | 5
-rw-r--r--  test/test_subtitles.py | 75
-rw-r--r--  test/test_utils.py | 256
-rw-r--r--  test/test_verbose_output.py | 2
-rw-r--r--  test/test_youtube_lists.py | 7
-rw-r--r--  test/test_youtube_misc.py | 2
-rw-r--r--  test/testdata/ism/sintel.Manifest | 988
-rw-r--r--  test/testdata/m3u8/bipbop_16x9.m3u8 | 38
-rw-r--r--  test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 | 76
-rw-r--r--  test/testdata/mpd/subtitles.mpd | 351
-rw-r--r--  test/testdata/thumbnails/foo %d bar/foo_%d.webp | bin 0 -> 3928 bytes
-rw-r--r--  tox.ini | 2
601 files changed, 38841 insertions, 12264 deletions
diff --git a/AUTHORS b/AUTHORS
deleted file mode 100644
index 4a6d7da..0000000
--- a/AUTHORS
+++ /dev/null
@@ -1,249 +0,0 @@
-Ricardo Garcia Gonzalez
-Danny Colligan
-Benjamin Johnson
-Vasyl' Vavrychuk
-Witold Baryluk
-Paweł Paprota
-Gergely Imreh
-Rogério Brito
-Philipp Hagemeister
-Sören Schulze
-Kevin Ngo
-Ori Avtalion
-shizeeg
-Filippo Valsorda
-Christian Albrecht
-Dave Vasilevsky
-Jaime Marquínez Ferrándiz
-Jeff Crouse
-Osama Khalid
-Michael Walter
-M. Yasoob Ullah Khalid
-Julien Fraichard
-Johny Mo Swag
-Axel Noack
-Albert Kim
-Pierre Rudloff
-Huarong Huo
-Ismael Mejía
-Steffan Donal
-Andras Elso
-Jelle van der Waa
-Marcin Cieślak
-Anton Larionov
-Takuya Tsuchida
-Sergey M.
-Michael Orlitzky
-Chris Gahan
-Saimadhav Heblikar
-Mike Col
-Oleg Prutz
-pulpe
-Andreas Schmitz
-Michael Kaiser
-Niklas Laxström
-David Triendl
-Anthony Weems
-David Wagner
-Juan C. Olivares
-Mattias Harrysson
-phaer
-Sainyam Kapoor
-Nicolas Évrard
-Jason Normore
-Hoje Lee
-Adam Thalhammer
-Georg Jähnig
-Ralf Haring
-Koki Takahashi
-Ariset Llerena
-Adam Malcontenti-Wilson
-Tobias Bell
-Naglis Jonaitis
-Charles Chen
-Hassaan Ali
-Dobrosław Żybort
-David Fabijan
-Sebastian Haas
-Alexander Kirk
-Erik Johnson
-Keith Beckman
-Ole Ernst
-Aaron McDaniel (mcd1992)
-Magnus Kolstad
-Hari Padmanaban
-Carlos Ramos
-5moufl
-lenaten
-Dennis Scheiba
-Damon Timm
-winwon
-Xavier Beynon
-Gabriel Schubiner
-xantares
-Jan Matějka
-Mauroy Sébastien
-William Sewell
-Dao Hoang Son
-Oskar Jauch
-Matthew Rayfield
-t0mm0
-Tithen-Firion
-Zack Fernandes
-cryptonaut
-Adrian Kretz
-Mathias Rav
-Petr Kutalek
-Will Glynn
-Max Reimann
-Cédric Luthi
-Thijs Vermeir
-Joel Leclerc
-Christopher Krooss
-Ondřej Caletka
-Dinesh S
-Johan K. Jensen
-Yen Chi Hsuan
-Enam Mijbah Noor
-David Luhmer
-Shaya Goldberg
-Paul Hartmann
-Frans de Jonge
-Robin de Rooij
-Ryan Schmidt
-Leslie P. Polzer
-Duncan Keall
-Alexander Mamay
-Devin J. Pohly
-Eduardo Ferro Aldama
-Jeff Buchbinder
-Amish Bhadeshia
-Joram Schrijver
-Will W.
-Mohammad Teimori Pabandi
-Roman Le Négrate
-Matthias Küch
-Julian Richen
-Ping O.
-Mister Hat
-Peter Ding
-jackyzy823
-George Brighton
-Remita Amine
-Aurélio A. Heckert
-Bernhard Minks
-sceext
-Zach Bruggeman
-Tjark Saul
-slangangular
-Behrouz Abbasi
-ngld
-nyuszika7h
-Shaun Walbridge
-Lee Jenkins
-Anssi Hannula
-Lukáš Lalinský
-Qijiang Fan
-Rémy Léone
-Marco Ferragina
-reiv
-Muratcan Simsek
-Evan Lu
-flatgreen
-Brian Foley
-Vignesh Venkat
-Tom Gijselinck
-Founder Fang
-Andrew Alexeyew
-Saso Bezlaj
-Erwin de Haan
-Jens Wille
-Robin Houtevelts
-Patrick Griffis
-Aidan Rowe
-mutantmonkey
-Ben Congdon
-Kacper Michajłow
-José Joaquín Atria
-Viťas Strádal
-Kagami Hiiragi
-Philip Huppert
-blahgeek
-Kevin Deldycke
-inondle
-Tomáš Čech
-Déstin Reed
-Roman Tsiupa
-Artur Krysiak
-Jakub Adam Wieczorek
-Aleksandar Topuzović
-Nehal Patel
-Rob van Bekkum
-Petr Zvoníček
-Pratyush Singh
-Aleksander Nitecki
-Sebastian Blunt
-Matěj Cepl
-Xie Yanbo
-Philip Xu
-John Hawkinson
-Rich Leeper
-Zhong Jianxin
-Thor77
-Mattias Wadman
-Arjan Verwer
-Costy Petrisor
-Logan B
-Alex Seiler
-Vijay Singh
-Paul Hartmann
-Stephen Chen
-Fabian Stahl
-Bagira
-Odd Stråbø
-Philip Herzog
-Thomas Christlieb
-Marek Rusinowski
-Tobias Gruetzmacher
-Olivier Bilodeau
-Lars Vierbergen
-Juanjo Benages
-Xiao Di Guan
-Thomas Winant
-Daniel Twardowski
-Jeremie Jarosh
-Gerard Rovira
-Marvin Ewald
-Frédéric Bournival
-Timendum
-gritstub
-Adam Voss
-Mike Fährmann
-Jan Kundrát
-Giuseppe Fabiano
-Örn Guðjónsson
-Parmjit Virk
-Genki Sky
-Ľuboš Katrinec
-Corey Nicholson
-Ashutosh Chaudhary
-John Dong
-Tatsuyuki Ishi
-Daniel Weber
-Kay Bouché
-Yang Hongbo
-Lei Wang
-Petr Novák
-Leonardo Taccari
-Martin Weinelt
-Surya Oktafendri
-TingPing
-Alexandre Macabies
-Bastian de Groot
-Niklas Haas
-András Veres-Szentkirályi
-Enes Solak
-Nathan Rossi
-Thomas van der Berg
-Luca Cherubin
-Adrian Heine
\ No newline at end of file
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
index 0000000..048d988
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,127 @@
+pukkandan (owner)
+shirt-dev (collaborator)
+coletdjnz/colethedj (collaborator)
+Ashish0804 (collaborator)
+h-h-h-h
+pauldubois98
+nixxo
+GreyAlien502
+kyuyeunk
+siikamiika
+jbruchon
+alexmerkel
+glenn-slayden
+Unrud
+wporr
+mariuszskon
+ohnonot
+samiksome
+alxnull
+FelixFrog
+Zocker1999NET
+nao20010128nao
+kurumigi
+bbepis
+animelover1984/horahoradev
+Pccode66
+RobinD42
+hseg
+DennyDai
+codeasashu
+teesid
+kevinoconnor7
+damianoamatruda
+2ShedsJackson
+CXwudi
+xtkoba
+llacb47
+hheimbuerger
+B0pol
+lkho
+fstirlitz
+Lamieur
+tsukumijima
+Hadi0609
+b5eff52
+craftingmod
+tpikonen
+tripulse
+king-millez
+alex-gedeon
+hhirtz
+louie-github
+MinePlayersPE
+olifre
+rhsmachine/zenerdi0de
+nihil-admirari
+krichbanana
+ohmybahgosh
+nyuszika7h
+blackjack4494
+pyx
+TpmKranz
+mzbaulhaque
+zackmark29
+mbway
+zerodytrash
+wesnm
+pento
+rigstot
+dirkf
+funniray
+Jessecar96
+jhwgh1968
+kikuyan
+max-te
+nchilada
+pgaig
+PSlava
+stdedos
+u-spec-png
+Sipherdrakon
+kidonng
+smege1001
+tandy1000
+IONECarter
+capntrips
+mrfade
+ParadoxGBB
+wlritchi
+NeroBurner
+mahanstreamer
+alerikaisattera
+Derkades
+BunnyHelp
+i6t
+std-move
+Chocobozzz
+ouwou
+korli
+octotherp
+CeruleanSky
+zootedb0t
+chao813
+ChillingPepper
+ConquerorDopy
+dalanmiller
+DigitalDJ
+f4pp3rk1ng
+gesa
+Jules-A
+makeworld-the-better-one
+MKSherbini
+mrx23dot
+poschi3
+raphaeldore
+renalid
+sleaux-meaux
+sulyi
+tmarki
+Vangelis66
+AjaxGb
+ajj8
+jakubadamw
+jfogelman
+timethrow
+sarnoud
+Bojidarist
diff --git a/Changelog.md b/Changelog.md
new file mode 100644
index 0000000..2e6da33
--- /dev/null
+++ b/Changelog.md
@@ -0,0 +1,1237 @@
+# Changelog
+
+<!--
+# Instructions for creating a release
+
+* Run `make doc`
+* Update Changelog.md and CONTRIBUTORS
+* Change "Merged with ytdl" version in Readme.md if needed
+* Add new/fixed extractors in "new features" section of Readme.md
+* Commit as `Release <version>`
+* Push to origin/release using `git push origin master:release`
+  The build task will then run
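+
+  A condensed sketch of the same steps (the version number is illustrative):
+
+  ```sh
+  make doc
+  git add Changelog.md CONTRIBUTORS Readme.md
+  git commit -m "Release 2021.10.10"
+  git push origin master:release   # the build task then runs
+  ```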
+
+-->
+
+
+### 2021.10.10
+
+* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor`
+* [minicurses] Fix when printing to file
+* [downloader] Fix throttledratelimit
+* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud)
+* [NovaPlay] Add extractor by [Bojidarist](https://github.com/Bojidarist)
+* [ffmpeg] Revert "Set max probesize" - No longer needed
+* [docs] Remove incorrect dependency on VC++10
+* [build] Allow releasing without a changelog
+
+### 2021.10.09
+
+* Improved progress reporting
+ * Separate `--console-title` and `--no-progress`
+ * Add option `--progress` to show progress-bar even in quiet mode
+ * Fix and refactor `minicurses` and use it for all progress reporting
+ * Standardize use of terminal sequences and enable color support for Windows 10
+ * Add option `--progress-template` to customize progress-bar and console-title (see the sketch after this list)
+ * Add postprocessor hooks and progress reporting
+* [postprocessor] Add plugin support with option `--use-postprocessor`
+* [extractor] Extract storyboards from SMIL manifests by [fstirlitz](https://github.com/fstirlitz)
+* [outtmpl] Alternate form of format type `l` for `\n` delimited list
+* [outtmpl] Format type `U` for Unicode normalization (see the sketch after this list)
+* [outtmpl] Allow empty output template to skip a type of file
+* Merge webm formats into mkv if thumbnails are to be embedded
+* [adobepass] Add RCN as MSO by [jfogelman](https://github.com/jfogelman)
+* [ciscowebex] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
+* [Gettr] Add extractor by [i6t](https://github.com/i6t)
+* [GoPro] Add extractor by [i6t](https://github.com/i6t)
+* [N1] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [Theta] Add video extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Veo] Add extractor by [i6t](https://github.com/i6t)
+* [Vupload] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [bbc] Extract better quality videos by [ajj8](https://github.com/ajj8)
+* [Bilibili] Add subtitle converter by [u-spec-png](https://github.com/u-spec-png)
+* [CBC] Cleanup tests by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [Douyin] Rewrite extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Funimation] Fix for /v/ urls by [pukkandan](https://github.com/pukkandan), [Jules-A](https://github.com/Jules-A)
+* [Funimation] Sort formats according to the relevant extractor-args
+* [Hidive] Fix duplicate and incorrect formats
+* [HotStarSeries] Fix cookies by [Ashish0804](https://github.com/Ashish0804)
+* [LinkedInLearning] Add subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [Mediaite] Relax valid url by [coletdjnz](https://github.com/coletdjnz)
+* [Newgrounds] Add age_limit and fix duration by [u-spec-png](https://github.com/u-spec-png)
+* [Newgrounds] Fix view count on songs by [u-spec-png](https://github.com/u-spec-png)
+* [parliamentlive.tv] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [PolskieRadio] Fix extractors by [jakubadamw](https://github.com/jakubadamw), [u-spec-png](https://github.com/u-spec-png)
+* [reddit] Add embedded url by [u-spec-png](https://github.com/u-spec-png)
+* [reddit] Fix 429 by generating a random `reddit_session` by [AjaxGb](https://github.com/AjaxGb)
+* [Rumble] Add RumbleChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [soundcloud:playlist] Detect last page correctly
+* [SovietsCloset] Add duration from m3u8 by [ChillingPepper](https://github.com/ChillingPepper)
+* [Streamable] Add codecs by [u-spec-png](https://github.com/u-spec-png)
+* [vidme] Remove extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [youtube:tab] Fallback to API when webpage fails to download by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix non-fatal errors in fetching player
+* Fix `--flat-playlist` when neither IE nor id is known
+* Fix `-f mp4` behaving differently from youtube-dl
+* Workaround for bug in `ssl.SSLContext.load_default_certs`
+* [aes] Improve performance slightly by [sulyi](https://github.com/sulyi)
+* [cookies] Fix keyring fallback by [mbway](https://github.com/mbway)
+* [embedsubtitle] Fix error when duration is unknown
+* [ffmpeg] Fix error when subtitle file is missing
+* [ffmpeg] Set max probesize to workaround AAC HLS stream issues by [shirt](https://github.com/shirt-dev)
+* [FixupM3u8] Remove redundant run if a merge is needed
+* [hls] Fix decryption issues by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
+* [http] Respect user-provided chunk size over extractor's
+* [utils] Let traverse_obj accept functions as keys
+* [docs] Add note about our custom ffmpeg builds
+* [docs] Write embedding and contributing documentation by [pukkandan](https://github.com/pukkandan), [timethrow](https://github.com/timethrow)
+* [update] Check for new version even if not updateable
+* [build] Add more files to the tarball
+* [build] Allow building with py2exe (and misc fixes)
+* [build] Use pycryptodomex by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Some minor refactoring, improve docs and misc cleanup
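+
+A rough usage sketch of the new progress and output-template options above (the URLs are placeholders; `download-title:` and the `info`/`progress` fields follow the documented `--progress-template` syntax, and `categories` is just an example list field):
+
+```sh
+# Show a progress bar even in quiet mode, mirroring progress into the console title
+hypervideo --quiet --progress \
+  --progress-template "download-title:%(info.id)s - %(progress.eta)s" \
+  "https://example.com/watch/123"
+
+# Format type "U" applies Unicode normalization to a field
+hypervideo -o "%(title)U.%(ext)s" "https://example.com/watch/123"
+
+# "#l" is the alternate, newline-delimited form of the list format type "l"
+hypervideo --print "%(categories)#l" "https://example.com/watch/123"
+```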
+
+
+### 2021.09.25
+
+* Add new option `--netrc-location`
+* [outtmpl] Allow alternate fields using `,`
+* [outtmpl] Add format type `B` to treat the value as bytes (e.g. to limit the filename to a certain number of bytes) - see the sketch after this list
+* Separate the options `--ignore-errors` and `--no-abort-on-error`
+* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao)
+* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [bilibili] Add BiliIntlIE and BiliIntlSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [CAM4] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Chingari] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [CGTN] Add extractor by [chao813](https://github.com/chao813)
+* [damtomo] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [gotostage] Add extractor by [poschi3](https://github.com/poschi3)
+* [Koo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Mediaite] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Mediaklikk] Add Extractor by [tmarki](https://github.com/tmarki), [mrx23dot](https://github.com/mrx23dot), [coletdjnz](https://github.com/coletdjnz)
+* [MuseScore] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Newgrounds] Add NewgroundsUserIE and improve extractor by [u-spec-png](https://github.com/u-spec-png)
+* [nzherald] Add NZHeraldIE by [coletdjnz](https://github.com/coletdjnz)
+* [Olympics] Add replay extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Peertube] Add channel and playlist extractors by [u-spec-png](https://github.com/u-spec-png)
+* [radlive] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [SovietsCloset] Add extractor by [ChillingPepper](https://github.com/ChillingPepper)
+* [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [9Now] Handle episodes of series by [dalanmiller](https://github.com/dalanmiller)
+* [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Arte] Improve description extraction by [renalid](https://github.com/renalid)
+* [atv.at] Use jwt for API by [NeroBurner](https://github.com/NeroBurner)
+* [brightcove] Extract subtitles from manifests
+* [CBC] Fix CBC Gem extractors by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [cbs] Report appropriate error for DRM
+* [comedycentral] Support `collection-playlist` by [nixxo](https://github.com/nixxo)
+* [DIYNetwork] Support new format by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [downloader/niconico] Pass custom headers by [nao20010128nao](https://github.com/nao20010128nao)
+* [dw] Fix extractor
+* [Fancode] Fix live streams by [zenerdi0de](https://github.com/zenerdi0de)
+* [funimation] Fix for locations outside US by [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
+* [globo] Fix GloboIE by [Ashish0804](https://github.com/Ashish0804)
+* [HiDive] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Add referer for subs by [Ashish0804](https://github.com/Ashish0804)
+* [itv] Fix extractor, add subtitles and thumbnails by [coletdjnz](https://github.com/coletdjnz), [sleaux-meaux](https://github.com/sleaux-meaux), [Vangelis66](https://github.com/Vangelis66)
+* [lbry] Show error message from API response
+* [Mxplayer] Use mobile API by [Ashish0804](https://github.com/Ashish0804)
+* [NDR] Rewrite NDRIE by [Ashish0804](https://github.com/Ashish0804)
+* [Nuvid] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [Oreilly] Handle new web url by [MKSherbini](https://github.com/MKSherbini)
+* [pbs] Fix subtitle extraction by [coletdjnz](https://github.com/coletdjnz), [gesa](https://github.com/gesa), [raphaeldore](https://github.com/raphaeldore)
+* [peertube] Update instances by [u-spec-png](https://github.com/u-spec-png)
+* [plutotv] Fix extractor for URLs with `/en`
+* [reddit] Workaround for 429 by redirecting to old.reddit.com
+* [redtube] Fix exts
+* [soundcloud] Make playlist extraction lazy
+* [soundcloud] Retry playlist pages on `502` error and update `_CLIENT_ID`
+* [southpark] Fix SouthParkDE by [coletdjnz](https://github.com/coletdjnz)
+* [SovietsCloset] Fix playlists for games with only named categories by [ConquerorDopy](https://github.com/ConquerorDopy)
+* [SpankBang] Fix uploader by [f4pp3rk1ng](https://github.com/f4pp3rk1ng), [coletdjnz](https://github.com/coletdjnz)
+* [tiktok] Use API to fetch higher quality video by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [TikTokUser] Fix extractor using mobile API by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [videa] Fix some extraction errors by [nyuszika7h](https://github.com/nyuszika7h)
+* [VrtNU] Handle login errors by [llacb47](https://github.com/llacb47)
+* [vrv] Don't raise error when thumbnails are missing
+* [youtube] Cleanup authentication code by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix `--mark-watched` with `--cookies-from-browser`
+* [youtube] Improvements to JS player extraction and add extractor-args to skip it by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Retry on 'Unknown Error' by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Return full URL instead of just ID
+* [youtube] Warn when trying to download clips
+* [zdf] Improve format sorting
+* [zype] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
+* Allow `--force-write-archive` to work with `--flat-playlist`
+* Download subtitles in order of `--sub-langs`
+* Allow `0` in `--playlist-items`
+* Handle more playlist errors with `-i`
+* Fix `--no-get-comments`
+* Fix `extra_info` being reused across runs
+* Fix compat options `no-direct-merge` and `playlist-index`
+* Dump files should obey `--trim-filename` by [sulyi](https://github.com/sulyi)
+* [aes] Add `aes_gcm_decrypt_and_verify` by [sulyi](https://github.com/sulyi), [pukkandan](https://github.com/pukkandan)
+* [aria2c] Fix IV for some AES-128 streams by [shirt](https://github.com/shirt-dev)
+* [compat] Don't ignore `HOME` (if set) on Windows
+* [cookies] Make browser names case insensitive
+* [cookies] Print warning for cookie decoding error only once
+* [extractor] Fix root-relative URLs in MPD by [DigitalDJ](https://github.com/DigitalDJ)
+* [ffmpeg] Add `aac_adtstoasc` when merging if needed
+* [fragment,aria2c] Generalize and refactor some code
+* [fragment] Avoid repeated request for AES key
+* [fragment] Fix range header when using `-N` and media sequence by [shirt](https://github.com/shirt-dev)
+* [hls,aes] Fallback to native implementation for AES-CBC and detect `Cryptodome` in addition to `Crypto`
+* [hls] Byterange + AES128 is supported by native downloader
+* [ModifyChapters] Improve sponsor chapter merge algorithm by [nihil-admirari](https://github.com/nihil-admirari)
+* [ModifyChapters] Minor fixes
+* [WebVTT] Adjust parser to accommodate PBS subtitles
+* [utils] Improve `extract_timezone` by [dirkf](https://github.com/dirkf)
+* [options] Fix `--no-config` and refactor reading of config files
+* [options] Strip spaces and ignore empty entries in list-like switches
+* [test/cookies] Improve logging
+* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
+* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari)
+* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Provide `--onedir` zip for windows by [pukkandan](https://github.com/pukkandan)
+* [cleanup,docs] Add deprecation warning in docs for some counter-intuitive behaviour
+* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden)
+* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi)
+* [cleanup] Misc
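+
+A small sketch of the output-template additions above (`artist` and `uploader` are just example fields; the URL is a placeholder):
+
+```sh
+# ","-separated alternate fields fall back to the next one when a field is empty;
+# ".200B" (format type "B") truncates the value to at most 200 bytes
+hypervideo -o "%(artist,uploader)s - %(title).200B.%(ext)s" "https://example.com/video/123"
+```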
+
+
+### 2021.09.02
+
+* **Native SponsorBlock** implementation by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+ * `--sponsorblock-remove CATS` removes the specified chapters from the file (see the usage sketch at the end of this section)
+ * `--sponsorblock-mark CATS` marks the specified sponsor sections as chapters
+ * `--sponsorblock-chapter-title TMPL` to specify sponsor chapter template
+ * `--sponsorblock-api URL` to use a different API
+ * No re-encoding is done unless `--force-keyframes-at-cuts` is used
+ * The fetched sponsor sections are written to the infojson
+ * Deprecates: `--sponskrub`, `--no-sponskrub`, `--sponskrub-cut`, `--no-sponskrub-cut`, `--sponskrub-force`, `--no-sponskrub-force`, `--sponskrub-location`, `--sponskrub-args`
+* Split `--embed-chapters` from `--embed-metadata` (it still implies the former by default)
+* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+* Add option `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters by [nihil-admirari](https://github.com/nihil-admirari)
+* Let `--match-filter` reject entries early
+ * Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`
+* [lazy_extractor] Improvements (It now passes all tests)
+ * Bugfix for when plugin directory doesn't exist by [kidonng](https://github.com/kidonng)
+ * Create instance only after pre-checking archive
+ * Import actual class if an attribute is accessed
+ * Fix `suitable` and add flake8 test
+* [downloader/ffmpeg] Experimental support for DASH manifests (including live)
+ * Your ffmpeg must have [this patch](https://github.com/FFmpeg/FFmpeg/commit/3249c757aed678780e22e99a1a49f4672851bca9) applied for YouTube DASH to work
+* [downloader/ffmpeg] Allow passing custom arguments before `-i`
+* [BannedVideo] Add extractor by [smege1001](https://github.com/smege1001), [blackjack4494](https://github.com/blackjack4494), [pukkandan](https://github.com/pukkandan)
+* [bilibili] Add category extractor by [animelover1984](https://github.com/animelover1984)
+* [Epicon] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [filmmodu] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [GabTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Hungama] Fix `HungamaSongIE` and add `HungamaAlbumPlaylistIE` by [Ashish0804](https://github.com/Ashish0804)
+* [ManotoTV] Add new extractors by [tandy1000](https://github.com/tandy1000)
+* [Niconico] Add Search extractors by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
+* [Patreon] Add `PatreonUserIE` by [zenerdi0de](https://github.com/zenerdi0de)
+* [peloton] Add extractor by [IONECarter](https://github.com/IONECarter), [capntrips](https://github.com/capntrips), [pukkandan](https://github.com/pukkandan)
+* [ProjectVeritas] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [radiko] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [StarTV] Add extractor for `startv.com.tr` by [mrfade](https://github.com/mrfade), [coletdjnz](https://github.com/coletdjnz)
+* [tiktok] Add `TikTokUserIE` by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [Tokentube] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` by [Ashish0804](https://github.com/Ashish0804)
+* [voicy] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [adobepass] Fix Verizon SAML login by [nyuszika7h](https://github.com/nyuszika7h), [ParadoxGBB](https://github.com/ParadoxGBB)
+* [afreecatv] Fix adult VODs by [wlritchi](https://github.com/wlritchi)
+* [afreecatv] Tolerate failure to parse date string by [wlritchi](https://github.com/wlritchi)
+* [aljazeera] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [ATV.at] Fix extractor for ATV.at by [NeroBurner](https://github.com/NeroBurner), [coletdjnz](https://github.com/coletdjnz)
+* [bitchute] Fix test by [mahanstreamer](https://github.com/mahanstreamer)
+* [camtube] Remove obsolete extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [CDA] Add more formats by [u-spec-png](https://github.com/u-spec-png)
+* [eroprofile] Fix page skipping in albums by [jhwgh1968](https://github.com/jhwgh1968)
+* [facebook] Fix format sorting
+* [facebook] Fix metadata extraction by [kikuyan](https://github.com/kikuyan)
+* [facebook] Update onion URL by [Derkades](https://github.com/Derkades)
+* [HearThisAtIE] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [instagram] Add referrer to prevent throttling by [u-spec-png](https://github.com/u-spec-png), [kikuyan](https://github.com/kikuyan)
+* [iwara.tv] Extract more metadata by [BunnyHelp](https://github.com/BunnyHelp)
+* [iwara] Add thumbnail by [i6t](https://github.com/i6t)
+* [kakao] Fix extractor
+* [mediaset] Fix extraction for some videos by [nyuszika7h](https://github.com/nyuszika7h)
+* [Motherless] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [Nova] Fix extractor by [std-move](https://github.com/std-move)
+* [ParamountPlus] Fix geo verification by [shirt](https://github.com/shirt-dev)
+* [peertube] Handle new video URL format by [Chocobozzz](https://github.com/Chocobozzz)
+* [pornhub] Separate and fix playlist extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [reddit] Fix for quarantined subreddits by [ouwou](https://github.com/ouwou)
+* [ShemarooMe] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [soundcloud] Refetch `client_id` on 403
+* [tiktok] Fix metadata extraction
+* [TV2] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [tv5mondeplus] Fix extractor by [korli](https://github.com/korli)
+* [VH1,TVLand] Fix extractors by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Viafree] Fix extractor and extract subtitles by [coletdjnz](https://github.com/coletdjnz)
+* [XHamster] Extract `uploader_id` by [octotherp](https://github.com/octotherp)
+* [youtube] Add `shorts` to `_VALID_URL`
+* [youtube] Add av01 itags to known formats list by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] Extract error messages from HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix subtitle names
+* [youtube] Prefer audio stream that YouTube considers default
+* [youtube] Remove annotations and deprecate `--write-annotations` by [coletdjnz](https://github.com/coletdjnz)
+* [Zee5] Fix extractor and add subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [aria2c] Obey `--rate-limit`
+* [EmbedSubtitle] Continue even if some files are missing
+* [extractor] Better error message for DRM
+* [extractor] Common function `_match_valid_url`
+* [extractor] Show video id in error messages if possible
+* [FormatSort] Remove priority of `lang`
+* [options] Add `_set_from_options_callback`
+* [SubtitleConvertor] Fix bug during subtitle conversion
+* [utils] Add `parse_qs`
+* [webvtt] Fix timestamp overflow adjustment by [fstirlitz](https://github.com/fstirlitz)
+* Bugfix for `--replace-in-metadata`
+* Don't try to merge with final extension
+* Fix `--force-overwrites` when using `-k`
+* Fix `--no-prefer-free-formats` by [CeruleanSky](https://github.com/CeruleanSky)
+* Fix `-F` for extractors that directly return url
+* Fix `-J` when there are failed videos
+* Fix `extra_info` being reused across runs
+* Fix `playlist_index` not obeying `playlist_start` and add tests
+* Fix resuming of single formats when using `--no-part`
+* Revert erroneous use of the `Content-Length` header by [fstirlitz](https://github.com/fstirlitz)
+* Use `os.replace` where applicable by [paulwrubel](https://github.com/paulwrubel)
+* [build] Add homebrew taps `yt-dlp/taps/yt-dlp` by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Fix bug in making `yt-dlp.tar.gz`
+* [docs] Fix some typos by [pukkandan](https://github.com/pukkandan), [zootedb0t](https://github.com/zootedb0t)
+* [cleanup] Replace improper use of tab in trovo by [glenn-slayden](https://github.com/glenn-slayden)
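+
+A minimal sketch of the native SponsorBlock options above (`sponsor`, `selfpromo`, `intro` and `outro` are standard SponsorBlock category names; the URL is a placeholder):
+
+```sh
+# Mark intro/outro segments as chapters, cut sponsor segments out of the file,
+# and re-encode around the cut points for frame-accurate cuts
+hypervideo --sponsorblock-mark intro,outro \
+  --sponsorblock-remove sponsor,selfpromo \
+  --force-keyframes-at-cuts \
+  "https://www.youtube.com/watch?v=xxxxxxxxxxx"
+```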
+
+
+### 2021.08.10
+
+* Add option `--replace-in-metadata`
+* Add option `--no-simulate` to not simulate even when `--print` or `--list...` are used - Deprecates `--print-json`
+* Allow entire infodict to be printed using `%()s` - makes `--dump-json` redundant (see the sketch after this list)
+* Allow multiple `--exec` and `--exec-before-download`
+* Add regex to `--match-filter`
+* Add all format filtering operators also to `--match-filter` by [max-te](https://github.com/max-te)
+* Add compat-option `no-keep-subs`
+* [adobepass] Add MSO Cablevision by [Jessecar96](https://github.com/Jessecar96)
+* [BandCamp] Add BandcampMusicIE by [Ashish0804](https://github.com/Ashish0804)
+* [blackboardcollaborate] Add new extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [eroprofile] Add album downloader by [jhwgh1968](https://github.com/jhwgh1968)
+* [mirrativ] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [openrec] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [nbcolympics:stream] Fix extractor by [nchilada](https://github.com/nchilada), [pukkandan](https://github.com/pukkandan)
+* [nbcolympics] Update extractor for 2020 olympics by [wesnm](https://github.com/wesnm)
+* [paramountplus] Separate extractor and fix some titles by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
+* [RCTIPlus] Support events and TV by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Newgrounds] Improve extractor and fix playlist by [u-spec-png](https://github.com/u-spec-png)
+* [aenetworks] Update `_THEPLATFORM_KEY` and `_THEPLATFORM_SECRET` by [wesnm](https://github.com/wesnm)
+* [crunchyroll] Fix thumbnail by [funniray](https://github.com/funniray)
+* [HotStar] Use API for metadata and extract subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [instagram] Fix comments extraction by [u-spec-png](https://github.com/u-spec-png)
+* [peertube] Fix videos without description by [u-spec-png](https://github.com/u-spec-png)
+* [twitch:clips] Extract `display_id` by [dirkf](https://github.com/dirkf)
+* [viki] Print error message from API request
+* [Vine] Remove invalid formats by [u-spec-png](https://github.com/u-spec-png)
+* [VrtNU] Fix XSRF token by [pgaig](https://github.com/pgaig)
+* [vrv] Fix thumbnail extraction by [funniray](https://github.com/funniray)
+* [youtube] Add extractor-arg `include-live-dash` to show live dash formats
+* [youtube] Improve signature function detection by [PSlava](https://github.com/PSlava)
+* [youtube] Raise appropriate error when API pages can't be downloaded
+* Ensure `_write_ytdl_file` closes file handle on error
+* Fix `--compat-options filename` by [stdedos](https://github.com/stdedos)
+* Fix issues with infodict sanitization
+* Fix resuming when using `--no-part`
+* Fix wrong extension for intermediate files
+* Handle `BrokenPipeError` by [kikuyan](https://github.com/kikuyan)
+* Show libraries present in verbose head
+* [extractor] Detect `sttp` as subtitles in MPD by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Reset non-repeating warnings per video
+* [ffmpeg] Fix streaming `mp4` to `stdout`
+* [ffmpeg] Allow `--ffmpeg-location` to be a file with different name
+* [utils] Fix `InAdvancePagedList.__getitem__`
+* [utils] Fix `traverse_obj` depth when `is_user_input`
+* [webvtt] Merge daisy-chained duplicate cues by [fstirlitz](https://github.com/fstirlitz)
+* [build] Use custom build of `pyinstaller` by [shirt](https://github.com/shirt-dev)
+* [tests:download] Add batch testing for extractors (`test_YourExtractor_all`)
+* [docs] Document which fields `--add-metadata` adds to the file
+* [docs] Fix some mistakes and improve doc
+* [cleanup] Misc code cleanup
+
+
+### 2021.08.02
+
+* Add logo, banner and donate links
+* [outtmpl] Expand and escape environment variables
+* [outtmpl] Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal)
+* [downloader] Allow streaming some unmerged formats to stdout using ffmpeg
+* [youtube] **Age-gate bypass**
+ * Add `agegate` clients by [pukkandan](https://github.com/pukkandan), [MinePlayersPE](https://github.com/MinePlayersPE)
+ * Add `thirdParty` to agegate clients to bypass more videos
+ * Simplify client definitions, expose `embedded` clients
+ * Improve age-gate detection by [coletdjnz](https://github.com/coletdjnz)
+ * Fix default global API key by [coletdjnz](https://github.com/coletdjnz)
+ * Add `creator` clients for age-gate bypass using unverified accounts by [zerodytrash](https://github.com/zerodytrash), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [adobepass] Add MSO Sling TV by [wesnm](https://github.com/wesnm)
+* [CBS] Add ParamountPlusSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [dplay] Add `ScienceChannelIE` by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [UtreonIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [youtube] Add `mweb` client by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Add `player_client=all`
+* [youtube] Force `hl=en` for comments by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix format sorting when using alternate clients
+* [youtube] Misc cleanup by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Extract SAPISID only once
+* [CBS] Add fallback by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
+* [Hotstar] Support cookies by [Ashish0804](https://github.com/Ashish0804)
+* [HotStarSeriesIE] Fix regex by [Ashish0804](https://github.com/Ashish0804)
+* [bilibili] Improve `_VALID_URL`
+* [mediaset] Fix extraction by [nixxo](https://github.com/nixxo)
+* [Mxplayer] Add h265 formats by [Ashish0804](https://github.com/Ashish0804)
+* [RCTIPlus] Remove PhantomJS dependency by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [tenplay] Add MA15+ age limit by [pento](https://github.com/pento)
+* [vidio] Fix login error detection by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [vimeo] Better extraction of original file by [Ashish0804](https://github.com/Ashish0804)
+* [generic] Support KVS player (replaces ThisVidIE) by [rigstot](https://github.com/rigstot)
+* Add compat-option `no-clean-infojson`
+* Remove `asr` appearing twice in `-F`
+* Set `home:` as the default key for `-P`
+* [utils] Fix slicing of reversed `LazyList`
+* [FormatSort] Fix bug for audio with unknown codec
+* [test:download] Support testing with `ignore_no_formats_error`
+* [cleanup] Refactor some code
+
+
+### 2021.07.24
+
+* [youtube:tab] Extract video duration early
+* [downloader] Pass `info_dict` to `progress_hook`s
+* [youtube] Fix age-gated videos for API clients when cookies are supplied by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Disable `get_video_info` age-gate workaround - This endpoint seems to be completely dead
+* [youtube] Try all clients even if age-gated
+* [youtube] Fix subtitles only being extracted from the first client
+* [youtube] Simplify `_get_text`
+* [cookies] Bugfix for Microsoft Edge on macOS
+* [cookies] Handle `sqlite` `ImportError` gracefully by [mbway](https://github.com/mbway)
+* [cookies] Handle errors when importing `keyring`
+
+### 2021.07.21
+
+* **Add option `--cookies-from-browser`** to load cookies from a browser by [mbway](https://github.com/mbway)
+ * Usage: `--cookies-from-browser BROWSER[:PROFILE_NAME_OR_PATH]`
+ * Also added `--no-cookies-from-browser`
+ * To decrypt chromium cookies, `keyring` is needed for UNIX and `pycryptodome` for Windows
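+ * Eg (illustrative): `--cookies-from-browser firefox` or `--cookies-from-browser chrome:/path/to/profile`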
+* Add option `--exec-before-download`
+* Add field `live_status`
+* [FFmpegMetadata] Add language of each stream and some refactoring
+* [douyin] Add extractor by [pukkandan](https://github.com/pukkandan), [pyx](https://github.com/pyx)
+* [pornflip] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* **[youtube] Extract data from multiple clients** by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
+ * `player_client` now accepts multiple clients
+ * Default `player_client` = `android,web`
+ * This uses twice as many requests, but avoids throttling for most videos while also not losing any formats
+ * Music clients can be specifically requested and are enabled by default for `music.youtube.com` URLs
+ * Added `player_client=ios` (Known issue: formats from ios are not sorted correctly)
+ * Add age-gate bypass for android and ios clients
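+ * Eg (illustrative): `--extractor-args "youtube:player_client=android,web"`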
+* [youtube] Extract more thumbnails
+ * The thumbnail URLs are hard-coded and their actual existence is tested lazily
+ * Added option `--no-check-formats` to not test them
+* [youtube] Misc fixes
+ * Improve extraction of livestream metadata by [pukkandan](https://github.com/pukkandan), [krichbanana](https://github.com/krichbanana)
+ * Hide live dash formats since they can't be downloaded anyway
+ * Fix authentication when using multiple accounts by [coletdjnz](https://github.com/coletdjnz)
+ * Fix controversial videos when requested via API by [coletdjnz](https://github.com/coletdjnz)
+ * Fix session index extraction and headers for non-web player clients by [coletdjnz](https://github.com/coletdjnz)
+ * Make `--extractor-retries` work for more errors
+ * Fix sorting of 3gp format
+ * Sanity check `chapters` (and refactor related code)
+ * Make `parse_time_text` and `_extract_chapters` non-fatal
+ * Misc cleanup and bug fixes by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Fix channels tab
+* [youtube:tab] Extract playlist availability by [coletdjnz](https://github.com/coletdjnz)
+* **[youtube:comments] Move comment extraction to new API** by [coletdjnz](https://github.com/coletdjnz)
+ * Adds extractor-args `comment_sort` (`top`/`new`), `max_comments`, `max_comment_depth`
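+ * Eg (illustrative): `--extractor-args "youtube:comment_sort=new"` or `--extractor-args "youtube:max_comments=100"`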
+* [youtube:comments] Fix `is_favorited`, improve `like_count` parsing by [coletdjnz](https://github.com/coletdjnz)
+* [BravoTV] Improve metadata extraction by [kevinoconnor7](https://github.com/kevinoconnor7)
+* [crunchyroll:playlist] Force http
+* [yahoo:gyao:player] Relax `_VALID_URL` by [nao20010128nao](https://github.com/nao20010128nao)
+* [nebula] Authentication via tokens from cookie jar by [hheimbuerger](https://github.com/hheimbuerger), [TpmKranz](https://github.com/TpmKranz)
+* [RTP] Fix extraction and add subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [viki] Rewrite extractors and add extractor-arg `video_types` to `vikichannel` by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
+* [vlive] Extract thumbnail directly in addition to the one from Naver
+* [generic] Extract previously missed subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [generic] Extract everything in the SMIL manifest and detect discarded subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [embedthumbnail] Fix `_get_thumbnail_resolution`
+* [metadatafromfield] Do not detect numbers as field names
+* Fix selectors `all`, `mergeall` and add tests
+* Errors in playlist extraction should obey `--ignore-errors`
+* Fix bug where `original_url` was not propagated when `_type`=`url`
+* Revert "Merge webm formats into mkv if thumbnails are to be embedded (#173)"
+ * This was wrongly checking for `write_thumbnail`
+* Improve `extractor_args` parsing
+* Rename `NOTE` in `-F` to `MORE INFO` since it's often confused to be the same as `format_note`
+* Add `only_once` param for `write_debug` and `report_warning`
+* [extractor] Allow extracting multiple groups in `_search_regex` by [fstirlitz](https://github.com/fstirlitz)
+* [utils] Improve `traverse_obj`
+* [utils] Add `variadic`
+* [utils] Improve `js_to_json` comment regex by [fstirlitz](https://github.com/fstirlitz)
+* [webvtt] Fix timestamps
+* [compat] Remove unnecessary code
+* [docs] Fix default of multistreams
+
+
+### 2021.07.07
+
+* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
+* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments)
+ * Add extractor option `skip` for `youtube`. Eg: `--extractor-args youtube:skip=hls,dash`
+ * Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
+* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
+* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
+ * **Fixes youtube premium music** (format 141) extraction
+ * Adds extractor option `player_client` = `web`/`android`
+ * **`--extractor-args youtube:player_client=android` works around the throttling** for the time-being
+ * Adds extractor option `player_skip=config`
+ * Adds age-gate fallback using embedded client
+* [youtube] Choose correct Live chat API for upcoming streams by [krichbanana](https://github.com/krichbanana)
+* [youtube] Fix subtitle names for age-gated videos
+* [youtube:comments] Fix error handling and add `itct` to params by [coletdjnz](https://github.com/coletdjnz)
+* [youtube_live_chat] Fix download with cookies by [siikamiika](https://github.com/siikamiika)
+* [youtube_live_chat] Use `clickTrackingParams` by [siikamiika](https://github.com/siikamiika)
+* [Funimation] Rewrite extractor
+ * Add `FunimationShowIE` by [Mevious](https://github.com/Mevious)
+ * **Treat the different versions of an episode as different formats of a single video**
+ * This changes the video `id` and will break existing archives
+ * Compat option `seperate-video-versions` to fall back to old behavior including using the old video ids
+ * Support direct `/player/` URL
+ * Extractor options `language` and `version` to pre-select them during extraction
+ * These options may be removed in the future if we can extract all formats without additional network requests
+ * Do not rely on these for format selection and use `-f` filters instead
+* [AdobePass] Add Spectrum MSO by [kevinoconnor7](https://github.com/kevinoconnor7), [ohmybahgosh](https://github.com/ohmybahgosh)
+* [facebook] Extract description and fix title
+* [fancode] Fix extraction, support live and allow login with refresh token by [zenerdi0de](https://github.com/zenerdi0de)
+* [plutotv] Improve `_VALID_URL`
+* [RCTIPlus] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Soundcloud] Allow login using oauth token by [blackjack4494](https://github.com/blackjack4494)
+* [TBS] Support livestreams by [llacb47](https://github.com/llacb47)
+* [videa] Fix extraction by [nyuszika7h](https://github.com/nyuszika7h)
+* [yahoo] Fix extraction by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
+* Process videos when using `--ignore-no-formats-error` by [krichbanana](https://github.com/krichbanana)
+* Fix `--throttled-rate` when using `--load-info-json`
+* Fix `--flat-playlist` when entry has no `ie_key`
+* Fix `check_formats` catching `ExtractorError` instead of `DownloadError`
+* Fix deprecated option `--list-formats-old`
+* [downloader/ffmpeg] Fix `--ppa` when using simultaneous download
+* [extractor] Prevent unnecessary download of hls manifests and refactor `hls_split_discontinuity`
+* [fragment] Handle status of download and errors in threads correctly; and minor refactoring
+* [thumbnailsconvertor] Treat `jpeg` as `jpg`
+* [utils] Fix issues with `LazyList` reversal
+* [extractor] Allow extractors to set their own login hint
+* [cleanup] Simplify format selector code with `LazyList` and `yield from`
+* [cleanup] Clean `extractor.common._merge_subtitles` signature
+* [cleanup] Fix some typos
+
+
+### 2021.06.23
+
+* Merge youtube-dl: Upto [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961)
+* **Add option `--throttled-rate`** below which video data is re-extracted
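+ * Eg (illustrative): `--throttled-rate 100K` re-extracts the video data if the download speed drops below ~100 KB/s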
+* [fragment] **Merge during download for `-N`**, and refactor `hls`/`dash`
+* [websockets] Add `WebSocketFragmentFD` by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
+* Allow `images` formats in addition to video/audio
+* [downloader/mhtml] Add new downloader for slideshows/storyboards by [fstirlitz](https://github.com/fstirlitz)
+* [youtube] Temporary **fix for age-gate**
+* [youtube] Support ongoing live chat by [siikamiika](https://github.com/siikamiika)
+* [youtube] Improve SAPISID cookie handling by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Login is not needed for `:ytrec`
+* [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz)
+* [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao)
+* [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz)
+* [funimation] Extract subtitles
+* [pornhub] Extract `cast`
+* [hotstar] Use server time for authentication instead of local time
+* [EmbedThumbnail] Fix for already downloaded thumbnail
+* [EmbedThumbnail] Add compat-option `embed-thumbnail-atomicparsley`
+* Expand `--check-formats` to thumbnails
+* Fix id sanitization in filenames
+* Skip fixup of existing files and add `--fixup force` to force it
+* Better error handling of syntax errors in `-f`
+* Use `NamedTemporaryFile` for `--check-formats`
+* [aria2c] Lower `--min-split-size` for HTTP downloads
+* [options] Rename `--add-metadata` to `--embed-metadata`
+* [utils] Improve `LazyList` and add tests
+* [build] Build Windows x86 version with py3.7 and remove redundant tests by [pukkandan](https://github.com/pukkandan), [shirt](https://github.com/shirt-dev)
+* [docs] Clarify that `--embed-metadata` embeds chapter markers
+* [cleanup] Refactor fixup
+
+
+### 2021.06.09
+
+* Fix bug where `%(field)d` in filename template throws error
+* [outtmpl] Improve offset parsing
+* [test] More rigorous tests for `prepare_filename`
+
+### 2021.06.08
+
+* Remove support for obsolete Python versions: Only 3.6+ is now supported
+* Merge youtube-dl: Upto [commit/c2350ca](https://github.com/ytdl-org/youtube-dl/commit/c2350cac243ba1ec1586fe85b0d62d1b700047a2)
+* [hls] Fix decryption for multithreaded downloader
+* [extractor] Fix pre-checking archive for some extractors
+* [extractor] Fix FourCC fallback when parsing ISM by [fstirlitz](https://github.com/fstirlitz)
+* [twitcasting] Add TwitCastingUserIE, TwitCastingLiveIE by [pukkandan](https://github.com/pukkandan), [nao20010128nao](https://github.com/nao20010128nao)
+* [vidio] Add VidioPremierIE and VidioLiveIE by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [viki] Fix extraction from [ytdl-org/youtube-dl@59e583f](https://github.com/ytdl-org/youtube-dl/commit/59e583f7e8530ca92776c866897d895c072e2a82)
+* [youtube] Support shorts URL
+* [zoom] Extract transcripts as subtitles
+* Add field `original_url` with the user-inputted URL
+* Fix and refactor `prepare_outtmpl`
+* Make more fields available for `--print` when used with `--flat-playlist`
+* [utils] Generalize `traverse_dict` to `traverse_obj`
+* [downloader/ffmpeg] Hide FFmpeg banner unless in verbose mode by [fstirlitz](https://github.com/fstirlitz)
+* [build] Release `yt-dlp.tar.gz`
+* [build,update] Add GNU-style SHA512 and prepare updater for similar SHA256 by [nihil-admirari](https://github.com/nihil-admirari)
+* [pyinst] Show Python version in exe metadata by [nihil-admirari](https://github.com/nihil-admirari)
+* [docs] Improve documentation of dependencies
+* [cleanup] Mark unused files
+* [cleanup] Point all shebang to `python3` by [fstirlitz](https://github.com/fstirlitz)
+* [cleanup] Remove duplicate file `trovolive.py`
+
+
+### 2021.06.01
+
+* Merge youtube-dl: Upto [commit/d495292](https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf)
+* Pre-check archive and filters during playlist extraction
+* Handle Basic Auth `user:pass` in URLs by [hhirtz](https://github.com/hhirtz) and [pukkandan](https://github.com/pukkandan)
+* [archiveorg] Add YoutubeWebArchiveIE by [coletdjnz](https://github.com/coletdjnz) and [alex-gedeon](https://github.com/alex-gedeon)
+* [fancode] Add extractor by [rhsmachine](https://github.com/rhsmachine)
+* [patreon] Support vimeo embeds by [rhsmachine](https://github.com/rhsmachine)
+* [Saitosan] Add new extractor by [llacb47](https://github.com/llacb47)
+* [ShemarooMe] Add extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [telemundo] Add extractor by [king-millez](https://github.com/king-millez)
+* [SonyLIV] Add SonyLIVSeriesIE and subtitle support by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Add HotStarSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [Voot] Add VootSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [vidio] Support login and premium videos by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [fragment] When using `-N`, do not keep the fragment content in memory
+* [ffmpeg] Download and merge in a single step if possible
+* [ThumbnailsConvertor] Support conversion to `png` and make it the default by [louie-github](https://github.com/louie-github)
+* [VideoConvertor] Generalize with remuxer and allow conditional recoding
+* [EmbedThumbnail] Embed in `mp4`/`m4a` using mutagen by [tripulse](https://github.com/tripulse) and [pukkandan](https://github.com/pukkandan)
+* [EmbedThumbnail] Embed if any thumbnail was downloaded, not just the best
+* [EmbedThumbnail] Correctly escape filename
+* [update] Replace self without launching a subprocess in Windows
+* [update] Block further update for unsupported systems
+* Refactor `__process_playlist` by creating `LazyList`
+* Write messages to `stderr` when both `quiet` and `verbose` are enabled
+* Sanitize and sort playlist thumbnails
+* Remove `None` values from `info.json`
+* [extractor] Always prefer native hls downloader by default
+* [extractor] Skip subtitles without URI in m3u8 manifests by [hheimbuerger](https://github.com/hheimbuerger)
+* [extractor] Functions to parse `socket.io` response as `json` by [pukkandan](https://github.com/pukkandan) and [llacb47](https://github.com/llacb47)
+* [extractor] Allow `note=False` when extracting manifests
+* [utils] Escape URLs in `sanitized_Request`, not `sanitize_url`
+* [hls] Disable external downloader for `webvtt`
+* [youtube] `/live` URLs should raise error if channel is not live
+* [youtube] Bug fixes
+* [zee5] Fix m3u8 formats' extension
+* [ard] Allow URLs without `-` before id by [olifre](https://github.com/olifre)
+* [cleanup] `YoutubeDL._match_entry`
+* [cleanup] Refactor updater
+* [cleanup] Refactor ffmpeg convertors
+* [cleanup] setup.py
+
+
+### 2021.05.20
+
+* **Youtube improvements**:
+ * Support youtube music `MP`, `VL` and `browse` pages
+ * Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
+ * Extract multiple subtitles in same language by [pukkandan](https://github.com/pukkandan) and [tpikonen](https://github.com/tpikonen)
+ * Redirect channels that don't have a `videos` tab to their `UU` playlists
+ * Support in-channel search
+ * Sort audio-only formats correctly
+ * Always extract `maxresdefault` thumbnail
+ * Extract audio language
+ * Add subtitle language names by [nixxo](https://github.com/nixxo) and [tpikonen](https://github.com/tpikonen)
+ * Show alerts only from the final webpage
+ * Add `html5=1` param to `get_video_info` page requests by [coletdjnz](https://github.com/coletdjnz)
+ * Better message when login required
+* **Add option `--print`**: to print any field/template
+ * Makes redundant: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
+* Field `additional_urls` to download additional videos from metadata using [`--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata)
+* Merge youtube-dl: Upto [commit/dfbbe29](https://github.com/ytdl-org/youtube-dl/commit/dfbbe2902fc67f0f93ee47a8077c148055c67a9b)
+* Write thumbnail of playlist and add `pl_thumbnail` outtmpl key
+* [embedthumbnail] Add `flac` support and refactor `mutagen` code by [pukkandan](https://github.com/pukkandan) and [tripulse](https://github.com/tripulse)
+* [audius:artist] Add extractor by [king-millez](https://github.com/king-millez)
+* [parlview] Add extractor by [king-millez](https://github.com/king-millez)
+* [tenplay] Fix extractor by [king-millez](https://github.com/king-millez)
+* [rmcdecouverte] Generalize `_VALID_URL`
+* Add compat-option `no-attach-infojson`
+* Add field `name` for subtitles
+* Ensure `post_extract` and `pre_process` only run once
+* Fix `--check-formats` when there is network error
+* Standardize `write_debug` and `get_param`
+* [options] Alias `--write-comments`, `--no-write-comments`
+* [options] Refactor callbacks
+* [test:download] Only extract enough videos for `playlist_mincount`
+* [extractor] Bugfix for when `compat_opts` is not given
+* [build] Fix x86 build by [shirt](https://github.com/shirt-dev)
+* [cleanup] Code formatting, youtube tests and readme
+
+### 2021.05.11
+* **Deprecate support for python versions < 3.6**
+* **Subtitle extraction from manifests** by [fstirlitz](https://github.com/fstirlitz). See [be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
+* **Improve output template:**
+ * Allow slicing lists/strings using `field.start:end:step`
+ * A field can also be used as offset like `field1+num+field2`
+ * A default value can be given using `field|default`
+ * Prevent invalid fields from causing errors
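+ * Eg (illustrative): `%(title.:30)s` (first 30 characters of the title), `%(playlist_index+10)s`, `%(uploader|Unknown)s`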
+* **Merge youtube-dl**: Upto [commit/a726009](https://github.com/ytdl-org/youtube-dl/commit/a7260099873acc6dc7d76cafad2f6b139087afd0)
+* **Remove options** `-l`, `-t`, `-A` completely and disable `--auto-number`, `--title`, `--literal`, `--id`
+* [Plugins] Prioritize plugins over standard extractors and prevent plugins from overwriting the standard extractor classes
+* [downloader] Fix `quiet` and `to_stderr`
+* [fragment] Ensure the file is closed on error
+* [fragment] Make sure first segment is not skipped
+* [aria2c] Fix whitespace being stripped off
+* [embedthumbnail] Fix bug where jpeg thumbnails were converted again
+* [FormatSort] Fix for when some formats have quality and others don't
+* [utils] Add `network_exceptions`
+* [utils] Escape URL while sanitizing
+* [ukcolumn] Add extractor
+* [whowatch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [CBS] Improve `_VALID_URL` to support movies
+* [crackle] Improve extraction
+* [curiositystream] Fix collections
+* [francetvinfo] Improve video id extraction
+* [generic] Respect the encoding in manifest
+* [limelight] Obey `allow_unplayable_formats`
+* [mediasite] Generalize URL pattern by [fstirlitz](https://github.com/fstirlitz)
+* [mxplayer] Add MxplayerShowIE by [Ashish0804](https://github.com/Ashish0804)
+* [nebula] Move to nebula.app by [Lamieur](https://github.com/Lamieur)
+* [niconico] Fix HLS formats by [CXwudi](https://github.com/CXwudi), [tsukumijima](https://github.com/tsukumijima), [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
+* [niconico] Fix title and thumbnail extraction by [CXwudi](https://github.com/CXwudi)
+* [plutotv] Extract subtitles from manifests
+* [plutotv] Fix format extraction for some urls
+* [rmcdecouverte] Improve `_VALID_URL`
+* [sonyliv] Fix `title` and `series` extraction by [Ashish0804](https://github.com/Ashish0804)
+* [tubi] Raise "no video formats" error when video url is empty
+* [youtube:tab] Detect playlists inside community posts
+* [youtube] Add `oembed` to reserved names
+* [zee5] Fix extraction for some URLs by [Hadi0609](https://github.com/Hadi0609)
+* [zee5] Fix py2 compatibility
+* Fix `playlist_index` and add `playlist_autonumber`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details
+* Add experimental option `--check-formats` to test the URLs before format selection
+* Option `--compat-options` to revert [some of yt-dlp's changes](https://github.com/yt-dlp/yt-dlp#differences-in-default-behavior)
+ * Deprecates `--list-formats-as-table`, `--list-formats-old`
+* Fix number of digits in `%(playlist_index)s`
+* Fix case sensitivity of format selector
+* Revert "[core] be able to hand over id and title using url_result"
+* Do not strip out whitespaces in `-o` and `-P`
+* Fix `preload_download_archive` writing verbose message to `stdout`
+* Move option warnings to `YoutubeDL` so that they obey `--no-warnings` and can output colors
+* Py2 compatibility for `FileNotFoundError`
+
+
+### 2021.04.22
+* **Improve output template:**
+ * Objects can be traversed like `%(field.key1.key2)s`
+ * An offset can be added to numeric fields as `%(field+N)s`
+ * Deprecates `--autonumber-start`
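+ * Eg (illustrative; the field path is hypothetical): `%(formats.0.format_id)s` (traversal), `%(playlist_index+10)s` (offset)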
+* **Improve `--sub-langs`:**
+ * Treat `--sub-langs` entries as regex
+ * `all` can be used to refer to all the subtitles
+ * Language codes can be prefixed with `-` to exclude them
+ * Deprecates `--all-subs`
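+ * Eg (illustrative): `--sub-langs "en.*,ja"` or `--sub-langs all,-live_chat`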
+* Add option `--ignore-no-formats-error` to ignore the "no video format" and similar errors
+* Add option `--skip-playlist-after-errors` to skip the rest of a playlist after a given number of errors are encountered
+* Merge youtube-dl: Upto [commit/7e8b3f9](https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438)
+* [downloader] Fix bug in downloader selection
+* [BilibiliChannel] Fix pagination by [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
+* [rai] Add support for http formats by [nixxo](https://github.com/nixxo)
+* [TubiTv] Add TubiTvShowIE by [Ashish0804](https://github.com/Ashish0804)
+* [twitcasting] Fix extractor
+* [viu:ott] Fix extractor and support series by [lkho](https://github.com/lkho) and [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Show unavailable videos in playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Reload with unavailable videos for all playlists
+* [youtube] Ignore invalid stretch ratio
+* [youtube] Improve channel syncid extraction to support ytcfg by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Standardize API calls for tabs, mixes and search by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Bugfix in `_extract_ytcfg`
+* [mildom:user:vod] Download only necessary amount of pages
+* [mildom] Remove proxy completely by [fstirlitz](https://github.com/fstirlitz)
+* [go] Fix `_VALID_URL`
+* [MetadataFromField] Improve regex and add tests
+* [Exec] Ensure backward compatibility when the command contains `%`
+* [extractor] Fix inconsistent use of `report_warning`
+* Ensure `mergeall` selects best format when multistreams are disabled
+* Improve the yt-dlp.sh script by [fstirlitz](https://github.com/fstirlitz)
+* [lazy_extractor] Do not load plugins
+* [ci] Disable fail-fast
+* [docs] Clarify which deprecated options still work
+* [docs] Fix typos
+
+
+### 2021.04.11
+* Add option `--convert-thumbnails` (only jpg currently supported)
+* Format selector `mergeall` to download and merge all formats
+* Pass any field to `--exec` using similar syntax to output template
+* Choose downloader for each protocol using `--downloader PROTO:NAME`
+ * Alias `--downloader` for `--external-downloader`
+ * Added `native` as an option for the downloader
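+ * Eg (illustrative): `--downloader aria2c --downloader "dash,m3u8:native"` uses aria2c for everything except DASH/HLS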
+* Merge youtube-dl: Upto [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
+* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804)
+* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47)
+* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger)
+* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
+* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
+* [youtube] Fix thumbnail URL
+* [youtube] Parse API parameters from initial webpage by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Extract comments' approximate timestamp by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix alert extraction
+* [bilibili] Fix uploader
+* [utils] Add `datetime_from_str` and `datetime_add_months` by [coletdjnz](https://github.com/coletdjnz)
+* Run some `postprocessors` before actual download
+* Improve argument parsing for `-P`, `-o`, `-S`
+* Fix some `m3u8` not obeying `--allow-unplayable-formats`
+* Fix default of `dynamic_mpd`
+* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
+* [docs] Improvements
+
+### 2021.04.03
+* Merge youtube-dl: Upto [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a)
+* Ability to set a specific field in the file's metadata using `--parse-metadata`
+* Ability to select n'th best format like `-f bv*.2`
+* [DiscoveryPlus] Add discoveryplus.in
+* [la7] Add podcasts and podcast playlists by [nixxo](https://github.com/nixxo)
+* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao)
+* [ard:mediathek] Fix video id extraction
+* [generic] Detect Invidious' link element
+* [youtube] Show premium state in `availability` by [coletdjnz](https://github.com/coletdjnz)
+* [viewsource] Add extractor to handle `view-source:`
+* [sponskrub] Run before embedding thumbnail
+* [docs] Improve `--parse-metadata` documentation
+
+
+### 2021.03.24.1
+* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)
+
+### 2021.03.24
+* Merge youtube-dl: Upto 2021.03.25 ([commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf))
+* Parse metadata from multiple fields using `--parse-metadata`
+* Ability to load playlist infojson using `--load-info-json`
+* Write current epoch to infojson when using `--no-clean-infojson`
+* [youtube_live_chat] Fix bug when trying to set cookies
+* [niconico] Fix for when logged in by [CXwudi](https://github.com/CXwudi) and [xtkoba](https://github.com/xtkoba)
+* [linuxacademy] Fix login
+
+
+### 2021.03.21
+* Merge youtube-dl: Upto [commit/7e79ba7](https://github.com/ytdl-org/youtube-dl/commit/7e79ba7dd6e6649dd2ce3a74004b2044f2182881)
+* Option `--no-clean-infojson` to keep private keys in the infojson
+* [aria2c] Support retry/abort unavailable fragments by [damianoamatruda](https://github.com/damianoamatruda)
+* [aria2c] Better default arguments
+* [movefiles] Fix bugs and make more robust
+* [formatSort] Fix `quality` being ignored
+* [splitchapters] Fix for older ffmpeg
+* [sponskrub] Pass proxy to sponskrub
+* Make sure `post_hook` gets the final filename
+* Recursively remove any private keys from infojson
+* Embed video URL metadata inside `mp4` by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
+* Merge `webm` formats into `mkv` if thumbnails are to be embedded by [damianoamatruda](https://github.com/damianoamatruda)
+* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda)
+* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda)
+* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
+* [docs] Add deprecated options and aliases in readme
+* [docs] Fix some minor mistakes
+
+* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work)
+* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984)
+* [bilibili] Add anthology support by [animelover1984](https://github.com/animelover1984)
+* [amcnetworks] Fix extractor by [2ShedsJackson](https://github.com/2ShedsJackson)
+* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo)
+* [rcs] Improved extraction by [nixxo](https://github.com/nixxo)
+* [linuxacademy] Improve regex
+* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
+* [youtube] Bugfix for channel playlist extraction
+* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson)
+
+
+### 2021.03.15
+* **Split video by chapters**: using option `--split-chapters`
+ * The output file of the split files can be set with `-o`/`-P` using the prefix `chapter:`
+ * Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template
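+ * Eg (illustrative): `--split-chapters -o "chapter:%(title)s - %(section_number)03d %(section_title)s.%(ext)s"`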
+* **Parallel fragment downloads** by [shirt](https://github.com/shirt-dev)
+ * Use option `--concurrent-fragments` (`-N`) to set the number of threads (default 1)
+* Merge youtube-dl: Upto [commit/3be0980](https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea)
+* [zee5] Add Show Extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [rai] Fix DRM check by [nixxo](https://github.com/nixxo)
+* [wimtv] Add extractor by [nixxo](https://github.com/nixxo)
+* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo)
+* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7)
+* [youtube] Rewrite comment extraction by [coletdjnz](https://github.com/coletdjnz)
+* [embedthumbnail] Set mtime correctly
+* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
+
+
+### 2021.03.07
+* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix private feeds/playlists on multi-channel accounts by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Extract alerts from continuation by [coletdjnz](https://github.com/coletdjnz)
+* [cbs] Add support for ParamountPlus by [shirt](https://github.com/shirt-dev)
+* [mxplayer] Rewrite extractor with show support by [pukkandan](https://github.com/pukkandan) and [Ashish0804](https://github.com/Ashish0804)
+* [gedi] Improvements from youtube-dl by [nixxo](https://github.com/nixxo)
+* [vimeo] Fix videos with password by [teesid](https://github.com/teesid)
+* [lbry] Support `lbry://` url by [nixxo](https://github.com/nixxo)
+* [bilibili] Change `Accept` header by [pukkandan](https://github.com/pukkandan) and [animelover1984](https://github.com/animelover1984)
+* [trovo] Pass origin header
+* [rai] Check for DRM by [nixxo](https://github.com/nixxo)
+* [downloader] Fix bug for `ffmpeg`/`httpie`
+* [update] Fix updater removing the executable bit on some UNIX distros
+* [update] Fix current build hash for UNIX
+* [docs] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804)
+* Fix some videos downloading with `m3u8` extension
+* Remove "fixup is ignored" warning when fixup wasn't passed by user
+
+
+### 2021.03.03.2
+* [build] Fix bug
+
+### 2021.03.03
+* [youtube] Use new browse API for continuation page extraction by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
+* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev)
+* Merge youtube-dl: Upto [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03)
+* [mtv] Fix extractor
+* [nick] Fix extractor by [DennyDai](https://github.com/DennyDai)
+* [mxplayer] Add new extractor by [codeasashu](https://github.com/codeasashu)
+* [youtube] Throw error when `--extractor-retries` are exhausted
+* Reduce default of `--extractor-retries` to 3
+* Fix packaging bugs by [hseg](https://github.com/hseg)
+
+
+### 2021.03.01
+* Allow specifying path in `--external-downloader`
+* Add option `--sleep-requests` to sleep between requests
+* Add option `--extractor-retries` to retry on known extractor errors
+* Extract comments only when needed
+* `--get-comments` doesn't imply `--write-info-json` if `-J`, `-j` or `--print-json` are used
+* Fix `get_executable_path` by [shirt](https://github.com/shirt-dev)
+* [youtube] Retry on more known errors than just HTTP-5xx
+* [youtube] Fix inconsistent `webpage_url`
+* [tennistv] Fix format sorting
+* [bilibiliaudio] Recognize the file as audio-only
+* [hrfensehen] Fix wrong import
+* [viki] Fix viki play pass authentication by [RobinD42](https://github.com/RobinD42)
+* [readthedocs] Improvements by [shirt](https://github.com/shirt-dev)
+* [hls] Fix bug with m3u8 format extraction
+* [hls] Enable `--hls-use-mpegts` by default when downloading live-streams
+* [embedthumbnail] Fix bug with deleting original thumbnail
+* [build] Fix completion paths, zsh pip completion install by [hseg](https://github.com/hseg)
+* [ci] Disable download tests unless specifically invoked
+* Cleanup some code and fix typos
+
+
+### 2021.02.24
+* Moved project to an organization [yt-dlp](https://github.com/yt-dlp)
+* **Completely changed project name to yt-dlp** by [Pccode66](https://github.com/Pccode66) and [pukkandan](https://github.com/pukkandan)
+ * Also, `youtube-dlc` config files are no longer loaded
+* Merge youtube-dl: Upto [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi)
+* [Readthedocs](https://yt-dlp.readthedocs.io) support by [shirt](https://github.com/shirt-dev)
+* [youtube] Show if video was a live stream in info (`was_live`)
+* [Zee5] Add new extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [jwplatform] Add support for `hyland.com`
+* [tennistv] Fix extractor
+* [hls] Support media initialization by [shirt](https://github.com/shirt-dev)
+* [hls] Added option `--hls-split-discontinuity` to better support media discontinuity by [shirt](https://github.com/shirt-dev)
+* [ffmpeg] Allow passing custom arguments before -i using `--ppa "ffmpeg_i1:ARGS"` syntax
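+ * Eg (illustrative): `--ppa "ffmpeg_i1:-ss 30"` passes `-ss 30` before ffmpeg's first input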
+* Fix `--windows-filenames` removing `/` from UNIX paths
+* [hls] Show warning if pycryptodome is not found
+* [docs] Improvements
+ * Fix documentation of `Extractor Options`
+ * Document `all` in format selection
+ * Document `playable_in_embed` in output templates
+
+
+### 2021.02.19
+* Merge youtube-dl: Upto [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao)
+* [viki] Fix extractor
+* [niconico] Extract `channel` and `channel_id` by [kurumigi](https://github.com/kurumigi)
+* [youtube] Multiple page support for hashtag URLs
+* [youtube] Add more invidious instances
+* [youtube] Fix comment extraction when comment text is empty
+* Option `--windows-filenames` to force use of Windows-compatible filenames
+* [ExtractAudio] Bugfix
+* Don't raise `parser.error` when exiting for update
+* [MoveFiles] Fix for when merger can't run
+* Changed `--trim-file-name` to `--trim-filenames` to be similar to related options
+* Format Sort improvements:
+ * Prefer `vp9.2` more than other `vp9` codecs
+ * Remove forced priority of `quality`
+ * Remove unnecessary `field_preference` and misuse of `preference` from extractors
+* Build improvements:
+ * Fix hash output by [shirt](https://github.com/shirt-dev)
+ * Lock python package versions for x86 and use `wheels` by [shirt](https://github.com/shirt-dev)
+ * Exclude `vcruntime140.dll` from UPX by [jbruchon](https://github.com/jbruchon)
+ * Set version number based on UTC time, not local time
+ * Publish on PyPi only if token is set
+* [docs] Better document `--prefer-free-formats` and add `--no-prefer-free-format`
+
+
+### 2021.02.15
+* Merge youtube-dl: Upto [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org)
+* [niconico] Improved extraction and support encrypted/SMILE movies by [kurumigi](https://github.com/kurumigi), [tsukumijima](https://github.com/tsukumijima), [bbepis](https://github.com/bbepis), [pukkandan](https://github.com/pukkandan)
+* Fix HLS AES-128 with multiple keys in external downloaders by [shirt](https://github.com/shirt-dev)
+* [youtube_live_chat] Fix by using POST API by [siikamiika](https://github.com/siikamiika)
+* [rumble] Add support for video page
+* Option `--allow-unplayable-formats` to allow downloading unplayable video formats
+* [ExtractAudio] Don't re-encode when file is already in a common audio format
+* [youtube] Fix search continuations
+* [youtube] Fix for new accounts
+* Improve build/updater: by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
+ * Fix SHA256 calculation in build and implement hash checking for updater
+ * Exit immediately in Windows once the update process starts
+ * Fix updater for `x86.exe`
+ * Updater looks for both `yt-dlp` and `youtube-dlc` in releases for future-proofing
+ * Change optional dependency to `pycryptodome`
+* Fix issue with unicode filenames in aria2c by [shirt](https://github.com/shirt-dev)
+* Fix `allow_playlist_files` not being correctly passed through
+* Fix for empty HTTP head requests by [shirt](https://github.com/shirt-dev)
+* Fix `get_executable_path` in UNIX
+* [sponskrub] Print ffmpeg output and errors to terminal
+* `__real_download` should be false when ffmpeg is unavailable and no download takes place
+* Show `exe`/`zip`/`source` and 32/64bit in verbose message
+
+
+### 2021.02.09
+* **aria2c support for DASH/HLS**: by [shirt](https://github.com/shirt-dev)
+* **Implement Updater** (`-U`) by [shirt](https://github.com/shirt-dev)
+* [youtube] Fix comment extraction
+* [youtube_live_chat] Improve extraction
+* [youtube] Fix for channel URLs sometimes not downloading all pages
+* [aria2c] Changed default arguments to `--console-log-level=warn --summary-interval=0 --file-allocation=none -x16 -j16 -s16`
+* Add fallback for thumbnails
+* [embedthumbnail] Keep original thumbnail after conversion if `write_thumbnail` given
+* [embedsubtitle] Keep original subtitle after conversion if `write_subtitles` given
+* [pyinst.py] Move back to root dir
+* [youtube] Simplified renderer parsing and bugfixes
+* [movefiles] Fix compatibility with python2
+* [remuxvideo] Fix validation of conditional remux
+* [sponskrub] Don't raise error when the video does not exist
+* [docs] Crypto is an optional dependency
+
+
+### 2021.02.04
+* Merge youtube-dl: Upto [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1)
+* **Date/time formatting in output template:**
+ * You can use [`strftime`](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) to format date/time fields. Example: `%(upload_date>%Y-%m-%d)s`
+* **Multiple output templates:**
+ * Separate output templates can be given for the different metadata files by using `-o TYPE:TEMPLATE`
+ * The allowed types are: `subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson`
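+ * Eg (illustrative): `-o "%(title)s.%(ext)s" -o "thumbnail:thumbs/%(title)s.%(ext)s"`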
+* [youtube] More metadata extraction for channel/playlist URLs (channel, uploader, thumbnail, tags)
+* New option `--no-write-playlist-metafiles` to prevent writing playlist metadata files
+* [audius] Fix extractor
+* [youtube_live_chat] Fix `parse_yt_initial_data` and add `fragment_retries`
+* [postprocessor] Raise errors correctly
+* [metadatafromtitle] Fix bug when extracting data from numeric fields
+* Fix issue with overwriting files
+* Fix "Default format spec" appearing in quiet mode
+* [FormatSort] Allow user to prefer av01 over vp9 (The default is still vp9)
+* [FormatSort] Fix bug where `quality` had more priority than `hasvid`
+* [pyinst] Automatically detect python architecture and working directory
+* Strip out internal fields such as `_filename` from infojson
+
+
+### 2021.01.29
+* **Features from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl)**: by [animelover1984](https://github.com/animelover1984) and [bbepis](https://github.com/bbepis)
+ * Add `--get-comments`
+ * [youtube] Extract comments
+ * [bilibili] Added BiliBiliSearchIE, BilibiliChannelIE
+ * [bilibili] Extract comments
+ * [bilibili] Better video extraction
+ * Write playlist data to infojson
+ * [FFmpegMetadata] Embed infojson inside the video
+ * [EmbedThumbnail] Try embedding in mp4 using ffprobe and `-disposition`
+ * [EmbedThumbnail] Treat mka like mkv and mov like mp4
+ * [EmbedThumbnail] Embed in ogg/opus
+ * [VideoRemuxer] Conditionally remux video
+ * [VideoRemuxer] Add `-movflags +faststart` when remuxing to mp4
+ * [ffmpeg] Print entire stderr in verbose when there is error
+ * [EmbedSubtitle] Warn when embedding ass in mp4
+ * [anvato] Use NFLTokenGenerator if possible
+* **Parse additional metadata**: New option `--parse-metadata` to extract additional metadata from existing fields
+ * The extracted fields can be used in `--output`
+ * Deprecated `--metadata-from-title`
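+ * Eg (illustrative): `--parse-metadata "title:%(artist)s - %(title)s"` interprets the title as "Artist - Title"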
+* [Audius] Add extractor
+* [youtube] Extract playlist description and write it to `.description` file
+* Detect existing files even when using `recode`/`remux` (`extract-audio` is partially fixed)
+* Fix wrong user config from v2021.01.24
+* [youtube] Report error message from youtube as error instead of warning
+* [FormatSort] Fix some fields not sorting from v2021.01.24
+* [postprocessor] Deprecate `avconv`/`avprobe`. All current functionality is left untouched. But don't expect any new features to work with avconv
+* [postprocessor] Fix `write_debug` to not throw error when there is no `_downloader`
+* [movefiles] Don't give "can't find" warning when move is unnecessary
+* Refactor `update-version`, `pyinst.py` and related files
+* [ffmpeg] Document more formats that are supported for remux/recode
+
+
+### 2021.01.24
+* Merge youtube-dl: Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
+* Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins))
+* **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files
+ * The syntax is `-P "type:path" -P "type:path"`
+ * Valid types are: home, temp, description, annotation, subtitle, infojson, thumbnail
+ * Additionally, the configuration file is taken from the home directory or current directory
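+ * Eg (illustrative): `-P "home:/Videos" -P "temp:/tmp/ytdlp"`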
+* Allow passing different arguments to different external downloaders
+* [mildom] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* Warn when using old style `--external-downloader-args` and `--post-processor-args`
+* Fix `--no-overwrite` when using `--write-link`
+* [sponskrub] Output `unrecognized argument` error message correctly
+* [cbs] Make failure to extract title non-fatal
+* Fix typecasting when pre-checking archive
+* Fix issue with setting title on UNIX
+* Deprecate redundant aliases in `formatSort`. The aliases remain functional for backward compatibility, but will be left undocumented
+* [tests] Fix test_post_hooks
+* [tests] Split core and download tests
+
+
+### 2021.01.20
+* [TrovoLive] Add extractor (only VODs)
+* [pokemon] Add `/#/player` URLs
+* Improved parsing of multiple postprocessor-args, add `--ppa` as alias
+* [EmbedThumbnail] Simplify embedding in mkv
+* [sponskrub] Encode filenames correctly, better debug output and error message
+* [readme] Cleanup options
+
+
+### 2021.01.16
+* Merge youtube-dl: Upto [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
+* **Configuration files:**
+ * Portable configuration file: `./yt-dlp.conf`
+ * Allow the configuration files to be named `yt-dlp` instead of `youtube-dlc`. See [this](https://github.com/yt-dlp/yt-dlp#configuration) for details
+* Add PyPI release
+
+
+### 2021.01.14
+* Added option `--break-on-reject`
+* [roosterteeth.com] Fix for bonus episodes by [Zocker1999NET](https://github.com/Zocker1999NET)
+* [tiktok] Fix for when `share_info` is empty
+* [EmbedThumbnail] Fix bug due to incorrect function name
+* [docs] Changed sponskrub links to point to [yt-dlp/SponSkrub](https://github.com/yt-dlp/SponSkrub) since I am now providing both linux and windows releases
+* [docs] Change all links to correctly point to new fork URL
+* [docs] Fix typos
+
+
+### 2021.01.12
+* [roosterteeth.com] Add subtitle support by [samiksome](https://github.com/samiksome)
+* Added `--force-overwrites`, `--no-force-overwrites` by [alxnull](https://github.com/alxnull)
+* Changed fork name to `yt-dlp`
+* Fix typos by [FelixFrog](https://github.com/FelixFrog)
+* [ci] Option to skip
+* [changelog] Added unreleased changes in blackjack4494/yt-dlc
+
+
+### 2021.01.10
+* [archive.org] Fix extractor and add support for audio and playlists by [wporr](https://github.com/wporr)
+* [Animelab] Added by [mariuszskon](https://github.com/mariuszskon)
+* [youtube:search] Fix view_count by [ohnonot](https://github.com/ohnonot)
+* [youtube] Show if video is embeddable in info (`playable_in_embed`)
+* Update version badge automatically in README
+* Enable `test_youtube_search_matching`
+* Create `to_screen` and similar functions in postprocessor/common
+
+
+### 2021.01.09
+* [youtube] Fix bug in automatic caption extraction
+* Add `post_hooks` to YoutubeDL by [alexmerkel](https://github.com/alexmerkel)
+* Batch file enumeration improvements by [glenn-slayden](https://github.com/glenn-slayden)
+* Stop immediately when reaching `--max-downloads` by [glenn-slayden](https://github.com/glenn-slayden)
+* Fix incorrect ANSI sequence for restoring console-window title by [glenn-slayden](https://github.com/glenn-slayden)
+* Kill child processes when yt-dlc is killed by [Unrud](https://github.com/Unrud)
+
+
+### 2021.01.08
+* Merge youtube-dl: Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
+* Moved changelog to separate file
+
+
+### 2021.01.07-1
+* [Akamai] Fix by [nixxo](https://github.com/nixxo)
+* [Tiktok] merge youtube-dl tiktok extractor by [GreyAlien502](https://github.com/GreyAlien502)
+* [vlive] add support for playlists by [kyuyeunk](https://github.com/kyuyeunk)
+* [youtube_live_chat] Make sure `playerOffsetMs` is positive by [siikamiika](https://github.com/siikamiika)
+* Ignore extra data streams in ffmpeg by [jbruchon](https://github.com/jbruchon)
+* Allow passing different arguments to different postprocessors using `--postprocessor-args`
+* Deprecated `--sponskrub-args`. The same can now be done using `--postprocessor-args "sponskrub:<args>"`
+* [CI] Split tests into core-test and full-test
+
+
+### 2021.01.07
+* Removed priority of `av01` codec in `-S` since most devices don't support it yet
+* Added `duration_string` to be used in `--output`
+* Created First Release
+
+
+### 2021.01.05-1
+* **Changed defaults:**
+ * Enabled `--ignore-errors`
+ * Disabled `--video-multistreams` and `--audio-multistreams`
+ * Changed default format selection to `bv*+ba/b` when `--audio-multistreams` is disabled
+ * Changed default format sort order to `res,fps,codec,size,br,asr,proto,ext,has_audio,source,format_id`
+ * Changed `webm` to be more preferable than `flv` in format sorting
+ * Changed default output template to `%(title)s [%(id)s].%(ext)s`
+ * Enabled `--list-formats-as-table`
+
+
+### 2021.01.05
+* **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details
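+ * Eg (illustrative): `-S "res:720,fps"` prefers resolutions up to 720p, then higher fps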
+* **Format Selection:** See [Format Selection](README.md#format-selection) for details
+ * New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*`
+ * Changed video format sorting to show video-only files and video+audio files together
+ * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
+ * Added `b`,`w`,`v`,`a` as aliases for `best`, `worst`, `video` and `audio` respectively
+* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
+* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-sponsorblock-options) for details
+* Added `--force-download-archive` (`--force-write-archive`) by [h-h-h-h](https://github.com/h-h-h-h)
+* Added `--list-formats-as-table`, `--list-formats-old`
+* **Negative Options:** Makes it possible to negate most boolean options by adding a `no-` to the switch. Useful when you want to reverse an option that is defined in a config file
+ * Added `--no-ignore-dynamic-mpd`, `--no-allow-dynamic-mpd`, `--allow-dynamic-mpd`, `--youtube-include-hls-manifest`, `--no-youtube-include-hls-manifest`, `--no-youtube-skip-hls-manifest`, `--no-download`, `--no-download-archive`, `--resize-buffer`, `--part`, `--mtime`, `--no-keep-fragments`, `--no-cookies`, `--no-write-annotations`, `--no-write-info-json`, `--no-write-description`, `--no-write-thumbnail`, `--youtube-include-dash-manifest`, `--post-overwrites`, `--no-keep-video`, `--no-embed-subs`, `--no-embed-thumbnail`, `--no-add-metadata`, `--no-include-ads`, `--no-write-sub`, `--no-write-auto-sub`, `--no-playlist-reverse`, `--no-restrict-filenames`, `--youtube-include-dash-manifest`, `--no-format-sort-force`, `--flat-videos`, `--no-list-formats-as-table`, `--no-sponskrub`, `--no-sponskrub-cut`, `--no-sponskrub-force`
+ * Renamed: `--write-subs`, `--no-write-subs`, `--no-write-auto-subs`, `--write-auto-subs`. Note that these can still be used without the ending "s"
+* Relaxed validation for format filters so that any arbitrary field can be used
+* Fix for embedding thumbnail in mp3 by [pauldubois98](https://github.com/pauldubois98) ([ytdl-org/youtube-dl#21569](https://github.com/ytdl-org/youtube-dl/pull/21569))
+* Make Twitch video ID output from the playlist and VOD extractors the same. This is only a temporary fix
+* Merge youtube-dl: Up to [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
+ * Extractors [tiktok](https://github.com/ytdl-org/youtube-dl/commit/fb626c05867deab04425bad0c0b16b55473841a2) and [hotstar](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc) have not been merged
+* Cleaned up the fork for public use
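
A quick sketch of the new sorting and selection switches introduced in this release (the URL and field choices are illustrative only):

```sh
# sort formats by resolution first, then fps
hypervideo -S 'res,fps' 'https://example.com/video'
# new selectors: best video (may include audio) + best audio, else best combined
hypervideo -f 'bv*+ba/b' 'https://example.com/video'
```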
+
+
+**PS**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)
+
+### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc)
+* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan)
+* Youtube improvements by [pukkandan](https://github.com/pukkandan)
+ * Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL
+ * Fix some improper Youtube URLs
+ * Redirect channel home to /video
+ * Print youtube's warning message
+ * Handle Multiple pages for feeds better
+* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494)
+* Add --break-on-existing by [gergesh](https://github.com/gergesh)
+* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan)
+* [bitwave.tv] New extractor by [lorpus](https://github.com/lorpus)
+* [Gedi] Add extractor by [nixxo](https://github.com/nixxo)
+* [Rcs] Add new extractor by [nixxo](https://github.com/nixxo)
+* [skyit] New skyitalia extractor by [nixxo](https://github.com/nixxo)
+* [france.tv] Fix thumbnail URL by [renalid](https://github.com/renalid)
+* [ina] support mobile links by [B0pol](https://github.com/B0pol)
+* [instagram] Fix thumbnail extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [SouthparkDe] Support for English URLs by [xypwn](https://github.com/xypwn)
+* [spreaker] fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan)
+* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk)
+* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
+* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan)
+* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan)
+* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon)
diff --git a/MANIFEST.in b/MANIFEST.in
index 72879c5..e43cb87 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,9 +1,9 @@
-include README.md
-include LICENSE
include AUTHORS
-include ChangeLog
-include hypervideo.bash-completion
-include hypervideo.fish
+include Changelog.md
+include LICENSE
+include README.md
+include completions/*/*
+include supportedsites.md
include hypervideo.1
-recursive-include docs Makefile conf.py *.rst
+recursive-include devscripts *
recursive-include test *
diff --git a/Makefile b/Makefile
index 8608982..b54e8ad 100644
--- a/Makefile
+++ b/Makefile
@@ -1,59 +1,56 @@
-all: hypervideo README.md CONTRIBUTING.md README.txt hypervideo.1 hypervideo.bash-completion hypervideo.zsh hypervideo.fish
+all: hypervideo doc pypi-files
+clean: clean-test clean-dist clean-cache
+completions: completion-bash completion-fish completion-zsh
+doc: README.md CONTRIBUTING.md
+ot: offlinetest
+tar: hypervideo.tar.gz
-clean:
- rm -rf hypervideo.1.temp.md hypervideo.1 hypervideo.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ hypervideo.tar.gz hypervideo.zsh hypervideo.fish hypervideo_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp hypervideo hypervideo.exe
- find . -name "*.pyc" -delete
- find . -name "*.class" -delete
+# Keep this list in sync with MANIFEST.in
+# intended use: when building a source distribution,
+# make pypi-files && python setup.py sdist
+pypi-files: AUTHORS Changelog.md LICENSE README.md completions devscripts/* test/*
+
+.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest
+
+clean-test:
+ rm -rf *.3gp *.annotations.xml *.ape *.avi *.description *.dump *.flac *.flv *.frag *.frag.aria2 *.frag.urls \
+ *.info.json *.jpeg *.jpg *.live_chat.json *.m4a *.m4v *.mkv *.mp3 *.mp4 *.ogg *.opus *.part* *.png *.sbv *.srt \
+ *.swf *.swp *.ttml *.vtt *.wav *.webm *.webp *.ytdl test/testdata/player-*.js
+clean-dist:
+ rm -rf MANIFEST build/ dist/ .coverage cover/ hypervideo.tar.gz completions/ hypervideo_dl/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp hypervideo hypervideo.exe hypervideo_dl.egg-info/ AUTHORS .mailmap
+clean-cache:
+ find . \( -name "*.pyc" -o -name "*.class" \) -delete
+
+completion-bash: completions/bash/hypervideo
+completion-fish: completions/fish/hypervideo.fish
+completion-zsh: completions/zsh/_hypervideo
+lazy-extractors: hypervideo_dl/extractor/lazy_extractors.py
PREFIX ?= /usr/local
+DESTDIR ?= .
BINDIR ?= $(PREFIX)/bin
MANDIR ?= $(PREFIX)/man
SHAREDIR ?= $(PREFIX)/share
-PYTHON ?= /usr/bin/env python
+PYTHON ?= /usr/bin/env python3
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
-# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
-MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
-
-install: hypervideo hypervideo.1 hypervideo.bash-completion hypervideo.zsh hypervideo.fish
- install -d $(DESTDIR)$(BINDIR)
- install -m 755 hypervideo $(DESTDIR)$(BINDIR)
- install -d $(DESTDIR)$(MANDIR)/man1
- install -m 644 hypervideo.1 $(DESTDIR)$(MANDIR)/man1
- install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
- install -m 644 hypervideo.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/hypervideo
- install -d $(DESTDIR)$(SHAREDIR)/zsh/site-functions
- install -m 644 hypervideo.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_hypervideo
- install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
- install -m 644 hypervideo.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/hypervideo.fish
+install: hypervideo completions
+ install -Dm755 hypervideo $(DESTDIR)$(BINDIR)
+ install -Dm644 completions/bash/hypervideo $(DESTDIR)$(SHAREDIR)/bash-completion/completions/hypervideo
+ install -Dm644 completions/zsh/_hypervideo $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_hypervideo
+ install -Dm644 completions/fish/hypervideo.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/hypervideo.fish
codetest:
flake8 .
test:
- nosetests --verbose test
+ $(PYTHON) -m pytest
$(MAKE) codetest
-ot: offlinetest
-
-# Keep this list in sync with devscripts/run_tests.sh
offlinetest: codetest
- $(PYTHON) -m nose --verbose test \
- --exclude test_age_restriction.py \
- --exclude test_download.py \
- --exclude test_socks.py \
- --exclude test_subtitles.py \
- --exclude test_write_annotations.py \
- --exclude test_youtube_lists.py \
- --exclude test_youtube_signature.py
-
-tar: hypervideo.tar.gz
-
-.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest
-
-pypi-files: hypervideo.bash-completion README.txt hypervideo.1 hypervideo.fish
+ $(PYTHON) -m pytest -k "not download"
hypervideo: hypervideo_dl/*.py hypervideo_dl/*/*.py
mkdir -p zip
@@ -76,37 +73,24 @@ README.md: hypervideo_dl/*.py hypervideo_dl/*/*.py
CONTRIBUTING.md: README.md
$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
-README.txt: README.md
- pandoc -f $(MARKDOWN) -t plain README.md -o README.txt
-
-hypervideo.1: README.md
- $(PYTHON) devscripts/prepare_manpage.py hypervideo.1.temp.md
- pandoc -s -f $(MARKDOWN) -t man hypervideo.1.temp.md -o hypervideo.1
- rm -f hypervideo.1.temp.md
-
-hypervideo.bash-completion: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/bash-completion.in
+completions/bash/hypervideo: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/bash-completion.in
+ mkdir -p completions/bash
$(PYTHON) devscripts/bash-completion.py
-bash-completion: hypervideo.bash-completion
-
-hypervideo.zsh: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/zsh-completion.in
+completions/zsh/_hypervideo: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/zsh-completion.in
+ mkdir -p completions/zsh
$(PYTHON) devscripts/zsh-completion.py
-zsh-completion: hypervideo.zsh
-
-hypervideo.fish: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/fish-completion.in
+completions/fish/hypervideo.fish: hypervideo_dl/*.py hypervideo_dl/*/*.py devscripts/fish-completion.in
+ mkdir -p completions/fish
$(PYTHON) devscripts/fish-completion.py
-fish-completion: hypervideo.fish
-
-lazy-extractors: hypervideo_dl/extractor/lazy_extractors.py
-
_EXTRACTOR_FILES = $(shell find hypervideo_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
hypervideo_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
$(PYTHON) devscripts/make_lazy_extractors.py $@
-hypervideo.tar.gz: hypervideo README.md README.txt hypervideo.1 hypervideo.bash-completion hypervideo.zsh hypervideo.fish ChangeLog AUTHORS
- @tar -czf hypervideo.tar.gz --transform "s|^|hypervideo/|" --owner 0 --group 0 \
+hypervideo.tar.gz: all
+ @tar -czf $(DESTDIR)/hypervideo.tar.gz --transform "s|^|hypervideo/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \
--exclude '*.kate-swp' \
--exclude '*.pyc' \
@@ -114,10 +98,15 @@ hypervideo.tar.gz: hypervideo README.md README.txt hypervideo.1 hypervideo.bash-
--exclude '*~' \
--exclude '__pycache__' \
--exclude '.git' \
- --exclude 'docs/_build' \
-- \
- bin devscripts test hypervideo_dl docs \
- ChangeLog AUTHORS LICENSE README.md README.txt \
- Makefile MANIFEST.in hypervideo.1 hypervideo.bash-completion \
- hypervideo.zsh hypervideo.fish setup.py setup.cfg \
- hypervideo
+ bin README.md Changelog.md LICENSE \
+ CONTRIBUTING.md CONTRIBUTORS AUTHORS \
+ Makefile MANIFEST.in README.md completions \
+ setup.py setup.cfg hypervideo hypervideo_dl requirements.txt \
+ devscripts test tox.ini pytest.ini
+
+AUTHORS: .mailmap
+ git shortlog -s -n | cut -f2 | sort > AUTHORS
+
+.mailmap:
+ git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap
diff --git a/bin/hypervideo b/bin/hypervideo
index 73bf9b0..baecdeb 100755
--- a/bin/hypervideo
+++ b/bin/hypervideo
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python
import hypervideo_dl
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index 12abd45..e0768d2 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
import os
@@ -8,7 +8,7 @@ import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
import hypervideo_dl
-BASH_COMPLETION_FILE = "hypervideo.bash-completion"
+BASH_COMPLETION_FILE = "completions/bash/hypervideo"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
index 6f8aae1..2a8039e 100644
--- a/devscripts/buildserver.py
+++ b/devscripts/buildserver.py
@@ -1,3 +1,5 @@
 #!/usr/bin/python3
+# UNUSED
+
import argparse
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
index a62711e..7dd372f 100644
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
"""
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index b6e9949..84ced2d 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
import optparse
@@ -10,10 +10,11 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
import hypervideo_dl
from hypervideo_dl.utils import shell_quote
-FISH_COMPLETION_FILE = 'hypervideo.fish'
+FISH_COMPLETION_FILE = 'completions/fish/hypervideo.fish'
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
EXTRA_ARGS = {
+ 'remux-video': ['--arguments', 'mp4 mkv', '--exclusive'],
'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
# Options that need a file parameter
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
index 00dc5bd..09feeaa 100644
--- a/devscripts/generate_aes_testdata.py
+++ b/devscripts/generate_aes_testdata.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
from __future__ import unicode_literals
import codecs
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
index c4e5fc1..da89e07 100644
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -1,19 +1,31 @@
# coding: utf-8
-from __future__ import unicode_literals
-
import re
+from ..utils import bug_reports_message, write_string
+
+
+class LazyLoadMetaClass(type):
+ def __getattr__(cls, name):
+ if '_real_class' not in cls.__dict__:
+ write_string(
+ f'WARNING: Falling back to normal extractor since lazy extractor '
+ f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
+ return getattr(cls._get_real_class(), name)
+
-class LazyLoadExtractor(object):
+class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
_module = None
+ _WORKING = True
@classmethod
- def ie_key(cls):
- return cls.__name__[:-2]
+ def _get_real_class(cls):
+ if '_real_class' not in cls.__dict__:
+ mod = __import__(cls._module, fromlist=(cls.__name__,))
+ cls._real_class = getattr(mod, cls.__name__)
+ return cls._real_class
def __new__(cls, *args, **kwargs):
- mod = __import__(cls._module, fromlist=(cls.__name__,))
- real_cls = getattr(mod, cls.__name__)
+ real_cls = cls._get_real_class()
instance = real_cls.__new__(real_cls)
instance.__init__(*args, **kwargs)
return instance
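
The template above implements deferred loading: the stub class only imports its real module on first instantiation, or when code touches an attribute the stub does not define. A minimal standalone sketch of the same pattern, with invented names (`LazyThing`, and `collections.OrderedDict` as the stand-in target), not hypervideo's actual classes:

```python
import importlib


class LazyMeta(type):
    def __getattr__(cls, name):
        # Only reached when normal lookup fails on the stub:
        # load the real class and delegate the attribute access
        return getattr(cls._get_real_class(), name)


class LazyThing(metaclass=LazyMeta):
    _module = 'collections'  # stand-in for a heavy module
    _name = 'OrderedDict'    # stand-in for the real class

    @classmethod
    def _get_real_class(cls):
        if '_real_class' not in cls.__dict__:
            mod = importlib.import_module(cls._module)
            cls._real_class = getattr(mod, cls._name)
        return cls._real_class

    def __new__(cls, *args, **kwargs):
        # Instantiating the stub constructs the real class instead
        real = cls._get_real_class()
        instance = real.__new__(real)
        instance.__init__(*args, **kwargs)
        return instance


d = LazyThing(a=1)         # actually a collections.OrderedDict
print(type(d).__name__)    # -> OrderedDict
print(LazyThing.fromkeys)  # not on the stub; triggers the metaclass fallback
```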
diff --git a/devscripts/logo.ico b/devscripts/logo.ico
new file mode 100644
index 0000000..5503a43
--- /dev/null
+++ b/devscripts/logo.ico
Binary files differ
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index dbc2e08..8c5f107 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
import io
@@ -7,6 +7,8 @@ import re
def main():
+ return # This is unused in hypervideo
+
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
options, args = parser.parse_args()
if len(args) != 2:
@@ -20,8 +22,7 @@ def main():
bug_text = re.search(
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
dev_text = re.search(
- r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING HYPERVIDEO',
- readme).group(1)
+ r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING HYPERVIDEO', readme).group(1)
out = bug_text + dev_text
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index b9a851c..7a38e40 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
from __future__ import unicode_literals, print_function
from inspect import getsource
@@ -6,27 +7,35 @@ import os
from os.path import dirname as dirn
import sys
-print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
-
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
lazy_extractors_filename = sys.argv[1]
if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
+# Block plugins from loading
+plugins_dirname = 'ytdlp_plugins'
+plugins_blocked_dirname = 'ytdlp_plugins_blocked'
+if os.path.exists(plugins_dirname):
+ os.rename(plugins_dirname, plugins_blocked_dirname)
+
from hypervideo_dl.extractor import _ALL_CLASSES
from hypervideo_dl.extractor.common import InfoExtractor, SearchInfoExtractor
+if os.path.exists(plugins_blocked_dirname):
+ os.rename(plugins_blocked_dirname, plugins_dirname)
+
with open('devscripts/lazy_load_template.py', 'rt') as f:
module_template = f.read()
+CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
module_contents = [
- module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
- 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
+ module_template,
+ *[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
+ '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
ie_template = '''
class {name}({bases}):
- _VALID_URL = {valid_url!r}
_module = '{module}'
'''
@@ -47,14 +56,17 @@ def get_base_name(base):
def build_lazy_ie(ie, name):
- valid_url = getattr(ie, '_VALID_URL', None)
s = ie_template.format(
name=name,
bases=', '.join(map(get_base_name, ie.__bases__)),
- valid_url=valid_url,
module=ie.__module__)
+ valid_url = getattr(ie, '_VALID_URL', None)
+ if valid_url:
+ s += f' _VALID_URL = {valid_url!r}\n'
+ if not ie._WORKING:
+ s += ' _WORKING = False\n'
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
- s += '\n' + getsource(ie.suitable)
+ s += f'\n{getsource(ie.suitable)}'
if hasattr(ie, '_make_valid_url'):
# search extractors
s += make_valid_template.format(valid_url=ie._make_valid_url())
@@ -92,7 +104,7 @@ for ie in ordered_cls:
names.append(name)
module_contents.append(
- '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
+ '\n_ALL_CLASSES = [{0}]'.format(', '.join(names)))
module_src = '\n'.join(module_contents) + '\n'
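
Based on ie_template and build_lazy_ie above, one generated stub should look roughly like this (extractor name and URL pattern invented; the real output is written to hypervideo_dl/extractor/lazy_extractors.py):

```python
class LazyLoadExtractor:  # stand-in for the template base class shown earlier
    _module = None

# roughly what build_lazy_ie emits for a single extractor:
class ExampleIE(LazyLoadExtractor):
    _module = 'hypervideo_dl.extractor.example'
    _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>[0-9]+)'
    # '    _WORKING = False' is appended when the extractor is marked broken
```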
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 8fbce07..1a9a017 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -1,3 +1,8 @@
+#!/usr/bin/env python3
+
+# hypervideo --help | make_readme.py
+# This must be run in a console of correct width
+
from __future__ import unicode_literals
import io
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 09807b0..a079406 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
import io
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 5b74238..58090d4 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
from __future__ import unicode_literals
import io
diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat
index 01a79b6..b8bb393 100644
--- a/devscripts/run_tests.bat
+++ b/devscripts/run_tests.bat
@@ -1,17 +1,16 @@
+@setlocal
@echo off
+cd /d %~dp0..
-rem Keep this list in sync with the `offlinetest` target in Makefile
-set DOWNLOAD_TESTS="age_restriction^|download^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature"
-
-if "%YTDL_TEST_SET%" == "core" (
- set test_set="-I test_("%DOWNLOAD_TESTS%")\.py"
- set multiprocess_args=""
-) else if "%YTDL_TEST_SET%" == "download" (
- set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py"
- set multiprocess_args="--processes=4 --process-timeout=540"
+if ["%~1"]==[""] (
+ set "test_set="test""
+) else if ["%~1"]==["core"] (
+ set "test_set="-m not download""
+) else if ["%~1"]==["download"] (
+ set "test_set="-m "download""
) else (
- echo YTDL_TEST_SET is not set or invalid
+ echo.Invalid test type "%~1". Use "core" ^| "download"
exit /b 1
)
-nosetests test --verbose %test_set:"=% %multiprocess_args:"=%
+pytest %test_set%
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
index b8f48b9..c9a75ba 100755
--- a/devscripts/run_tests.sh
+++ b/devscripts/run_tests.sh
@@ -1,22 +1,14 @@
-#!/bin/bash
-
-# Keep this list in sync with the `offlinetest` target in Makefile
-DOWNLOAD_TESTS="age_restriction|download|socks|subtitles|write_annotations|youtube_lists|youtube_signature"
-
-test_set=""
-multiprocess_args=""
-
-case "$YTDL_TEST_SET" in
- core)
- test_set="-I test_($DOWNLOAD_TESTS)\.py"
- ;;
- download)
- test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
- multiprocess_args="--processes=4 --process-timeout=540"
- ;;
- *)
- break
- ;;
-esac
-
-nosetests test --verbose $test_set $multiprocess_args
+#!/bin/sh
+
+if [ -z "$1" ]; then
+ test_set='test'
+elif [ "$1" = 'core' ]; then
+ test_set="-m not download"
+elif [ "$1" = 'download' ]; then
+ test_set="-m download"
+else
+ echo 'Invalid test type "'$1'". Use "core" | "download"'
+ exit 1
+fi
+
+python3 -m pytest "$test_set"
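
In short, the rewritten script maps its single argument onto pytest invocations (assuming the repository's pytest configuration defines a `download` marker):

```sh
devscripts/run_tests.sh            # pytest test           (everything)
devscripts/run_tests.sh core       # pytest -m "not download"
devscripts/run_tests.sh download   # pytest -m download
```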
diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in
index 1906949..e5cb92e 100644
--- a/devscripts/zsh-completion.in
+++ b/devscripts/zsh-completion.in
@@ -16,6 +16,8 @@ __hypervideo_dl() {
_path_files
elif [[ ${prev} =~ ${diropts} ]]; then
_path_files -/
+ elif [[ ${prev} == "--remux-video" ]]; then
+ _arguments '*: :(mp4 mkv)'
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index b570469..c8620a5 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
import os
@@ -8,7 +8,7 @@ import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
import hypervideo_dl
-ZSH_COMPLETION_FILE = "hypervideo.zsh"
+ZSH_COMPLETION_FILE = "completions/zsh/_hypervideo"
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
diff --git a/hypervideo_dl/YoutubeDL.py b/hypervideo_dl/YoutubeDL.py
index d8621ed..5b5a0d7 100755
--- a/hypervideo_dl/YoutubeDL.py
+++ b/hypervideo_dl/YoutubeDL.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import absolute_import, unicode_literals
@@ -9,6 +9,7 @@ import copy
import datetime
import errno
import fileinput
+import functools
import io
import itertools
import json
@@ -19,29 +20,32 @@ import platform
import re
import shutil
import subprocess
-import socket
import sys
+import tempfile
import time
import tokenize
import traceback
import random
+import unicodedata
from string import ascii_letters
from .compat import (
compat_basestring,
- compat_cookiejar,
compat_get_terminal_size,
- compat_http_client,
compat_kwargs,
compat_numeric_types,
compat_os_name,
+ compat_pycrypto_AES,
+ compat_shlex_quote,
compat_str,
compat_tokenize_tokenize,
compat_urllib_error,
compat_urllib_request,
compat_urllib_request_DataHandler,
+ windows_enable_vt_mode,
)
+from .cookies import load_cookies
from .utils import (
age_restricted,
args_to_str,
@@ -51,21 +55,34 @@ from .utils import (
DEFAULT_OUTTMPL,
determine_ext,
determine_protocol,
+ DOT_DESKTOP_LINK_TEMPLATE,
+ DOT_URL_LINK_TEMPLATE,
+ DOT_WEBLOC_LINK_TEMPLATE,
DownloadError,
encode_compat_str,
encodeFilename,
+ EntryNotInPlaylist,
error_to_compat_str,
+ ExistingVideoReached,
expand_path,
ExtractorError,
+ float_or_none,
format_bytes,
+ format_field,
formatSeconds,
GeoRestrictedError,
+ HEADRequest,
int_or_none,
+ iri_to_uri,
ISO3166Utils,
+ LazyList,
locked_file,
+ make_dir,
make_HTTPS_handler,
MaxDownloadsReached,
+ network_exceptions,
orderedSet,
+ OUTTMPL_TYPES,
PagedList,
parse_filesize,
PerRequestProxyHandler,
@@ -73,7 +90,9 @@ from .utils import (
PostProcessingError,
preferredencoding,
prepend_extension,
+ process_communicate_or_kill,
register_socks_protocols,
+ RejectedVideoReached,
render_table,
replace_extension,
SameFileError,
@@ -82,30 +101,53 @@ from .utils import (
sanitize_url,
sanitized_Request,
std_headers,
+ STR_FORMAT_RE_TMPL,
+ STR_FORMAT_TYPES,
str_or_none,
+ strftime_or_none,
subtitles_filename,
+ supports_terminal_sequences,
+ TERMINAL_SEQUENCES,
+ ThrottledDownload,
+ to_high_limit_path,
+ traverse_obj,
+ try_get,
UnavailableVideoError,
url_basename,
+ variadic,
version_tuple,
write_json_file,
write_string,
- YoutubeDLCookieJar,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
)
from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor import (
+ gen_extractor_classes,
+ get_info_extractor,
+ _LAZY_LOADER,
+ _PLUGIN_CLASSES as plugin_extractors
+)
from .extractor.openload import PhantomJSwrapper
-from .downloader import get_suitable_downloader
+from .downloader import (
+ FFmpegFD,
+ get_suitable_downloader,
+ shorten_protocol_name
+)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
+ get_postprocessor,
+ EmbedThumbnailPP,
+ FFmpegFixupDurationPP,
FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
FFmpegMergerPP,
FFmpegPostProcessor,
- get_postprocessor,
+ MoveFilesAfterDownloadPP,
+ _PLUGIN_CLASSES as plugin_postprocessors
)
from .version import __version__
@@ -150,24 +192,57 @@ class YoutubeDL(object):
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
- forceurl: Force printing final URL.
- forcetitle: Force printing title.
- forceid: Force printing ID.
- forcethumbnail: Force printing thumbnail URL.
- forcedescription: Force printing description.
- forcefilename: Force printing final filename.
- forceduration: Force printing duration.
+ forceprint: A list of templates to force print
+ forceurl: Force printing final URL. (Deprecated)
+ forcetitle: Force printing title. (Deprecated)
+ forceid: Force printing ID. (Deprecated)
+ forcethumbnail: Force printing thumbnail URL. (Deprecated)
+ forcedescription: Force printing description. (Deprecated)
+ forcefilename: Force printing final filename. (Deprecated)
+ forceduration: Force printing duration. (Deprecated)
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
- simulate: Do not download the video files.
- format: Video format code. See options.py for more information.
- outtmpl: Template for output names.
+ force_write_download_archive: Force writing download archive regardless
+ of 'skip_download' or 'simulate'.
+ simulate: Do not download the video files. If unset (or None),
+ simulate only if listsubtitles, listformats or list_thumbnails is used
+ format: Video format code. see "FORMAT SELECTION" for more details.
+ allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
+ ignore_no_formats_error: Ignore "No video formats" error. Useful for
+ extracting metadata even if the video is not actually
+ available for download (experimental)
+ format_sort: How to sort the video formats. see "Sorting Formats"
+ for more details.
+ format_sort_force: Force the given format_sort. see "Sorting Formats"
+ for more details.
+ allow_multiple_video_streams: Allow multiple video streams to be merged
+ into a single file
+ allow_multiple_audio_streams: Allow multiple audio streams to be merged
+ into a single file
+ check_formats: Whether to test if the formats are downloadable.
+ Can be True (check all), False (check none)
+ or None (check only if requested by extractor)
+ paths: Dictionary of output paths. The allowed keys are 'home',
+ 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
+ outtmpl: Dictionary of templates for output names. Allowed keys
+ are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
+ For compatibility with youtube-dl, a single string can also be used
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
- ignoreerrors: Do not stop on download errors.
+ trim_file_name: Limit length of filename (extension excluded)
+ windowsfilenames: Force the filenames to be windows compatible
+ ignoreerrors: Do not stop on download/postprocessing errors.
+ Can be 'only_download' to ignore only download errors.
+ Default is 'only_download' for CLI, but False for API
+ skip_playlist_after_errors: Number of allowed failures until the rest of
+ the playlist is skipped
force_generic_extractor: Force downloader to use the generic extractor
- nooverwrites: Prevent overwriting files.
+ overwrites: Overwrite all video and metadata files if True,
+ overwrite only non-video files if None
+ and don't overwrite any file if False
+ For compatibility with youtube-dl,
+ "nooverwrites" may also be used instead
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
playlist_items: Specific indices of playlist to download.
@@ -177,18 +252,33 @@ class YoutubeDL(object):
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
+ clean_infojson: Remove private fields from the infojson
+ getcomments: Extract video comments. This will not be written to disk
+ unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
+ allow_playlist_files: Whether to write playlists' description, infojson etc
+ also to disk when using the 'write*' options
write_all_thumbnails: Write all thumbnail formats to files
+ writelink: Write an internet shortcut file, depending on the
+ current platform (.url/.webloc/.desktop)
+ writeurllink: Write a Windows internet shortcut file (.url)
+ writewebloclink: Write a macOS internet shortcut file (.webloc)
+ writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Downloads all the subtitles of the video
+ allsubtitles: Deprecated - Use subtitleslangs = ['all']
+ Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: The format code for subtitles
- subtitleslangs: List of languages of the subtitles to download
+ subtitleslangs: List of languages of the subtitles to download (can be regex).
+ The list may contain "all" to refer to all the available
+ subtitles. The language can be prefixed with a "-" to
+ exclude it from the requested languages. Eg: ['all', '-live_chat']
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
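
For example, the subtitleslangs semantics above ('all' plus '-'-prefixed exclusions) would be exercised like this (values illustrative):

```python
ydl_opts = {
    'writesubtitles': True,
    # every available subtitle track except live chat, per the docstring
    'subtitleslangs': ['all', '-live_chat'],
}
```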
@@ -209,7 +299,14 @@ class YoutubeDL(object):
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
- cookiefile: File name where cookies should be read from and dumped to.
+ break_on_existing: Stop the download process after attempting to download a
+ file that is in the archive.
+ break_on_reject: Stop the download process when encountering a video that
+ has been filtered out.
+ cookiefile: File name where cookies should be read from and dumped to
+ cookiesfrombrowser: A tuple containing the name of the browser and the profile
+ name/path from where cookies are loaded.
+ Eg: ('chrome', ) or ('vivaldi', 'default')
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
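
And a sketch of the new cookiesfrombrowser tuple described above (the browser and profile names are examples):

```python
ydl_opts = {
    # (browser,) for the default profile, or (browser, profile_name_or_path)
    'cookiesfrombrowser': ('chrome',),
    # 'cookiesfrombrowser': ('vivaldi', 'default'),
}
```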
@@ -230,12 +327,18 @@ class YoutubeDL(object):
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
hypervideo_dl/postprocessor/__init__.py for a list.
- as well as any further keyword arguments for the
- postprocessor.
+ * when: When to run the postprocessor. Can be one of
+ pre_process|before_dl|post_process|after_move.
+ Assumed to be 'post_process' if not given
+ post_hooks: Deprecated - Register a custom postprocessor instead
+ A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished".
Check this first and ignore unknown values.
+ * info_dict: The extracted info_dict
If status is one of "downloading", or "finished", the
following properties may also be present:
@@ -256,7 +359,19 @@ class YoutubeDL(object):
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
+ postprocessor_hooks: A list of functions that get called on postprocessing
+ progress, with a dictionary with the entries
+ * status: One of "started", "processing", or "finished".
+ Check this first and ignore unknown values.
+ * postprocessor: Name of the postprocessor
+ * info_dict: The extracted info_dict
+
+ Progress hooks are guaranteed to be called at least twice
+ (with status "started" and "finished") if the processing is successful.
merge_output_format: Extension to use when merging formats.
+ final_ext: Expected final extension; used to detect when the file was
+ already downloaded and converted. "merge_output_format" is
+ replaced by this extension when given
fixup: Automatically correct known faults of the file.
One of:
- "never": do nothing
@@ -265,7 +380,9 @@ class YoutubeDL(object):
about it, warn otherwise (default)
source_address: Client-side IP address to bind to.
call_home: Boolean, true iff we are allowed to contact the
- hypervideo servers for debugging.
+ hypervideo servers for debugging. (BROKEN)
+ sleep_interval_requests: Number of seconds to sleep between requests
+ during extraction
sleep_interval: Number of seconds to sleep before each download when
used alone or a lower bound of a range for randomized
sleep before each download (minimum possible number
@@ -276,6 +393,7 @@ class YoutubeDL(object):
Must only be used along with sleep_interval.
Actual sleep time will be a random float from range
[sleep_interval; max_sleep_interval].
+ sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
match_filter: A function that gets called with the info_dict of
@@ -295,48 +413,86 @@ class YoutubeDL(object):
geo_bypass_country
The following options determine which downloader is picked:
- external_downloader: Executable of the external downloader to call.
- None or unset for standard (built-in) downloader.
- hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
+ external_downloader: A dictionary of protocol keys and the executable of the
+ external downloader to use for it. The allowed protocols
+ are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
+ Set the value to 'native' to use the native downloader
+ hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
+ or {'m3u8': 'ffmpeg'} instead.
+ Use the native HLS downloader instead of ffmpeg/avconv
if True, otherwise use ffmpeg/avconv if False, otherwise
use downloader suggested by extractor if None.
+ compat_opts: Compatibility options. See "Differences in default behavior".
+ The following options do not work when used through the API:
+ filename, abort-on-error, multistreams, no-live-chat, format-sort,
+ no-clean-infojson, no-playlist-metafiles, no-keep-subs.
+ Refer to __init__.py for their implementation
+ progress_template: Dictionary of templates for progress outputs.
+ Allowed keys are 'download', 'postprocess',
+ 'download-title' (console title) and 'postprocess-title'.
+ The template is mapped on a dictionary with keys 'progress' and 'info'
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see hypervideo_dl/downloader/common.py):
- nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
- noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize, external_downloader_args, hls_use_mpegts,
- http_chunk_size.
+ nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
+ max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
+ noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+ external_downloader_args.
The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg.
+ otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
- postprocessor_args: A list of additional command-line arguments for the
- postprocessor.
-
- The following options are used by the Youtube extractor:
- youtube_include_dash_manifest: If True (default), DASH manifests and related
- data will be downloaded and processed by extractor.
- You can reduce network I/O by disabling it if you don't
- care about DASH.
+ postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
+ and a list of additional command-line arguments for the
+ postprocessor/executable. The dict can also have "PP+EXE" keys
+ which are used when the given exe is used by the given PP.
+ Use 'default' as the name for arguments to be passed to all PP
+ For compatibility with youtube-dl, a single list of args
+ can also be used
+
+ The following options are used by the extractors:
+ extractor_retries: Number of times to retry for known errors
+ dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
+ hls_split_discontinuity: Split HLS playlists to different formats at
+ discontinuities such as ad breaks (default: False)
+ extractor_args: A dictionary of arguments to be passed to the extractors.
+ See "EXTRACTOR ARGUMENTS" for details.
+ Eg: {'youtube': {'skip': ['dash', 'hls']}}
+ youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+ If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH. (only for youtube)
+ youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+ If True (default), HLS manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about HLS. (only for youtube)
"""
_NUMERIC_FIELDS = set((
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
- 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'timestamp', 'release_timestamp',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'average_rating', 'comment_count', 'age_limit',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
- 'playlist_index',
))
+ _format_selection_exts = {
+ 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
+ 'video': {'mp4', 'flv', 'webm', '3gp'},
+ 'storyboards': {'mhtml'},
+ }
+
params = None
- _ies = []
- _pps = []
+ _ies = {}
+ _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
+ _printed_messages = set()
+ _first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
@@ -344,28 +500,45 @@ class YoutubeDL(object):
_screen_file = None
def __init__(self, params=None, auto_init=True):
- """Create a FileDownloader object with the given options."""
+ """Create a FileDownloader object with the given options.
+ @param auto_init Whether to load the default extractors and print header (if verbose).
+ Set to 'no_verbose_header' to not print the header
+ """
if params is None:
params = {}
- self._ies = []
+ self._ies = {}
self._ies_instances = {}
- self._pps = []
+ self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
+ self._printed_messages = set()
+ self._first_webpage_request = True
+ self._post_hooks = []
self._progress_hooks = []
+ self._postprocessor_hooks = []
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr
- self.params = {
- # Default parameters
- 'nocheckcertificate': False,
- }
- self.params.update(params)
+ self.params = params
self.cache = Cache(self)
+ windows_enable_vt_mode()
+ # FIXME: This will break if we ever print color to stdout
+ self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)
+
+ if sys.version_info < (3, 6):
+ self.report_warning(
+ 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
+
+ if self.params.get('allow_unplayable_formats'):
+ self.report_warning(
+ f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
+ 'This is a developer option intended for debugging. \n'
+ ' If you experience any issues while using this option, '
+ f'{self._color_text("DO NOT", "red")} open a bug report')
+
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
- self.report_warning(
- '%s is deprecated. Use %s instead.' % (option, suggestion))
+ self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
return True
return False
@@ -373,9 +546,22 @@ class YoutubeDL(object):
if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
- check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+ check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
+
+ for msg in self.params.get('warnings', []):
+ self.report_warning(msg)
+
+ if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
+ # nooverwrites was unnecessarily changed to overwrites
+ # in 0c3d0f51778b153f65c21906031c2e091fcfb641
+ # This ensures compatibility with both keys
+ self.params['overwrites'] = not self.params['nooverwrites']
+ elif self.params.get('overwrites') is None:
+ self.params.pop('overwrites', None)
+ else:
+ self.params['nooverwrites'] = not self.params['overwrites']
if params.get('bidi_workaround', False):
try:
@@ -414,29 +600,53 @@ class YoutubeDL(object):
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- if isinstance(params.get('outtmpl'), bytes):
- self.report_warning(
- 'Parameter outtmpl is bytes, but should be a unicode string. '
- 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+ self.outtmpl_dict = self.parse_outtmpl()
+
+ # Creating format selector here allows us to catch syntax errors before the extraction
+ self.format_selector = (
+ None if self.params.get('format') is None
+ else self.build_format_selector(self.params['format']))
self._setup_opener()
if auto_init:
- self.print_debug_header()
+ if auto_init != 'no_verbose_header':
+ self.print_debug_header()
self.add_default_info_extractors()
for pp_def_raw in self.params.get('postprocessors', []):
- pp_class = get_postprocessor(pp_def_raw['key'])
pp_def = dict(pp_def_raw)
- del pp_def['key']
+ when = pp_def.pop('when', 'post_process')
+ pp_class = get_postprocessor(pp_def.pop('key'))
pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp)
+ self.add_post_processor(pp, when=when)
+
+ for ph in self.params.get('post_hooks', []):
+ self.add_post_hook(ph)
for ph in self.params.get('progress_hooks', []):
self.add_progress_hook(ph)
register_socks_protocols()
+ def preload_download_archive(fn):
+ """Preload the archive, if any is specified"""
+ if fn is None:
+ return False
+ self.write_debug('Loading archive file %r\n' % fn)
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ self.archive.add(line.strip())
+ except IOError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return False
+ return True
+
+ self.archive = set()
+ preload_download_archive(self.params.get('download_archive'))
+
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
idxs = [
@@ -455,11 +665,19 @@ class YoutubeDL(object):
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
- self._ies.append(ie)
+ ie_key = ie.ie_key()
+ self._ies[ie_key] = ie
if not isinstance(ie, type):
- self._ies_instances[ie.ie_key()] = ie
+ self._ies_instances[ie_key] = ie
ie.set_downloader(self)
+ def _get_info_extractor_class(self, ie_key):
+ ie = self._ies.get(ie_key)
+ if ie is None:
+ ie = get_info_extractor(ie_key)
+ self.add_info_extractor(ie)
+ return ie
+
def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
@@ -479,15 +697,23 @@ class YoutubeDL(object):
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
- def add_post_processor(self, pp):
+ def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
- self._pps.append(pp)
+ self._pps[when].append(pp)
pp.set_downloader(self)
+ def add_post_hook(self, ph):
+ """Add the post hook"""
+ self._post_hooks.append(ph)
+
def add_progress_hook(self, ph):
- """Add the progress hook (currently only for the file downloader)"""
+ """Add the download progress hook"""
self._progress_hooks.append(ph)
+ def add_postprocessor_hook(self, ph):
+ """Add the postprocessing progress hook"""
+ self._postprocessor_hooks.append(ph)
+
def _bidi_workaround(self, message):
if not hasattr(self, '_output_channel'):
return message
@@ -501,33 +727,29 @@ class YoutubeDL(object):
for _ in range(line_count))
return res[:-len('\n')]
- def to_screen(self, message, skip_eol=False):
- """Print message to stdout if not in quiet mode."""
- return self.to_stdout(message, skip_eol, check_quiet=True)
-
- def _write_string(self, s, out=None):
- write_string(s, out=out, encoding=self.params.get('encoding'))
+ def _write_string(self, message, out=None, only_once=False):
+ if only_once:
+ if message in self._printed_messages:
+ return
+ self._printed_messages.add(message)
+ write_string(message, out=out, encoding=self.params.get('encoding'))
- def to_stdout(self, message, skip_eol=False, check_quiet=False):
- """Print message to stdout if not in quiet mode."""
+ def to_stdout(self, message, skip_eol=False, quiet=False):
+ """Print message to stdout"""
if self.params.get('logger'):
self.params['logger'].debug(message)
- elif not check_quiet or not self.params.get('quiet', False):
- message = self._bidi_workaround(message)
- terminator = ['\n', ''][skip_eol]
- output = message + terminator
-
- self._write_string(output, self._screen_file)
+ elif not quiet or self.params.get('verbose'):
+ self._write_string(
+ '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+ self._err_file if quiet else self._screen_file)
- def to_stderr(self, message):
- """Print message to stderr."""
+ def to_stderr(self, message, only_once=False):
+ """Print message to stderr"""
assert isinstance(message, compat_str)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
- message = self._bidi_workaround(message)
- output = message + '\n'
- self._write_string(output, self._err_file)
+ self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -543,7 +765,7 @@ class YoutubeDL(object):
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
- if self.params.get('simulate', False):
+ if self.params.get('simulate'):
return
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack
@@ -552,7 +774,7 @@ class YoutubeDL(object):
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
- if self.params.get('simulate', False):
+ if self.params.get('simulate'):
return
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack
@@ -589,8 +811,9 @@ class YoutubeDL(object):
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data)
- self.to_stderr(tb)
- if not self.params.get('ignoreerrors', False):
+ if tb:
+ self.to_stderr(tb)
+ if not self.params.get('ignoreerrors'):
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
exc_info = sys.exc_info()[1].exc_info
else:
@@ -598,7 +821,17 @@ class YoutubeDL(object):
raise DownloadError(message, exc_info)
self._download_retcode = 1
- def report_warning(self, message):
+ def to_screen(self, message, skip_eol=False):
+ """Print message to stdout if not in quiet mode"""
+ self.to_stdout(
+ message, skip_eol, quiet=self.params.get('quiet', False))
+
+ def _color_text(self, text, color):
+ if self.params.get('no_color'):
+ return text
+ return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
+
+ def report_warning(self, message, only_once=False):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
@@ -608,24 +841,24 @@ class YoutubeDL(object):
else:
if self.params.get('no_warnings'):
return
- if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
- _msg_header = '\033[0;33mWARNING:\033[0m'
- else:
- _msg_header = 'WARNING:'
- warning_message = '%s %s' % (_msg_header, message)
- self.to_stderr(warning_message)
+ self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
def report_error(self, message, tb=None):
'''
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
- _msg_header = '\033[0;31mERROR:\033[0m'
+ self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
+
+ def write_debug(self, message, only_once=False):
+ '''Log debug message or Print message to stderr'''
+ if not self.params.get('verbose', False):
+ return
+ message = '[debug] %s' % message
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
else:
- _msg_header = 'ERROR:'
- error_message = '%s %s' % (_msg_header, message)
- self.trouble(error_message, tb)
+ self.to_stderr(message, only_once)
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
@@ -634,136 +867,360 @@ class YoutubeDL(object):
except UnicodeEncodeError:
self.to_screen('[download] The file has already been downloaded')
- def prepare_filename(self, info_dict):
- """Generate the output filename."""
+ def report_file_delete(self, file_name):
+ """Report that existing file will be deleted."""
try:
- template_dict = dict(info_dict)
-
- template_dict['epoch'] = int(time.time())
- autonumber_size = self.params.get('autonumber_size')
- if autonumber_size is None:
- autonumber_size = 5
- template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
- if template_dict.get('resolution') is None:
- if template_dict.get('width') and template_dict.get('height'):
- template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
- elif template_dict.get('height'):
- template_dict['resolution'] = '%sp' % template_dict['height']
- elif template_dict.get('width'):
- template_dict['resolution'] = '%dx?' % template_dict['width']
+ self.to_screen('Deleting existing file %s' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('Deleting existing file')
+
+ def raise_no_formats(self, info, forced=False):
+ has_drm = info.get('__has_drm')
+ msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
+ expected = self.params.get('ignore_no_formats_error')
+ if forced or not expected:
+ raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
+ expected=has_drm or expected)
+ else:
+ self.report_warning(msg)
+
+ def parse_outtmpl(self):
+ outtmpl_dict = self.params.get('outtmpl', {})
+ if not isinstance(outtmpl_dict, dict):
+ outtmpl_dict = {'default': outtmpl_dict}
+ # Remove spaces in the default template
+ if self.params.get('restrictfilenames'):
+ sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
+ else:
+ sanitize = lambda x: x
+ outtmpl_dict.update({
+ k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
+ if outtmpl_dict.get(k) is None})
+ for key, val in outtmpl_dict.items():
+ if isinstance(val, bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+ return outtmpl_dict
+
+ def get_output_path(self, dir_type='', filename=None):
+ paths = self.params.get('paths', {})
+ assert isinstance(paths, dict)
+ path = os.path.join(
+ expand_path(paths.get('home', '').strip()),
+ expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
+ filename or '')
+
+ # Temporary fix for #4787
+ # 'Treat' all problem characters by passing filename through preferredencoding
+ # to workaround encoding issues with subprocess on python2 @ Windows
+ if sys.version_info < (3, 0) and sys.platform == 'win32':
+ path = encodeFilename(path, True).decode(preferredencoding())
+ return sanitize_path(path, force=self.params.get('windowsfilenames'))
+
+ @staticmethod
+ def _outtmpl_expandpath(outtmpl):
+ # expand_path translates '%%' into '%' and '$$' into '$'
+ # correspondingly that is not what we want since we need to keep
+ # '%%' intact for template dict substitution step. Working around
+ # with boundary-alike separator hack.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ return expand_path(outtmpl).replace(sep, '')
+
+ @staticmethod
+ def escape_outtmpl(outtmpl):
+ ''' Escape any remaining strings like %s, %abc% etc. '''
+ return re.sub(
+ STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
+ lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
+ outtmpl)
+
+ @classmethod
+ def validate_outtmpl(cls, outtmpl):
+ ''' @return None or Exception object '''
+ outtmpl = re.sub(
+ STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
+ lambda mobj: f'{mobj.group(0)[:-1]}s',
+ cls._outtmpl_expandpath(outtmpl))
+ try:
+ cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
+ return None
+ except ValueError as err:
+ return err
+
+ @staticmethod
+ def _copy_infodict(info_dict):
+ info_dict = dict(info_dict)
+ for key in ('__original_infodict', '__postprocessors'):
+ info_dict.pop(key, None)
+ return info_dict
+
+ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
+ """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
+ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
+
+ info_dict = self._copy_infodict(info_dict)
+ info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+ formatSeconds(info_dict['duration'], '-' if sanitize else ':')
+ if info_dict.get('duration', None) is not None
+ else None)
+ info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ if info_dict.get('resolution') is None:
+ info_dict['resolution'] = self.format_resolution(info_dict, default=None)
+
+ # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
+ # of %(field)s to %(field)0Nd for backward compatibility
+ field_size_compat_map = {
+ 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
+ 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
+ 'autonumber': self.params.get('autonumber_size') or 5,
+ }
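+ # e.g. in a playlist whose last index is 120, a plain %(playlist_index)s
+ # is rendered as %(playlist_index)03d, so entry 7 becomes '007'
+ # (illustrative)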
+
+ TMPL_DICT = {}
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
+ MATH_FUNCTIONS = {
+ '+': float.__add__,
+ '-': float.__sub__,
+ }
+ # Field is of the form key1.key2...
+ # where keys (except first) can be string, int or slice
+ FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
+ MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
+ MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
+ INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+ (?P<negate>-)?
+ (?P<fields>{field})
+ (?P<maths>(?:{math_op}{math_field})*)
+ (?:>(?P<strf_format>.+?))?
+ (?P<alternate>(?<!\\),[^|)]+)?
+ (?:\|(?P<default>.*?))?
+ $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
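+ # Illustrative field syntax accepted by this regex (assumed examples,
+ # not exhaustive):
+ # %(duration-3600>%H:%M)s - maths, then strftime formatting
+ # %(artist,creator|Unknown)s - first non-empty alternate, else default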
+
+ def _traverse_infodict(k):
+ k = k.split('.')
+ if k[0] == '':
+ k.pop(0)
+ return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
+
+ def get_value(mdict):
+ # Object traversal
+ value = _traverse_infodict(mdict['fields'])
+ # Negative
+ if mdict['negate']:
+ value = float_or_none(value)
+ if value is not None:
+ value *= -1
+ # Do maths
+ offset_key = mdict['maths']
+ if offset_key:
+ value = float_or_none(value)
+ operator = None
+ while offset_key:
+ item = re.match(
+ MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
+ offset_key).group(0)
+ offset_key = offset_key[len(item):]
+ if operator is None:
+ operator = MATH_FUNCTIONS[item]
+ continue
+ item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
+ offset = float_or_none(item)
+ if offset is None:
+ offset = float_or_none(_traverse_infodict(item))
+ try:
+ value = operator(value, multiplier * offset)
+ except (TypeError, ZeroDivisionError):
+ return None
+ operator = None
+ # Datetime formatting
+ if mdict['strf_format']:
+ value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
+
+ return value
+
+ na = self.params.get('outtmpl_na_placeholder', 'NA')
+
+ def _dumpjson_default(obj):
+ if isinstance(obj, (set, LazyList)):
+ return list(obj)
+ raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
+
+ def create_key(outer_mobj):
+ if not outer_mobj.group('has_key'):
+ return outer_mobj.group(0)
+ key = outer_mobj.group('key')
+ mobj = re.match(INTERNAL_FORMAT_RE, key)
+ initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
+ value, default = None, na
+ while mobj:
+ mobj = mobj.groupdict()
+ default = mobj['default'] if mobj['default'] is not None else default
+ value = get_value(mobj)
+ if value is None and mobj['alternate']:
+ mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
+ else:
+ break
+ fmt = outer_mobj.group('format')
+ if fmt == 's' and value is not None and key in field_size_compat_map.keys():
+ fmt = '0{:d}d'.format(field_size_compat_map[key])
+
+ value = default if value is None else value
+
+ str_fmt = f'{fmt[:-1]}s'
+ if fmt[-1] == 'l': # list
+ delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
+ value, fmt = delim.join(variadic(value)), str_fmt
+ elif fmt[-1] == 'j': # json
+ value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
+ elif fmt[-1] == 'q': # quoted
+ value, fmt = compat_shlex_quote(str(value)), str_fmt
+ elif fmt[-1] == 'B': # bytes
+ value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+ value, fmt = value.decode('utf-8', 'ignore'), 's'
+ elif fmt[-1] == 'U': # unicode normalized
+ opts = outer_mobj.group('conversion') or ''
+ value, fmt = unicodedata.normalize(
+ # "+" = compatibility equivalence, "#" = NFD
+ 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
+ value), str_fmt
+ elif fmt[-1] == 'c':
+ if value:
+ value = str(value)[0]
+ else:
+ fmt = str_fmt
+ elif fmt[-1] not in 'rs': # numeric
+ value = float_or_none(value)
+ if value is None:
+ value, fmt = default, 's'
+
+ if sanitize:
+ if fmt[-1] == 'r':
+ # If value is an object, sanitize might convert it to a string
+ # So we convert it to repr first
+ value, fmt = repr(value), str_fmt
+ if fmt[-1] in 'csr':
+ value = sanitize(initial_field, value)
+
+ key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
+ TMPL_DICT[key] = value
+ return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
+
+ return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
+
+ def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
+ outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
+ return self.escape_outtmpl(outtmpl) % info_dict
+
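+ # Illustrative usage (assuming a YoutubeDL instance `ydl`):
+ # ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
+ # {'title': 'Demo', 'id': 'x1', 'ext': 'mp4'}) -> 'Demo [x1].mp4'
+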
+ def _prepare_filename(self, info_dict, tmpl_type='default'):
+ try:
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
restricted=self.params.get('restrictfilenames'),
is_id=(k == 'id' or k.endswith('_id')))
- template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
- for k, v in template_dict.items()
- if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
-
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-
- # For fields playlist_index and autonumber convert all occurrences
- # of %(field)s to %(field)0Nd for backward compatibility
- field_size_compat_map = {
- 'playlist_index': len(str(template_dict['n_entries'])),
- 'autonumber': autonumber_size,
- }
- FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
- mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
- if mobj:
- outtmpl = re.sub(
- FIELD_SIZE_COMPAT_RE,
- r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
- outtmpl)
-
- # Missing numeric fields used together with integer presentation types
- # in format specification will break the argument substitution since
- # string NA placeholder is returned for missing fields. We will patch
- # output template for missing fields to meet string presentation type.
- for numeric_field in self._NUMERIC_FIELDS:
- if numeric_field not in template_dict:
- # As of [1] format syntax is:
- # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
- # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
- FORMAT_RE = r'''(?x)
- (?<!%)
- %
- \({0}\) # mapping key
- (?:[#0\-+ ]+)? # conversion flags (optional)
- (?:\d+)? # minimum field width (optional)
- (?:\.\d+)? # precision (optional)
- [hlL]? # length modifier (optional)
- [diouxXeEfFgGcrs%] # conversion type
- '''
- outtmpl = re.sub(
- FORMAT_RE.format(numeric_field),
- r'%({0})s'.format(numeric_field), outtmpl)
-
- # expand_path translates '%%' into '%' and '$$' into '$'
- # correspondingly that is not what we want since we need to keep
- # '%%' intact for template dict substitution step. Working around
- # with boundary-alike separator hack.
- sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
- outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
-
- # outtmpl should be expand_path'ed before template dict substitution
- # because meta fields may contain env variables we don't want to
- # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
- # title "Hello $PATH", we don't want `$PATH` to be expanded.
- filename = expand_path(outtmpl).replace(sep, '') % template_dict
-
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- filename = encodeFilename(filename, True).decode(preferredencoding())
- return sanitize_path(filename)
+ outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
+ filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
+
+ force_ext = OUTTMPL_TYPES.get(tmpl_type)
+ if filename and force_ext is not None:
+ filename = replace_extension(filename, force_ext, info_dict.get('ext'))
+
+ # https://github.com/blackjack4494/youtube-dlc/issues/85
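+ # e.g. with trim_file_name=10, 'A very long title.en.srt' becomes
+ # 'A very lon.en.srt' (illustrative; only the base name is trimmed)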
+ trim_file_name = self.params.get('trim_file_name', False)
+ if trim_file_name:
+ fn_groups = filename.rsplit('.')
+ ext = fn_groups[-1]
+ sub_ext = ''
+ if len(fn_groups) > 2:
+ sub_ext = fn_groups[-2]
+ filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
+
+ return filename
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
- def _match_entry(self, info_dict, incomplete):
- """ Returns None iff the file should be downloaded """
+ def prepare_filename(self, info_dict, dir_type='', warn=False):
+ """Generate the output filename."""
+
+ filename = self._prepare_filename(info_dict, dir_type or 'default')
+ if not filename and dir_type not in ('', 'temp'):
+ return ''
+
+ if warn:
+ if not self.params.get('paths'):
+ pass
+ elif filename == '-':
+ self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
+ elif os.path.isabs(filename):
+ self.report_warning('--paths is ignored since an absolute path is given in the output template', only_once=True)
+ if filename == '-' or not filename:
+ return filename
+
+ return self.get_output_path(dir_type, filename)
+
+ def _match_entry(self, info_dict, incomplete=False, silent=False):
+ """ Returns None if the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video'))
- if 'title' in info_dict:
- # This can happen when we're just evaluating the playlist
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return '"' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
- date = info_dict.get('upload_date')
- if date is not None:
- dateRange = self.params.get('daterange', DateRange())
- if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
- view_count = info_dict.get('view_count')
- if view_count is not None:
- min_views = self.params.get('min_views')
- if min_views is not None and view_count < min_views:
- return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
- max_views = self.params.get('max_views')
- if max_views is not None and view_count > max_views:
- return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
- if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % video_title
- if self.in_download_archive(info_dict):
- return '%s has already been recorded in archive' % video_title
- if not incomplete:
+ def check_filter():
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ date = info_dict.get('upload_date')
+ if date is not None:
+ dateRange = self.params.get('daterange', DateRange())
+ if date not in dateRange:
+ return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ view_count = info_dict.get('view_count')
+ if view_count is not None:
+ min_views = self.params.get('min_views')
+ if min_views is not None and view_count < min_views:
+ return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+ max_views = self.params.get('max_views')
+ if max_views is not None and view_count > max_views:
+ return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % video_title
+
match_filter = self.params.get('match_filter')
if match_filter is not None:
- ret = match_filter(info_dict)
+ try:
+ ret = match_filter(info_dict, incomplete=incomplete)
+ except TypeError:
+ # For backward compatibility
+ ret = None if incomplete else match_filter(info_dict)
if ret is not None:
return ret
+ return None
- return None
+ if self.in_download_archive(info_dict):
+ reason = '%s has already been recorded in the archive' % video_title
+ break_opt, break_err = 'break_on_existing', ExistingVideoReached
+ else:
+ reason = check_filter()
+ break_opt, break_err = 'break_on_reject', RejectedVideoReached
+ if reason is not None:
+ if not silent:
+ self.to_screen('[download] ' + reason)
+ if self.params.get(break_opt, False):
+ raise break_err()
+ return reason
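+
+ # Illustrative match_filter callable (hypothetical; shows the new
+ # `incomplete` keyword that this method now passes through):
+ # def my_filter(info_dict, incomplete=False):
+ # if incomplete:
+ # return None # don't reject on partial metadata
+ # if (info_dict.get('duration') or 0) < 60:
+ # return 'Skipping short video'
+ # return None # None means "download"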
@staticmethod
def add_extra_info(info_dict, extra_info):
@@ -771,7 +1228,7 @@ class YoutubeDL(object):
for key, value in extra_info.items():
info_dict.setdefault(key, value)
- def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
process=True, force_generic_extractor=False):
"""
Return a list with a dictionary for each video extracted.
@@ -788,28 +1245,36 @@ class YoutubeDL(object):
force_generic_extractor -- force using the generic extractor
"""
+ if extra_info is None:
+ extra_info = {}
+
if not ie_key and force_generic_extractor:
ie_key = 'Generic'
if ie_key:
- ies = [self.get_info_extractor(ie_key)]
+ ies = {ie_key: self._get_info_extractor_class(ie_key)}
else:
ies = self._ies
- for ie in ies:
+ for ie_key, ie in ies.items():
if not ie.suitable(url):
continue
- ie = self.get_info_extractor(ie.ie_key())
if not ie.working():
self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.')
- return self.__extract_info(url, ie, download, extra_info, process)
+ temp_id = ie.get_temp_id(url)
+ if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
+ self.to_screen("[%s] %s: has already been recorded in archive" % (
+ ie_key, temp_id))
+ break
+ return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
+ @functools.wraps(func)
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
@@ -822,10 +1287,14 @@ class YoutubeDL(object):
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
- except MaxDownloadsReached:
+ except ThrottledDownload:
+ self.to_stderr('\r')
+ self.report_warning('The download speed is below throttle limit. Re-extracting data')
+ return wrapper(self, *args, **kwargs)
+ except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
raise
except Exception as e:
- if self.params.get('ignoreerrors', False):
+ if self.params.get('ignoreerrors'):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
else:
raise
@@ -842,6 +1311,8 @@ class YoutubeDL(object):
'_type': 'compat_list',
'entries': ie_result,
}
+ if extra_info.get('original_url'):
+ ie_result.setdefault('original_url', extra_info['original_url'])
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
@@ -849,14 +1320,19 @@ class YoutubeDL(object):
return ie_result
def add_default_extra_info(self, ie_result, ie, url):
- self.add_extra_info(ie_result, {
- 'extractor': ie.IE_NAME,
- 'webpage_url': url,
- 'webpage_url_basename': url_basename(url),
- 'extractor_key': ie.ie_key(),
- })
-
- def process_ie_result(self, ie_result, download=True, extra_info={}):
+ if url is not None:
+ self.add_extra_info(ie_result, {
+ 'webpage_url': url,
+ 'original_url': url,
+ 'webpage_url_basename': url_basename(url),
+ })
+ if ie is not None:
+ self.add_extra_info(ie_result, {
+ 'extractor': ie.IE_NAME,
+ 'extractor_key': ie.ie_key(),
+ })
+
+ def process_ie_result(self, ie_result, download=True, extra_info=None):
"""
Take the result of the ie(may be modified) and resolve all unresolved
references (URLs, playlist items).
@@ -864,28 +1340,54 @@ class YoutubeDL(object):
It will also download the videos if 'download'.
Returns the resolved ie_result.
"""
+ if extra_info is None:
+ extra_info = {}
result_type = ie_result.get('_type', 'video')
if result_type in ('url', 'url_transparent'):
ie_result['url'] = sanitize_url(ie_result['url'])
+ if ie_result.get('original_url'):
+ extra_info.setdefault('original_url', ie_result['original_url'])
+
extract_flat = self.params.get('extract_flat', False)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True):
- self.__forced_printings(
- ie_result, self.prepare_filename(ie_result),
- incomplete=True)
+ info_copy = ie_result.copy()
+ ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
+ if ie and not ie_result.get('id'):
+ info_copy['id'] = ie.get_temp_id(ie_result['url'])
+ self.add_default_extra_info(info_copy, ie, ie_result['url'])
+ self.add_extra_info(info_copy, extra_info)
+ self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+ if self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_copy)
return ie_result
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
- return self.process_video_result(ie_result, download=download)
+ ie_result = self.process_video_result(ie_result, download=download)
+ additional_urls = (ie_result or {}).get('additional_urls')
+ if additional_urls:
+ # TODO: Improve MetadataParserPP to allow setting a list
+ if isinstance(additional_urls, compat_str):
+ additional_urls = [additional_urls]
+ self.to_screen(
+ '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
+ self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
+ ie_result['additional_entries'] = [
+ self.extract_info(
+ url, download, extra_info,
+ force_generic_extractor=self.params.get('force_generic_extractor'))
+ for url in additional_urls
+ ]
+ return ie_result
elif result_type == 'url':
# We have to add extra_info to the results because it may be
# contained in a playlist
- return self.extract_info(ie_result['url'],
- download,
- ie_key=ie_result.get('ie_key'),
- extra_info=extra_info)
+ return self.extract_info(
+ ie_result['url'], download,
+ ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info)
elif result_type == 'url_transparent':
# Use the information from the embedding page
info = self.extract_info(
@@ -929,6 +1431,7 @@ class YoutubeDL(object):
self._playlist_level += 1
self._playlist_urls.add(webpage_url)
+ self._sanitize_thumbnails(ie_result)
try:
return self.__process_playlist(ie_result, download)
finally:
@@ -941,15 +1444,12 @@ class YoutubeDL(object):
'It needs to be updated.' % ie_result.get('extractor'))
def _fixup(r):
- self.add_extra_info(
- r,
- {
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
- )
+ self.add_extra_info(r, {
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ })
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info)
@@ -959,15 +1459,28 @@ class YoutubeDL(object):
else:
raise Exception('Invalid result type: %s' % result_type)
+ def _ensure_dir_exists(self, path):
+ return make_dir(path, self.report_error)
+
def __process_playlist(self, ie_result, download):
# We process each entry in the playlist
playlist = ie_result.get('title') or ie_result.get('id')
-
self.to_screen('[download] Downloading playlist: %s' % playlist)
+ if 'entries' not in ie_result:
+ raise EntryNotInPlaylist()
+ incomplete_entries = bool(ie_result.get('requested_entries'))
+ if incomplete_entries:
+ def fill_missing_entries(entries, indexes):
+ ret = [None] * max(indexes)
+ for i, entry in zip(indexes, entries):
+ ret[i - 1] = entry
+ return ret
+ ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
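+ # e.g. requested_entries=[2, 4] with two entries e1, e2 yields
+ # [None, e1, None, e2] (indexes are 1-based; illustrative)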
+
playlist_results = []
- playliststart = self.params.get('playliststart', 1) - 1
+ playliststart = self.params.get('playliststart', 1)
playlistend = self.params.get('playlistend')
# For backwards compatibility, interpret -1 as whole list
if playlistend == -1:
@@ -987,59 +1500,92 @@ class YoutubeDL(object):
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries']
-
- def make_playlistitems_entries(list_ie_entries):
- num_entries = len(list_ie_entries)
- return [
- list_ie_entries[i - 1] for i in playlistitems
- if -num_entries <= i - 1 < num_entries]
-
- def report_download(num_entries):
- self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, num_entries))
+ msg = (
+ 'Downloading %d videos' if not isinstance(ie_entries, list)
+ else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = make_playlistitems_entries(ie_entries)
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
- report_download(n_entries)
- else: # iterable
- if playlistitems:
- entries = make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems))))
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- report_download(n_entries)
+ def get_entry(i):
+ return ie_entries[i - 1]
+ else:
+ if not isinstance(ie_entries, PagedList):
+ ie_entries = LazyList(ie_entries)
+
+ def get_entry(i):
+ return YoutubeDL.__handle_extraction_exceptions(
+ lambda self, i: ie_entries[i - 1]
+ )(self, i)
+
+ entries = []
+ items = playlistitems if playlistitems is not None else itertools.count(playliststart)
+ for i in items:
+ if i == 0:
+ continue
+ if playlistitems is None and playlistend is not None and playlistend < i:
+ break
+ entry = None
+ try:
+ entry = get_entry(i)
+ if entry is None:
+ raise EntryNotInPlaylist()
+ except (IndexError, EntryNotInPlaylist):
+ if incomplete_entries:
+ raise EntryNotInPlaylist()
+ elif not playlistitems:
+ break
+ entries.append(entry)
+ try:
+ if entry is not None:
+ self._match_entry(entry, incomplete=True, silent=True)
+ except (ExistingVideoReached, RejectedVideoReached):
+ break
+ ie_result['entries'] = entries
+
+ # Save playlist_index before re-ordering
+ entries = [
+ ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
+ for i, entry in enumerate(entries, 1)
+ if entry is not None]
+ n_entries = len(entries)
+
+ if not playlistitems and (playliststart or playlistend):
+ playlistitems = list(range(playliststart, playliststart + n_entries))
+ ie_result['requested_entries'] = playlistitems
+
+ if self.params.get('allow_playlist_files', True):
+ ie_copy = {
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': 0,
+ }
+ ie_copy.update(dict(ie_result))
+
+ if self._write_info_json('playlist', ie_result,
+ self.prepare_filename(ie_copy, 'pl_infojson')) is None:
+ return
+ if self._write_description('playlist', ie_result,
+ self.prepare_filename(ie_copy, 'pl_description')) is None:
+ return
+ # TODO: This should be passed to ThumbnailsConvertor if necessary
+ self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
if self.params.get('playlistreverse', False):
entries = entries[::-1]
-
if self.params.get('playlistrandom', False):
random.shuffle(entries)
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
- for i, entry in enumerate(entries, 1):
+ self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
+ failures = 0
+ max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
+ for i, entry_tuple in enumerate(entries, 1):
+ playlist_index, entry = entry_tuple
+ if 'playlist-index' in self.params.get('compat_opts', []):
+ playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# This __x_forwarded_for_ip thing is a bit ugly but requires
# minimal changes
@@ -1047,24 +1593,30 @@ class YoutubeDL(object):
entry['__x_forwarded_for_ip'] = x_forwarded_for
extra = {
'n_entries': n_entries,
+ '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
+ 'playlist_index': playlist_index,
+ 'playlist_autonumber': i,
'playlist': playlist,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
- reason = self._match_entry(entry, incomplete=True)
- if reason is not None:
- self.to_screen('[download] ' + reason)
+ if self._match_entry(entry, incomplete=True) is not None:
continue
entry_result = self.__process_iterable_entry(entry, download, extra)
+ if not entry_result:
+ failures += 1
+ if failures >= max_failures:
+ self.report_error(
+ 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+ break
# TODO: skip failed (empty) entries?
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
@@ -1088,12 +1640,11 @@ class YoutubeDL(object):
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*
- (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
- (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
- $
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
+ (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
''' % '|'.join(map(re.escape, OPERATORS.keys())))
- m = operator_rex.search(filter_spec)
+ m = operator_rex.fullmatch(filter_spec)
if m:
try:
comparison_value = int(m.group('value'))
@@ -1114,13 +1665,12 @@ class YoutubeDL(object):
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
}
- str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
- \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
- \s*(?P<value>[a-zA-Z0-9._-]+)
- \s*$
+ str_operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-zA-Z0-9._-]+)\s*
+ (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[a-zA-Z0-9._-]+)\s*
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
- m = str_operator_rex.search(filter_spec)
+ m = str_operator_rex.fullmatch(filter_spec)
if m:
comparison_value = m.group('value')
str_op = STR_OPERATORS[m.group('op')]
@@ -1130,7 +1680,7 @@ class YoutubeDL(object):
op = str_op
if not m:
- raise ValueError('Invalid filter specification %r' % filter_spec)
+ raise SyntaxError('Invalid filter specification %r' % filter_spec)
def _filter(f):
actual_value = f.get(m.group('key'))
@@ -1145,23 +1695,22 @@ class YoutubeDL(object):
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
- def prefer_best():
- if self.params.get('simulate', False):
- return False
- if not download:
- return False
- if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
- return True
- if info_dict.get('is_live'):
- return True
- if not can_merge():
- return True
- return False
-
- req_format_list = ['bestvideo+bestaudio', 'best']
- if prefer_best():
- req_format_list.reverse()
- return '/'.join(req_format_list)
+ prefer_best = (
+ not self.params.get('simulate')
+ and download
+ and (
+ not can_merge()
+ or info_dict.get('is_live', False)
+ or self.outtmpl_dict['default'] == '-'))
+ compat = (
+ prefer_best
+ or self.params.get('allow_multiple_audio_streams', False)
+ or 'format-spec' in self.params.get('compat_opts', []))
+
+ return (
+ 'best/bestvideo+bestaudio' if prefer_best
+ else 'bestvideo*+bestaudio/best' if not compat
+ else 'bestvideo+bestaudio/best')
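+
+ # Illustrative outcomes (assumptions about typical setups):
+ # merging possible, no compat opts -> 'bestvideo*+bestaudio/best'
+ # downloading to stdout ('-') -> 'best/bestvideo+bestaudio'
+ # 'format-spec' in compat_opts -> 'bestvideo+bestaudio/best'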
def build_format_selector(self, format_spec):
def syntax_error(note, start):
@@ -1176,6 +1725,11 @@ class YoutubeDL(object):
GROUP = 'GROUP'
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+ allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+ 'video': self.params.get('allow_multiple_video_streams', False)}
+
+ check_formats = self.params.get('check_formats')
+
def _parse_filter(tokens):
filter_parts = []
for type, string, start, _, _ in tokens:
@@ -1258,13 +1812,13 @@ class YoutubeDL(object):
group = _parse_format_selection(tokens, inside_group=True)
current_selector = FormatSelector(GROUP, group, [])
elif string == '+':
- if inside_merge:
+ if not current_selector:
raise syntax_error('Unexpected "+"', start)
- video_selector = current_selector
- audio_selector = _parse_format_selection(tokens, inside_merge=True)
- if not video_selector or not audio_selector:
- raise syntax_error('"+" must be between two format selectors', start)
- current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
+ selector_1 = current_selector
+ selector_2 = _parse_format_selection(tokens, inside_merge=True)
+ if not selector_2:
+ raise syntax_error('Expected a selector', start)
+ current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
else:
raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
elif type == tokenize.ENDMARKER:
@@ -1273,18 +1827,116 @@ class YoutubeDL(object):
selectors.append(current_selector)
return selectors
+ def _merge(formats_pair):
+ format_1, format_2 = formats_pair
+
+ formats_info = []
+ formats_info.extend(format_1.get('requested_formats', (format_1,)))
+ formats_info.extend(format_2.get('requested_formats', (format_2,)))
+
+ if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+ get_no_more = {'video': False, 'audio': False}
+ for (i, fmt_info) in enumerate(formats_info):
+ if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
+ formats_info.pop(i)
+ continue
+ for aud_vid in ['audio', 'video']:
+ if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
+ if get_no_more[aud_vid]:
+ formats_info.pop(i)
+ break
+ get_no_more[aud_vid] = True
+
+ if len(formats_info) == 1:
+ return formats_info[0]
+
+ video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
+ audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
+
+ the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
+ the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
+
+ output_ext = self.params.get('merge_output_format')
+ if not output_ext:
+ if the_only_video:
+ output_ext = the_only_video['ext']
+ elif the_only_audio and not video_fmts:
+ output_ext = the_only_audio['ext']
+ else:
+ output_ext = 'mkv'
+
+ filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
+
+ new_dict = {
+ 'requested_formats': formats_info,
+ 'format': '+'.join(filtered('format')),
+ 'format_id': '+'.join(filtered('format_id')),
+ 'ext': output_ext,
+ 'protocol': '+'.join(map(determine_protocol, formats_info)),
+ 'language': '+'.join(orderedSet(filtered('language'))),
+ 'format_note': '+'.join(orderedSet(filtered('format_note'))),
+ 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
+ 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
+ }
+
+ if the_only_video:
+ new_dict.update({
+ 'width': the_only_video.get('width'),
+ 'height': the_only_video.get('height'),
+ 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
+ 'fps': the_only_video.get('fps'),
+ 'vcodec': the_only_video.get('vcodec'),
+ 'vbr': the_only_video.get('vbr'),
+ 'stretched_ratio': the_only_video.get('stretched_ratio'),
+ })
+
+ if the_only_audio:
+ new_dict.update({
+ 'acodec': the_only_audio.get('acodec'),
+ 'abr': the_only_audio.get('abr'),
+ 'asr': the_only_audio.get('asr'),
+ })
+
+ return new_dict
+
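+ # Illustrative merge (hypothetical formats): a 1080p mp4 video stream
+ # merged with an m4a audio stream yields something like
+ # {'format_id': '137+140', 'ext': 'mp4', 'requested_formats': [...], ...}
+ # with per-stream fields taken from the only video/audio format.
+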
+ def _check_formats(formats):
+ if not check_formats:
+ yield from formats
+ return
+ for f in formats:
+ self.to_screen('[info] Testing format %s' % f['format_id'])
+ temp_file = tempfile.NamedTemporaryFile(
+ suffix='.tmp', delete=False,
+ dir=self.get_output_path('temp') or None)
+ temp_file.close()
+ try:
+ success, _ = self.dl(temp_file.name, f, test=True)
+ except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+ success = False
+ finally:
+ if os.path.exists(temp_file.name):
+ try:
+ os.remove(temp_file.name)
+ except OSError:
+ self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+ if success:
+ yield f
+ else:
+ self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+
def _build_selector_function(selector):
- if isinstance(selector, list):
+ if isinstance(selector, list): # ,
fs = [_build_selector_function(s) for s in selector]
def selector_function(ctx):
for f in fs:
- for format in f(ctx):
- yield format
+ yield from f(ctx)
return selector_function
- elif selector.type == GROUP:
+
+ elif selector.type == GROUP: # ()
selector_function = _build_selector_function(selector.selector)
- elif selector.type == PICKFIRST:
+
+ elif selector.type == PICKFIRST: # /
fs = [_build_selector_function(s) for s in selector.selector]
def selector_function(ctx):
@@ -1293,105 +1945,79 @@ class YoutubeDL(object):
if picked_formats:
return picked_formats
return []
- elif selector.type == SINGLE:
- format_spec = selector.selector
- def selector_function(ctx):
- formats = list(ctx['formats'])
- if not formats:
- return
- if format_spec == 'all':
- for f in formats:
- yield f
- elif format_spec in ['best', 'worst', None]:
- format_idx = 0 if format_spec == 'worst' else -1
- audiovideo_formats = [
- f for f in formats
- if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
- if audiovideo_formats:
- yield audiovideo_formats[format_idx]
- # for extractors with incomplete formats (audio only (soundcloud)
- # or video only (imgur)) we will fallback to best/worst
- # {video,audio}-only format
- elif ctx['incomplete_formats']:
- yield formats[format_idx]
- elif format_spec == 'bestaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[-1]
- elif format_spec == 'worstaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[0]
- elif format_spec == 'bestvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[-1]
- elif format_spec == 'worstvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[0]
- else:
- extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
- if format_spec in extensions:
- filter_f = lambda f: f['ext'] == format_spec
- else:
- filter_f = lambda f: f['format_id'] == format_spec
- matches = list(filter(filter_f, formats))
- if matches:
- yield matches[-1]
- elif selector.type == MERGE:
- def _merge(formats_info):
- format_1, format_2 = [f['format_id'] for f in formats_info]
- # The first format must contain the video and the
- # second the audio
- if formats_info[0].get('vcodec') == 'none':
- self.report_error('The first format must '
- 'contain the video, try using '
- '"-f %s+%s"' % (format_2, format_1))
- return
- # Formats must be opposite (video+audio)
- if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
- self.report_error(
- 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
- % (format_1, format_2))
- return
- output_ext = (
- formats_info[0]['ext']
- if self.params.get('merge_output_format') is None
- else self.params['merge_output_format'])
- return {
- 'requested_formats': formats_info,
- 'format': '%s+%s' % (formats_info[0].get('format'),
- formats_info[1].get('format')),
- 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
- formats_info[1].get('format_id')),
- 'width': formats_info[0].get('width'),
- 'height': formats_info[0].get('height'),
- 'resolution': formats_info[0].get('resolution'),
- 'fps': formats_info[0].get('fps'),
- 'vcodec': formats_info[0].get('vcodec'),
- 'vbr': formats_info[0].get('vbr'),
- 'stretched_ratio': formats_info[0].get('stretched_ratio'),
- 'acodec': formats_info[1].get('acodec'),
- 'abr': formats_info[1].get('abr'),
- 'ext': output_ext,
- }
- video_selector, audio_selector = map(_build_selector_function, selector.selector)
+ elif selector.type == MERGE: # +
+ selector_1, selector_2 = map(_build_selector_function, selector.selector)
def selector_function(ctx):
for pair in itertools.product(
- video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+ selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
yield _merge(pair)
+ elif selector.type == SINGLE: # atom
+ format_spec = selector.selector or 'best'
+
+ # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
+ if format_spec == 'all':
+ def selector_function(ctx):
+ yield from _check_formats(ctx['formats'])
+ elif format_spec == 'mergeall':
+ def selector_function(ctx):
+ formats = list(_check_formats(ctx['formats']))
+ if not formats:
+ return
+ merged_format = formats[-1]
+ for f in formats[-2::-1]:
+ merged_format = _merge((merged_format, f))
+ yield merged_format
+
+ else:
+ format_fallback, format_reverse, format_idx = False, True, 1
+ mobj = re.match(
+ r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+ format_spec)
+ if mobj is not None:
+ format_idx = int_or_none(mobj.group('n'), default=1)
+ format_reverse = mobj.group('bw')[0] == 'b'
+ format_type = (mobj.group('type') or [None])[0]
+ not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+ format_modified = mobj.group('mod') is not None
+
+ format_fallback = not format_type and not format_modified # for b, w
+ _filter_f = (
+ (lambda f: f.get('%scodec' % format_type) != 'none')
+ if format_type and format_modified # bv*, ba*, wv*, wa*
+ else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+ if format_type # bv, ba, wv, wa
+ else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+ if not format_modified # b, w
+ else lambda f: True) # b*, w*
+ filter_f = lambda f: _filter_f(f) and (
+ f.get('vcodec') != 'none' or f.get('acodec') != 'none')
+ else:
+ if format_spec in self._format_selection_exts['audio']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
+ elif format_spec in self._format_selection_exts['video']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+ elif format_spec in self._format_selection_exts['storyboards']:
+ filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
+ else:
+ filter_f = lambda f: f.get('format_id') == format_spec # id
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ matches = list(filter(filter_f, formats)) if filter_f is not None else formats
+ if format_fallback and ctx['incomplete_formats'] and not matches:
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)), best/worst will fall back to the
+ # best/worst {video,audio}-only format
+ matches = formats
+ matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
+ try:
+ yield matches[format_idx - 1]
+ except IndexError:
+ return
+
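+ # Illustrative atoms handled above (assuming a typical format list):
+ # 'b' or 'best' -> best format with both audio and video
+ # 'bv*' -> best format that contains a video stream
+ # 'ba.2' -> second-best audio-only format
+ # 'mp4' -> best combined format with the mp4 extension
+ # '137' -> the format whose format_id is exactly '137'
+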
filters = [self._build_format_filter(f) for f in selector.filters]
def final_selector(ctx):
@@ -1453,13 +2079,51 @@ class YoutubeDL(object):
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
+ def _sanitize_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
+ if thumbnails:
+ thumbnails.sort(key=lambda t: (
+ t.get('preference') if t.get('preference') is not None else -1,
+ t.get('width') if t.get('width') is not None else -1,
+ t.get('height') if t.get('height') is not None else -1,
+ t.get('id') if t.get('id') is not None else '',
+ t.get('url')))
+
+ def thumbnail_tester():
+ def test_thumbnail(t):
+ self.to_screen(f'[info] Testing thumbnail {t["id"]}')
+ try:
+ self.urlopen(HEADRequest(t['url']))
+ except network_exceptions as err:
+ self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
+ return False
+ return True
+ return test_thumbnail
+
+ for i, t in enumerate(thumbnails):
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+ if t.get('width') and t.get('height'):
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ t['url'] = sanitize_url(t['url'])
+
+ if self.params.get('check_formats'):
+ info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
+ else:
+ info_dict['thumbnails'] = thumbnails
+
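+ # e.g. a lone {'thumbnail': url} becomes thumbnails=[{'id': '0', 'url': url}],
+ # and multi-candidate lists are sorted so the highest-preference,
+ # largest entry ends up last (illustrative)
+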
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
if 'id' not in info_dict:
raise ExtractorError('Missing "id" field in extractor result')
if 'title' not in info_dict:
- raise ExtractorError('Missing "title" field in extractor result')
+ raise ExtractorError('Missing "title" field in extractor result',
+ video_id=info_dict['id'], ie=info_dict['extractor'])
def report_force_conversion(field, field_not, conversion):
self.report_warning(
@@ -1489,37 +2153,21 @@ class YoutubeDL(object):
info_dict['playlist'] = None
info_dict['playlist_index'] = None
- thumbnails = info_dict.get('thumbnails')
- if thumbnails is None:
- thumbnail = info_dict.get('thumbnail')
- if thumbnail:
- info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
- if thumbnails:
- thumbnails.sort(key=lambda t: (
- t.get('preference') if t.get('preference') is not None else -1,
- t.get('width') if t.get('width') is not None else -1,
- t.get('height') if t.get('height') is not None else -1,
- t.get('id') if t.get('id') is not None else '', t.get('url')))
- for i, t in enumerate(thumbnails):
- t['url'] = sanitize_url(t['url'])
- if t.get('width') and t.get('height'):
- t['resolution'] = '%dx%d' % (t['width'], t['height'])
- if t.get('id') is None:
- t['id'] = '%d' % i
-
- if self.params.get('list_thumbnails'):
- self.list_thumbnails(info_dict)
- return
+ self._sanitize_thumbnails(info_dict)
thumbnail = info_dict.get('thumbnail')
+ thumbnails = info_dict.get('thumbnails')
if thumbnail:
info_dict['thumbnail'] = sanitize_url(thumbnail)
elif thumbnails:
info_dict['thumbnail'] = thumbnails[-1]['url']
- if 'display_id' not in info_dict and 'id' in info_dict:
+ if info_dict.get('display_id') is None and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
+ if info_dict.get('duration') is not None:
+ info_dict['duration_string'] = formatSeconds(info_dict['duration'])
+
for ts_key, date_key in (
('timestamp', 'upload_date'),
('release_timestamp', 'release_date'),
@@ -1533,6 +2181,23 @@ class YoutubeDL(object):
except (ValueError, OverflowError, OSError):
pass
+ live_keys = ('is_live', 'was_live')
+ live_status = info_dict.get('live_status')
+ if live_status is None:
+ for key in live_keys:
+ if info_dict.get(key) is False:
+ continue
+ if info_dict.get(key):
+ live_status = key
+ break
+ if all(info_dict.get(key) is False for key in live_keys):
+ live_status = 'not_live'
+ if live_status:
+ info_dict['live_status'] = live_status
+ for key in live_keys:
+ if info_dict.get(key) is None:
+ info_dict[key] = (live_status == key)
+
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
for field in ('chapter', 'season', 'episode'):
@@ -1552,13 +2217,6 @@ class YoutubeDL(object):
automatic_captions = info_dict.get('automatic_captions')
subtitles = info_dict.get('subtitles')
- if self.params.get('listsubtitles', False):
- if 'automatic_captions' in info_dict:
- self.list_subtitles(
- info_dict['id'], automatic_captions, 'automatic captions')
- self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
- return
-
info_dict['requested_subtitles'] = self.process_subtitles(
info_dict['id'], subtitles, automatic_captions)
@@ -1569,8 +2227,12 @@ class YoutubeDL(object):
else:
formats = info_dict['formats']
+ info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+ if not self.params.get('allow_unplayable_formats'):
+ formats = [f for f in formats if not f.get('has_drm')]
+
if not formats:
- raise ExtractorError('No video formats found!')
+ self.raise_no_formats(info_dict)
def is_wellformed(f):
url = f.get('url')
@@ -1604,25 +2266,32 @@ class YoutubeDL(object):
formats_dict[format_id].append(format)
# Make sure all formats have unique format_id
+ common_exts = set(itertools.chain(*self._format_selection_exts.values()))
for format_id, ambiguous_formats in formats_dict.items():
- if len(ambiguous_formats) > 1:
- for i, format in enumerate(ambiguous_formats):
+ ambiguous_id = len(ambiguous_formats) > 1
+ for i, format in enumerate(ambiguous_formats):
+ if ambiguous_id:
format['format_id'] = '%s-%d' % (format_id, i)
+ if format.get('ext') is None:
+ format['ext'] = determine_ext(format['url']).lower()
+ # Ensure there is no conflict between id and ext in format selection
+ # See https://github.com/hypervideo/hypervideo/issues/1282
+ if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
+ format['format_id'] = 'f%s' % format['format_id']
for i, format in enumerate(formats):
if format.get('format') is None:
format['format'] = '{id} - {res}{note}'.format(
id=format['format_id'],
res=self.format_resolution(format),
- note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
+ note=format_field(format, 'format_note', ' (%s)'),
)
- # Automatically determine file extension if missing
- if format.get('ext') is None:
- format['ext'] = determine_ext(format['url']).lower()
- # Automatically determine protocol if missing (useful for format
- # selection purposes)
if format.get('protocol') is None:
format['protocol'] = determine_protocol(format)
+ if format.get('resolution') is None:
+ format['resolution'] = self.format_resolution(format, default=None)
+ if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
+ format['dynamic_range'] = 'SDR'
# Add HTTP headers, so that external programs can use them from the
# json output
full_format_info = info_dict.copy()
@@ -1634,23 +2303,39 @@ class YoutubeDL(object):
# TODO Central sorting goes here
- if formats[0] is not info_dict:
+ if not formats or formats[0] is not info_dict:
# only set the 'formats' field if the original info_dict lists them
# otherwise we end up with a circular reference, the first (and unique)
# element in the 'formats' field in info_dict is info_dict itself,
# which can't be exported to json
info_dict['formats'] = formats
+
+ info_dict, _ = self.pre_process(info_dict)
+
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
if self.params.get('listformats'):
- self.list_formats(info_dict)
+ if not info_dict.get('formats') and not info_dict.get('url'):
+ self.to_screen('%s has no formats' % info_dict['id'])
+ else:
+ self.list_formats(info_dict)
+ if self.params.get('listsubtitles'):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(
+ info_dict['id'], automatic_captions, 'automatic captions')
+ self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
+ list_only = self.params.get('simulate') is None and (
+ self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
+ if list_only:
+ # Without this printing, -F --print-json will not work
+ self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
return
- req_format = self.params.get('format')
- if req_format is None:
+ format_selector = self.format_selector
+ if format_selector is None:
req_format = self._default_format_spec(info_dict, download=download)
- if self.params.get('verbose'):
- self._write_string('[debug] Default format spec: %s\n' % req_format)
-
- format_selector = self.build_format_selector(req_format)
+ self.write_debug('Default format spec: %s' % req_format)
+ format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original
# format set in order to calculate some metrics or do some processing.
@@ -1680,18 +2365,27 @@ class YoutubeDL(object):
formats_to_download = list(format_selector(ctx))
if not formats_to_download:
- raise ExtractorError('requested format not available',
- expected=True)
-
- if download:
- if len(formats_to_download) > 1:
- self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
- for format in formats_to_download:
+ if not self.params.get('ignore_no_formats_error'):
+ raise ExtractorError('Requested format is not available', expected=True,
+ video_id=info_dict['id'], ie=info_dict['extractor'])
+ else:
+ self.report_warning('Requested format is not available')
+ # Process what we can, even without any available formats.
+ self.process_info(dict(info_dict))
+ elif download:
+ self.to_screen(
+ '[info] %s: Downloading %d format(s): %s' % (
+ info_dict['id'], len(formats_to_download),
+ ", ".join([f['format_id'] for f in formats_to_download])))
+ for fmt in formats_to_download:
new_info = dict(info_dict)
- new_info.update(format)
+ # Save a reference to the original info_dict so that it can be modified in process_info if needed
+ new_info['__original_infodict'] = info_dict
+ new_info.update(fmt)
self.process_info(new_info)
# We update the info dict with the best quality format (backwards compatibility)
- info_dict.update(formats_to_download[-1])
+ if formats_to_download:
+ info_dict.update(formats_to_download[-1])
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
@@ -1709,15 +2403,34 @@ class YoutubeDL(object):
available_subs):
return None
+ all_sub_langs = available_subs.keys()
if self.params.get('allsubtitles', False):
- requested_langs = available_subs.keys()
+ requested_langs = all_sub_langs
+ elif self.params.get('subtitleslangs', False):
+ # A list is used so that the order of languages will be the same as
+ # given in subtitleslangs. See https://github.com/hypervideo/hypervideo/issues/1041
+ requested_langs = []
+ for lang_re in self.params.get('subtitleslangs'):
+ if lang_re == 'all':
+ requested_langs.extend(all_sub_langs)
+ continue
+ discard = lang_re[0] == '-'
+ if discard:
+ lang_re = lang_re[1:]
+ current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
+ if discard:
+ for lang in current_langs:
+ while lang in requested_langs:
+ requested_langs.remove(lang)
+ else:
+ requested_langs.extend(current_langs)
+ requested_langs = orderedSet(requested_langs)
+ elif 'en' in available_subs:
+ requested_langs = ['en']
else:
- if self.params.get('subtitleslangs', False):
- requested_langs = self.params.get('subtitleslangs')
- elif 'en' in available_subs:
- requested_langs = ['en']
- else:
- requested_langs = [list(available_subs.keys())[0]]
+ requested_langs = [list(all_sub_langs)[0]]
+ if requested_langs:
+ self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
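+
+ # Illustrative selections (hypothetical available languages):
+ # subtitleslangs=['all', '-live_chat'] -> everything except live_chat
+ # subtitleslangs=['en.*'] -> 'en', 'en-US', ... (regexes are anchored)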
formats_query = self.params.get('subtitlesformat', 'best')
formats_preference = formats_query.split('/') if formats_query else []
@@ -1744,34 +2457,80 @@ class YoutubeDL(object):
return subs
def __forced_printings(self, info_dict, filename, incomplete):
- def print_mandatory(field):
+ def print_mandatory(field, actual_field=None):
+ if actual_field is None:
+ actual_field = field
if (self.params.get('force%s' % field, False)
- and (not incomplete or info_dict.get(field) is not None)):
- self.to_stdout(info_dict[field])
+ and (not incomplete or info_dict.get(actual_field) is not None)):
+ self.to_stdout(info_dict[actual_field])
def print_optional(field):
if (self.params.get('force%s' % field, False)
and info_dict.get(field) is not None):
self.to_stdout(info_dict[field])
+ info_dict = info_dict.copy()
+ if filename is not None:
+ info_dict['filename'] = filename
+ if info_dict.get('requested_formats') is not None:
+ # For RTMP URLs, also include the playpath
+ info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
+ elif 'url' in info_dict:
+ info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
+
+ if self.params.get('forceprint') or self.params.get('forcejson'):
+ self.post_extract(info_dict)
+ for tmpl in self.params.get('forceprint', []):
+ mobj = re.match(r'\w+(=?)$', tmpl)
+ if mobj and mobj.group(1):
+ tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
+ elif mobj:
+ tmpl = '%({})s'.format(tmpl)
+ self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
+
print_mandatory('title')
print_mandatory('id')
- if self.params.get('forceurl', False) and not incomplete:
- if info_dict.get('requested_formats') is not None:
- for f in info_dict['requested_formats']:
- self.to_stdout(f['url'] + f.get('play_path', ''))
- else:
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ print_mandatory('url', 'urls')
print_optional('thumbnail')
print_optional('description')
- if self.params.get('forcefilename', False) and filename is not None:
- self.to_stdout(filename)
- if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+ print_optional('filename')
+ if self.params.get('forceduration') and info_dict.get('duration') is not None:
self.to_stdout(formatSeconds(info_dict['duration']))
print_mandatory('format')
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
+
+ if self.params.get('forcejson'):
+ self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
+
+ def dl(self, name, info, subtitle=False, test=False):
+ if not info.get('url'):
+ self.raise_no_formats(info, True)
+
+ if test:
+ verbose = self.params.get('verbose')
+ params = {
+ 'test': True,
+ 'quiet': self.params.get('quiet') or not verbose,
+ 'verbose': verbose,
+ 'noprogress': not verbose,
+ 'nopart': True,
+ 'skip_unavailable_fragments': False,
+ 'keep_fragments': False,
+ 'overwrites': True,
+ '_no_ytdl_file': True,
+ }
+ else:
+ params = self.params
+ fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
+ if not test:
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
+ self.write_debug('Invoking downloader on "%s"' % urls)
+
+ new_info = copy.deepcopy(self._copy_infodict(info))
+ if new_info.get('http_headers') is None:
+ new_info['http_headers'] = self._calc_headers(new_info)
+ return fd.download(name, new_info, subtitle)
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@@ -1786,61 +2545,66 @@ class YoutubeDL(object):
# TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
- if 'format' not in info_dict:
+ if 'format' not in info_dict and 'ext' in info_dict:
info_dict['format'] = info_dict['ext']
- reason = self._match_entry(info_dict, incomplete=False)
- if reason is not None:
- self.to_screen('[download] ' + reason)
+ if self._match_entry(info_dict) is not None:
return
+ self.post_extract(info_dict)
self._num_downloads += 1
- info_dict['_filename'] = filename = self.prepare_filename(info_dict)
+ # info_dict['_filename'] needs to be set for backward compatibility
+ info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
+ temp_filename = self.prepare_filename(info_dict, 'temp')
+ files_to_move = {}
# Forced printings
- self.__forced_printings(info_dict, filename, incomplete=False)
+ self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
- # Do nothing else if in simulate mode
- if self.params.get('simulate', False):
+ if self.params.get('simulate'):
+ if self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_dict)
+ # Do nothing else if in simulate mode
return
- if filename is None:
+ if full_filename is None:
+ return
+ if not self._ensure_dir_exists(encodeFilename(full_filename)):
+ return
+ if not self._ensure_dir_exists(encodeFilename(temp_filename)):
return
- def ensure_dir_exists(path):
- try:
- dn = os.path.dirname(path)
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- return True
- except (OSError, IOError) as err:
- if isinstance(err, OSError) and err.errno == errno.EEXIST:
- return True
- self.report_error('unable to create directory ' + error_to_compat_str(err))
- return False
+ if self._write_description('video', info_dict,
+ self.prepare_filename(info_dict, 'description')) is None:
+ return
- if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+ sub_files = self._write_subtitles(info_dict, temp_filename)
+ if sub_files is None:
return
+ files_to_move.update(dict(sub_files))
- if self.params.get('writedescription', False):
- descfn = replace_extension(filename, 'description', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
- self.to_screen('[info] Video description is already present')
- elif info_dict.get('description') is None:
- self.report_warning('There\'s no description to write.')
- else:
- try:
- self.to_screen('[info] Writing video description to: ' + descfn)
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
- descfile.write(info_dict['description'])
- except (OSError, IOError):
- self.report_error('Cannot write description file ' + descfn)
- return
+ thumb_files = self._write_thumbnails(
+ 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
+ if thumb_files is None:
+ return
+ files_to_move.update(dict(thumb_files))
+
+ infofn = self.prepare_filename(info_dict, 'infojson')
+ _infojson_written = self._write_info_json('video', info_dict, infofn)
+ if _infojson_written:
+ info_dict['__infojson_filename'] = infofn
+ elif _infojson_written is None:
+ return
+ # Note: Annotations are deprecated
+ annofn = None
if self.params.get('writeannotations', False):
- annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+ annofn = self.prepare_filename(info_dict, 'annotation')
+ if annofn:
+ if not self._ensure_dir_exists(encodeFilename(annofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'):
self.report_warning('There are no annotations to write.')
@@ -1855,126 +2619,213 @@ class YoutubeDL(object):
self.report_error('Cannot write annotations file: ' + annofn)
return
- subtitles_are_requested = any([self.params.get('writesubtitles', False),
- self.params.get('writeautomaticsub')])
+ # Write internet shortcut files
+ url_link = webloc_link = desktop_link = False
+ if self.params.get('writelink', False):
+ if sys.platform == "darwin": # macOS.
+ webloc_link = True
+ elif sys.platform.startswith("linux"):
+ desktop_link = True
+ else: # if sys.platform in ['win32', 'cygwin']:
+ url_link = True
+ if self.params.get('writeurllink', False):
+ url_link = True
+ if self.params.get('writewebloclink', False):
+ webloc_link = True
+ if self.params.get('writedesktoplink', False):
+ desktop_link = True
+
+ if url_link or webloc_link or desktop_link:
+ if 'webpage_url' not in info_dict:
+ self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
+ return
+ ascii_url = iri_to_uri(info_dict['webpage_url'])
- if subtitles_are_requested and info_dict.get('requested_subtitles'):
- # subtitles download errors are already managed as troubles in relevant IE
- # that way it will silently go on when used with unsupporting IE
- subtitles = info_dict['requested_subtitles']
- ie = self.get_info_extractor(info_dict['extractor_key'])
- for sub_lang, sub_info in subtitles.items():
- sub_format = sub_info['ext']
- sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
- self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
- else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
- if sub_info.get('data') is not None:
- try:
- # Use newline='' to prevent conversion of newline characters
- # See https://github.com/ytdl-org/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
- subfile.write(sub_info['data'])
- except (OSError, IOError):
- self.report_error('Cannot write subtitles file ' + sub_filename)
- return
- else:
- try:
- sub_data = ie._request_webpage(
- sub_info['url'], info_dict['id'], note=False).read()
- with io.open(encodeFilename(sub_filename), 'wb') as subfile:
- subfile.write(sub_data)
- except (ExtractorError, IOError, OSError, ValueError) as err:
- self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, error_to_compat_str(err)))
- continue
-
- if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
+ def _write_link_file(extension, template, newline, embed_filename):
+ linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
+ self.to_screen('[info] Internet shortcut is already present')
else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
try:
- write_json_file(self.filter_requested_info(info_dict), infofn)
+ self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
+ with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
+ template_vars = {'url': ascii_url}
+ if embed_filename:
+ template_vars['filename'] = linkfn[:-(len(extension) + 1)]
+ linkfile.write(template % template_vars)
except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
- return
+ self.report_error('Cannot write internet shortcut ' + linkfn)
+ return False
+ return True
- self._write_thumbnails(info_dict, filename)
+ if url_link:
+ if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
+ return
+ if webloc_link:
+ if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
+ return
+ if desktop_link:
+ if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
+ return
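+ # For reference, the templates above render to the standard shortcut
+ # formats, roughly (the exact fields live in the DOT_*_LINK_TEMPLATE
+ # constants and may differ slightly):
+ #
+ #     .url (Windows, hence the '\r\n' newline):
+ #         [InternetShortcut]
+ #         URL=%(url)s
+ #
+ #     .desktop (Linux, embeds the filename as the entry name):
+ #         [Desktop Entry]
+ #         Type=Link
+ #         Name=%(filename)s
+ #         URL=%(url)s
+ #
+ # .webloc is an Apple XML property list whose single value is the URL.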
+
+ try:
+ info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+ except PostProcessingError as err:
+ self.report_error('Preprocessing: %s' % str(err))
+ return
- if not self.params.get('skip_download', False):
+ must_record_download_archive = False
+ if self.params.get('skip_download', False):
+ info_dict['filepath'] = temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+ info_dict['__files_to_move'] = files_to_move
+ info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
+ else:
+ # Download
+ info_dict.setdefault('__postprocessors', [])
try:
- def dl(name, info):
- fd = get_suitable_downloader(info, self.params)(self, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
- if self.params.get('verbose'):
- self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
+ def existing_file(*filepaths):
+ ext = info_dict.get('ext')
+ final_ext = self.params.get('final_ext', ext)
+ existing_files = []
+ for file in orderedSet(filepaths):
+ if final_ext != ext:
+ converted = replace_extension(file, final_ext, ext)
+ if os.path.exists(encodeFilename(converted)):
+ existing_files.append(converted)
+ if os.path.exists(encodeFilename(file)):
+ existing_files.append(file)
+
+ if not existing_files or self.params.get('overwrites', False):
+ for file in orderedSet(existing_files):
+ self.report_file_delete(file)
+ os.remove(encodeFilename(file))
+ return None
+
+ info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
+ return existing_files[0]
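+ # e.g. with an assumed info ext 'webm' and final_ext 'mp4' (as set by a
+ # recode/remux option), a 'video.mp4' left over from an earlier run is
+ # detected and reused instead of re-downloading, unless overwrites are
+ # enabled, in which case both candidates are deleted first.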
+
+ success = True
if info_dict.get('requested_formats') is not None:
- downloaded = []
- success = True
- merger = FFmpegMergerPP(self)
- if not merger.available:
- postprocessors = []
- self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged.')
- else:
- postprocessors = [merger]
def compatible_formats(formats):
- video, audio = formats
+ # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
+ video_formats = [format for format in formats if format.get('vcodec') != 'none']
+ audio_formats = [format for format in formats if format.get('acodec') != 'none']
+ if len(video_formats) > 2 or len(audio_formats) > 2:
+ return False
+
# Check extension
- video_ext, audio_ext = video.get('ext'), audio.get('ext')
- if video_ext and audio_ext:
- COMPATIBLE_EXTS = (
- ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
- ('webm')
- )
- for exts in COMPATIBLE_EXTS:
- if video_ext in exts and audio_ext in exts:
- return True
+ exts = set(format.get('ext') for format in formats)
+ COMPATIBLE_EXTS = (
+ set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
+ set(('webm',)),
+ )
+ for ext_sets in COMPATIBLE_EXTS:
+ if ext_sets.issuperset(exts):
+ return True
# TODO: Check acodec/vcodec
return False
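+ # Worked example of the superset check above (assumed format dicts):
+ # an mp4 video plus an m4a audio give exts {'mp4', 'm4a'}, a subset of
+ # the first COMPATIBLE_EXTS set, so they merge as-is; a webm video plus
+ # an m4a audio match no set, so the code below falls back to mkv.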
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
requested_formats = info_dict['requested_formats']
- if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
- info_dict['ext'] = 'mkv'
- self.report_warning(
- 'Requested formats are incompatible for merge and will be merged into mkv.')
+ old_ext = info_dict['ext']
+ if self.params.get('merge_output_format') is None:
+ if not compatible_formats(requested_formats):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv')
+ if (info_dict['ext'] == 'webm'
+ and info_dict.get('thumbnails')
+ # check with type instead of pp_key, __name__, or isinstance
+ # since we don't want any custom PPs to trigger this
+ and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'webm doesn\'t support embedding a thumbnail, mkv will be used')
+ new_ext = info_dict['ext']
+
+ def correct_ext(filename, ext=new_ext):
+ if filename == '-':
+ return filename
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext in (old_ext, new_ext)
+ else filename)
+ return '%s.%s' % (filename_wo_ext, ext)
+
# Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
- if os.path.exists(encodeFilename(filename)):
- self.to_screen(
- '[download] %s has already been downloaded and '
- 'merged' % filename)
+ full_filename = correct_ext(full_filename)
+ temp_filename = correct_ext(temp_filename)
+ dl_filename = existing_file(full_filename, temp_filename)
+ info_dict['__real_download'] = False
+
+ if dl_filename is not None:
+ self.report_file_already_downloaded(dl_filename)
+ elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
+ info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
+ success, real_download = self.dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
else:
+ downloaded = []
+ merger = FFmpegMergerPP(self)
+ if self.params.get('allow_unplayable_formats'):
+ self.report_warning(
+ 'You have requested merging of multiple formats '
+ 'while also allowing unplayable formats to be downloaded. '
+ 'The formats won\'t be merged to prevent data corruption.')
+ elif not merger.available:
+ self.report_warning(
+ 'You have requested merging of multiple formats but ffmpeg is not installed. '
+ 'The formats won\'t be merged.')
+
+ if temp_filename == '-':
+ reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
+ else 'but the formats are incompatible for simultaneous download' if merger.available
+ else 'but ffmpeg is not installed')
+ self.report_warning(
+ f'You have requested downloading multiple formats to stdout {reason}. '
+ 'The formats will be streamed one after the other')
+ fname = temp_filename
for f in requested_formats:
new_info = dict(info_dict)
+ del new_info['requested_formats']
new_info.update(f)
- fname = prepend_extension(
- self.prepare_filename(new_info),
- 'f%s' % f['format_id'], new_info['ext'])
- if not ensure_dir_exists(fname):
- return
- downloaded.append(fname)
- partial_success = dl(fname, new_info)
+ if temp_filename != '-':
+ fname = prepend_extension(
+ correct_ext(temp_filename, new_info['ext']),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not self._ensure_dir_exists(fname):
+ return
+ f['filepath'] = fname
+ downloaded.append(fname)
+ partial_success, real_download = self.dl(fname, new_info)
+ info_dict['__real_download'] = info_dict['__real_download'] or real_download
success = success and partial_success
- info_dict['__postprocessors'] = postprocessors
- info_dict['__files_to_merge'] = downloaded
+ if merger.available and not self.params.get('allow_unplayable_formats'):
+ info_dict['__postprocessors'].append(merger)
+ info_dict['__files_to_merge'] = downloaded
+ # Even if nothing new was downloaded, the merge itself still happens now
+ info_dict['__real_download'] = True
+ else:
+ for file in downloaded:
+ files_to_move[file] = None
else:
# Just a single file
- success = dl(filename, info_dict)
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ dl_filename = existing_file(full_filename, temp_filename)
+ if dl_filename is None or dl_filename == temp_filename:
+ # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
+ # So we should try to resume the download
+ success, real_download = self.dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
+ else:
+ self.report_file_already_downloaded(dl_filename)
+
+ dl_filename = dl_filename or temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+
+ except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
except (OSError, IOError) as err:
@@ -1983,79 +2834,77 @@ class YoutubeDL(object):
self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
- if success and filename != '-':
- # Fixup content
- fixup_policy = self.params.get('fixup')
- if fixup_policy is None:
- fixup_policy = 'detect_or_warn'
-
- INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
-
- stretched_ratio = info_dict.get('stretched_ratio')
- if stretched_ratio is not None and stretched_ratio != 1:
- if fixup_policy == 'warn':
- self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
- info_dict['id'], stretched_ratio))
- elif fixup_policy == 'detect_or_warn':
- stretched_pp = FFmpegFixupStretchedPP(self)
- if stretched_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(stretched_pp)
- else:
- self.report_warning(
- '%s: Non-uniform pixel ratio (%s). %s'
- % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- if (info_dict.get('requested_formats') is None
- and info_dict.get('container') == 'm4a_dash'):
- if fixup_policy == 'warn':
- self.report_warning(
- '%s: writing DASH m4a. '
- 'Only some players support this container.'
- % info_dict['id'])
- elif fixup_policy == 'detect_or_warn':
- fixup_pp = FFmpegFixupM4aPP(self)
- if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(fixup_pp)
- else:
- self.report_warning(
- '%s: writing DASH m4a. '
- 'Only some players support this container. %s'
- % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- if (info_dict.get('protocol') == 'm3u8_native'
- or info_dict.get('protocol') == 'm3u8'
- and self.params.get('hls_prefer_native')):
- if fixup_policy == 'warn':
- self.report_warning('%s: malformed AAC bitstream detected.' % (
- info_dict['id']))
- elif fixup_policy == 'detect_or_warn':
- fixup_pp = FFmpegFixupM3u8PP(self)
- if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(fixup_pp)
- else:
- self.report_warning(
- '%s: malformed AAC bitstream detected. %s'
- % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
+ if success and full_filename != '-':
+
+ def fixup():
+ do_fixup = True
+ fixup_policy = self.params.get('fixup')
+ vid = info_dict['id']
+ if fixup_policy in ('ignore', 'never'):
+ return
+ elif fixup_policy == 'warn':
+ do_fixup = False
+ elif fixup_policy != 'force':
+ assert fixup_policy in ('detect_or_warn', None)
+ if not info_dict.get('__real_download'):
+ do_fixup = False
+
+ def ffmpeg_fixup(cndn, msg, cls):
+ if not cndn:
+ return
+ if not do_fixup:
+ self.report_warning(f'{vid}: {msg}')
+ return
+ pp = cls(self)
+ if pp.available:
+ info_dict['__postprocessors'].append(pp)
+ else:
+ self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
+
+ stretched_ratio = info_dict.get('stretched_ratio')
+ ffmpeg_fixup(
+ stretched_ratio not in (1, None),
+ f'Non-uniform pixel ratio {stretched_ratio}',
+ FFmpegFixupStretchedPP)
+
+ ffmpeg_fixup(
+ (info_dict.get('requested_formats') is None
+ and info_dict.get('container') == 'm4a_dash'
+ and info_dict.get('ext') == 'm4a'),
+ 'writing DASH m4a. Only some players support this container',
+ FFmpegFixupM4aPP)
+
+ downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
+ downloader = downloader.__name__ if downloader else None
+ ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
+ 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
+ ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
+ ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
+
+ fixup()
try:
- self.post_process(filename, info_dict)
- except (PostProcessingError) as err:
- self.report_error('postprocessing: %s' % str(err))
+ info_dict = self.post_process(dl_filename, info_dict, files_to_move)
+ except PostProcessingError as err:
+ self.report_error('Postprocessing: %s' % str(err))
return
- self.record_download_archive(info_dict)
+ try:
+ for ph in self._post_hooks:
+ ph(info_dict['filepath'])
+ except Exception as err:
+ self.report_error('post hooks: %s' % str(err))
+ return
+ must_record_download_archive = True
+
+ if must_record_download_archive or self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_dict)
+ max_downloads = self.params.get('max_downloads')
+ if max_downloads is not None and self._num_downloads >= int(max_downloads):
+ raise MaxDownloadsReached()
def download(self, url_list):
"""Download a given list of URLs."""
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ outtmpl = self.outtmpl_dict['default']
if (len(url_list) > 1
and outtmpl != '-'
and '%' not in outtmpl
@@ -2070,11 +2919,18 @@ class YoutubeDL(object):
except UnavailableVideoError:
self.report_error('unable to download video')
except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloaded files reached.')
+ self.to_screen('[info] Maximum number of downloads reached')
+ raise
+ except ExistingVideoReached:
+ self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
+ raise
+ except RejectedVideoReached:
+ self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
raise
else:
if self.params.get('dump_single_json', False):
- self.to_stdout(json.dumps(res))
+ self.post_extract(res)
+ self.to_stdout(json.dumps(self.sanitize_info(res)))
return self._download_retcode
@@ -2083,10 +2939,10 @@ class YoutubeDL(object):
[info_filename], mode='r',
openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, we can't call json.load
- info = self.filter_requested_info(json.loads('\n'.join(f)))
+ info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
try:
self.process_ie_result(info, download=True)
- except DownloadError:
+ except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
webpage_url = info.get('webpage_url')
if webpage_url is not None:
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
@@ -2096,32 +2952,102 @@ class YoutubeDL(object):
return self._download_retcode
@staticmethod
- def filter_requested_info(info_dict):
- return dict(
- (k, v) for k, v in info_dict.items()
- if k not in ['requested_formats', 'requested_subtitles'])
+ def sanitize_info(info_dict, remove_private_keys=False):
+ ''' Sanitize the infodict for converting to json '''
+ if info_dict is None:
+ return info_dict
+ info_dict.setdefault('epoch', int(time.time()))
+ remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
+ keep_keys = ['_type'] # Always keep this to facilitate load-info-json
+ if remove_private_keys:
+ remove_keys |= {
+ 'requested_formats', 'requested_subtitles', 'requested_entries',
+ 'filepath', 'entries', 'original_url', 'playlist_autonumber',
+ }
+ empty_values = (None, {}, [], set(), tuple())
+ reject = lambda k, v: k not in keep_keys and (
+ k.startswith('_') or k in remove_keys or v in empty_values)
+ else:
+ reject = lambda k, v: k in remove_keys
+ filter_fn = lambda obj: (
+ list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
+ else obj if not isinstance(obj, dict)
+ else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
+ return filter_fn(info_dict)
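+ # A small sketch of the filtering above (assumed sample dict, trimmed):
+ #
+ #     info = {'id': 'x', '_type': 'video', 'filepath': '/tmp/x.mp4',
+ #             'tags': [], '__original_infodict': {}}
+ #     YoutubeDL.sanitize_info(info, remove_private_keys=True)
+ #     # -> {'id': 'x', '_type': 'video', 'epoch': ...}
+ #     # '_type' survives via keep_keys, '__original_infodict' is always
+ #     # dropped, 'filepath' is private, and the empty list is pruned.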
+
+ @staticmethod
+ def filter_requested_info(info_dict, actually_filter=True):
+ ''' Alias of sanitize_info for backward compatibility '''
+ return YoutubeDL.sanitize_info(info_dict, actually_filter)
+
+ def run_pp(self, pp, infodict):
+ files_to_delete = []
+ if '__files_to_move' not in infodict:
+ infodict['__files_to_move'] = {}
+ try:
+ files_to_delete, infodict = pp.run(infodict)
+ except PostProcessingError as e:
+ # Must be True and not 'only_download'
+ if self.params.get('ignoreerrors') is True:
+ self.report_error(e)
+ return infodict
+ raise
+
+ if not files_to_delete:
+ return infodict
+ if self.params.get('keepvideo', False):
+ for f in files_to_delete:
+ infodict['__files_to_move'].setdefault(f, '')
+ else:
+ for old_filename in set(files_to_delete):
+ self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
+ if old_filename in infodict['__files_to_move']:
+ del infodict['__files_to_move'][old_filename]
+ return infodict
+
+ @staticmethod
+ def post_extract(info_dict):
+ def actual_post_extract(info_dict):
+ if info_dict.get('_type') in ('playlist', 'multi_video'):
+ for video_dict in info_dict.get('entries', {}):
+ actual_post_extract(video_dict or {})
+ return
+
+ post_extractor = info_dict.get('__post_extractor') or (lambda: {})
+ extra = post_extractor().items()
+ info_dict.update(extra)
+ info_dict.pop('__post_extractor', None)
+
+ original_infodict = info_dict.get('__original_infodict') or {}
+ original_infodict.update(extra)
+ original_infodict.pop('__post_extractor', None)
- def post_process(self, filename, ie_info):
+ actual_post_extract(info_dict or {})
+
+ def pre_process(self, ie_info, key='pre_process', files_to_move=None):
+ info = dict(ie_info)
+ info['__files_to_move'] = files_to_move or {}
+ for pp in self._pps[key]:
+ info = self.run_pp(pp, info)
+ return info, info.pop('__files_to_move', None)
+
+ def post_process(self, filename, ie_info, files_to_move=None):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
- pps_chain = []
- if ie_info.get('__postprocessors') is not None:
- pps_chain.extend(ie_info['__postprocessors'])
- pps_chain.extend(self._pps)
- for pp in pps_chain:
- files_to_delete = []
- try:
- files_to_delete, info = pp.run(info)
- except PostProcessingError as e:
- self.report_error(e.msg)
- if files_to_delete and not self.params.get('keepvideo', False):
- for old_filename in files_to_delete:
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
+ info['__files_to_move'] = files_to_move or {}
+
+ for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
+ info = self.run_pp(pp, info)
+ info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
+ del info['__files_to_move']
+ for pp in self._pps['after_move']:
+ info = self.run_pp(pp, info)
+ return info
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
@@ -2135,13 +3061,13 @@ class YoutubeDL(object):
if not url:
return
# Try to find matching extractor for the URL and take its ie_key
- for ie in self._ies:
+ for ie_key, ie in self._ies.items():
if ie.suitable(url):
- extractor = ie.ie_key()
+ extractor = ie_key
break
else:
return
- return extractor.lower() + ' ' + video_id
+ return '%s %s' % (extractor.lower(), video_id)
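+ # e.g. an assumed YouTube entry with id 'dQw4w9WgXcQ' produces the
+ # archive line 'youtube dQw4w9WgXcQ' (illustrative values only).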
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
@@ -2152,15 +3078,7 @@ class YoutubeDL(object):
if not vid_id:
return False # Incomplete video information
- try:
- with locked_file(fn, 'r', encoding='utf-8') as archive_file:
- for line in archive_file:
- if line.strip() == vid_id:
- return True
- except IOError as ioe:
- if ioe.errno != errno.ENOENT:
- raise
- return False
+ return vid_id in self.archive
def record_download_archive(self, info_dict):
fn = self.params.get('download_archive')
@@ -2170,23 +3088,26 @@ class YoutubeDL(object):
assert vid_id
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + '\n')
+ self.archive.add(vid_id)
@staticmethod
def format_resolution(format, default='unknown'):
- if format.get('vcodec') == 'none':
+ is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
+ if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
return 'audio only'
if format.get('resolution') is not None:
return format['resolution']
- if format.get('height') is not None:
- if format.get('width') is not None:
- res = '%sx%s' % (format['width'], format['height'])
- else:
- res = '%sp' % format['height']
- elif format.get('width') is not None:
+ if format.get('width') and format.get('height'):
+ res = '%dx%d' % (format['width'], format['height'])
+ elif format.get('height'):
+ res = '%sp' % format['height']
+ elif format.get('width'):
res = '%dx?' % format['width']
+ elif is_images:
+ return 'images'
else:
- res = default
- return res
+ return default
+ return f'{res} images' if is_images else res
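+ # Worked examples for the branches above (assumed format dicts):
+ #     {'width': 1920, 'height': 1080}         -> '1920x1080'
+ #     {'height': 720}                         -> '720p'
+ #     {'vcodec': 'none'}                      -> 'audio only'
+ #     {'vcodec': 'none', 'acodec': 'none'}    -> 'images'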
def _format_note(self, fdict):
res = ''
@@ -2246,27 +3167,61 @@ class YoutubeDL(object):
def list_formats(self, info_dict):
formats = info_dict.get('formats', [info_dict])
- table = [
- [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
- for f in formats
- if f.get('preference') is None or f['preference'] >= -1000]
- if len(formats) > 1:
- table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
-
- header_line = ['format code', 'extension', 'resolution', 'note']
+ new_format = (
+ 'list-formats' not in self.params.get('compat_opts', [])
+ and self.params.get('listformats_table', True) is not False)
+ if new_format:
+ table = [
+ [
+ format_field(f, 'format_id'),
+ format_field(f, 'ext'),
+ self.format_resolution(f),
+ format_field(f, 'fps', '%d'),
+ format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
+ '|',
+ format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
+ format_field(f, 'tbr', '%4dk'),
+ shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
+ '|',
+ format_field(f, 'vcodec', default='unknown').replace('none', ''),
+ format_field(f, 'vbr', '%4dk'),
+ format_field(f, 'acodec', default='unknown').replace('none', ''),
+ format_field(f, 'abr', '%3dk'),
+ format_field(f, 'asr', '%5dHz'),
+ ', '.join(filter(None, (
+ 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
+ format_field(f, 'language', '[%s]'),
+ format_field(f, 'format_note'),
+ format_field(f, 'container', ignore=(None, f.get('ext'))),
+ ))),
+ ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO',
+ '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
+ else:
+ table = [
+ [
+ format_field(f, 'format_id'),
+ format_field(f, 'ext'),
+ self.format_resolution(f),
+ self._format_note(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = ['format code', 'extension', 'resolution', 'note']
+
self.to_screen(
- '[info] Available formats for %s:\n%s' %
- (info_dict['id'], render_table(header_line, table)))
+ '[info] Available formats for %s:' % info_dict['id'])
+ self.to_stdout(render_table(
+ header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
def list_thumbnails(self, info_dict):
- thumbnails = info_dict.get('thumbnails')
+ thumbnails = list(info_dict.get('thumbnails') or [])
if not thumbnails:
self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
return
self.to_screen(
'[info] Thumbnails for %s:' % info_dict['id'])
- self.to_screen(render_table(
+ self.to_stdout(render_table(
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
@@ -2276,10 +3231,17 @@ class YoutubeDL(object):
return
self.to_screen(
'Available %s for %s:' % (name, video_id))
- self.to_screen(render_table(
- ['Language', 'formats'],
- [[lang, ', '.join(f['ext'] for f in reversed(formats))]
- for lang, formats in subtitles.items()]))
+
+ def _row(lang, formats):
+ exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
+ if len(set(names)) == 1:
+ names = [] if names[0] == 'unknown' else names[:1]
+ return [lang, ', '.join(names), ', '.join(exts)]
+
+ self.to_stdout(render_table(
+ ['Language', 'Name', 'Formats'],
+ [_row(lang, formats) for lang, formats in subtitles.items()],
+ hideEmpty=True))
def urlopen(self, req):
""" Start an HTTP download """
@@ -2290,34 +3252,40 @@ class YoutubeDL(object):
def print_debug_header(self):
if not self.params.get('verbose'):
return
-
- if type('') is not compat_str:
- # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
- self.report_warning(
- 'Your Python is broken! Update to a newer and supported version')
-
- stdout_encoding = getattr(
- sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
+ get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
encoding_str = (
- '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+ '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
- stdout_encoding,
+ get_encoding(self._screen_file), get_encoding(self._err_file),
self.get_encoding()))
- write_string(encoding_str, encoding=None)
- self._write_string('[debug] hypervideo version ' + __version__ + '\n')
+ logger = self.params.get('logger')
+ if logger:
+ write_debug = lambda msg: logger.debug(f'[debug] {msg}')
+ write_debug(encoding_str)
+ else:
+ write_debug = lambda msg: self._write_string(f'[debug] {msg}')
+ write_string(encoding_str, encoding=None)
+
+ write_debug('hypervideo version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ write_debug('Lazy loading extractors enabled\n')
+ if plugin_extractors or plugin_postprocessors:
+ write_debug('Plugins: %s\n' % [
+ '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
+ for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
+ if self.params.get('compat_opts'):
+ write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
+ out, err = process_communicate_or_kill(sp)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
+ write_debug('Git HEAD: %s\n' % out)
except Exception:
try:
sys.exc_clear()
@@ -2330,31 +3298,46 @@ class YoutubeDL(object):
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
return impl_name
- self._write_string('[debug] Python version %s (%s) - %s\n' % (
- platform.python_version(), python_implementation(),
+ write_debug('Python version %s (%s %s) - %s\n' % (
+ platform.python_version(),
+ python_implementation(),
+ platform.architecture()[0],
platform_name()))
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
- '%s %s' % (exe, v)
- for exe, v in sorted(exe_versions.items())
- if v
- )
- if not exe_str:
- exe_str = 'none'
- self._write_string('[debug] exe versions: %s\n' % exe_str)
+ f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
+ ) or 'none'
+ write_debug('exe versions: %s\n' % exe_str)
+
+ from .downloader.websocket import has_websockets
+ from .postprocessor.embedthumbnail import has_mutagen
+ from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
+
+ lib_str = ', '.join(sorted(filter(None, (
+ compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
+ has_websockets and 'websockets',
+ has_mutagen and 'mutagen',
+ SQLITE_AVAILABLE and 'sqlite',
+ KEYRING_AVAILABLE and 'keyring',
+ )))) or 'none'
+ write_debug('Optional libraries: %s\n' % lib_str)
+ write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % (
+ supports_terminal_sequences(self._screen_file),
+ supports_terminal_sequences(self._err_file)))
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
- self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+ write_debug('Proxy map: ' + compat_str(proxy_map) + '\n')
if self.params.get('call_home', False):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
- self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ write_debug('Public IP address: %s\n' % ipaddr)
+ return
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode('utf-8')
if version_tuple(latest_version) > version_tuple(__version__):
@@ -2365,18 +3348,13 @@ class YoutubeDL(object):
def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')
- self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
+ self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
+ opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy')
- if opts_cookiefile is None:
- self.cookiejar = compat_cookiejar.CookieJar()
- else:
- opts_cookiefile = expand_path(opts_cookiefile)
- self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
- if os.access(opts_cookiefile, os.R_OK):
- self.cookiejar.load(ignore_discard=True, ignore_expires=True)
+ self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None:
@@ -2432,38 +3410,133 @@ class YoutubeDL(object):
encoding = preferredencoding()
return encoding
- def _write_thumbnails(self, info_dict, filename):
- if self.params.get('writethumbnail', False):
- thumbnails = info_dict.get('thumbnails')
- if thumbnails:
- thumbnails = [thumbnails[-1]]
- elif self.params.get('write_all_thumbnails', False):
- thumbnails = info_dict.get('thumbnails')
+ def _write_info_json(self, label, ie_result, infofn):
+ ''' Write infojson and returns True = written, False = skip, None = error '''
+ if not self.params.get('writeinfojson'):
+ return False
+ elif not infofn:
+ self.write_debug(f'Skipping writing {label} infojson')
+ return False
+ elif not self._ensure_dir_exists(infofn):
+ return None
+ elif not self.params.get('overwrites', True) and os.path.exists(infofn):
+ self.to_screen(f'[info] {label.title()} metadata is already present')
else:
- return
-
- if not thumbnails:
- # No thumbnails present, so return immediately
- return
+ self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+ try:
+ write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+ except (OSError, IOError):
+ self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+ return None
+ return True
+
+ def _write_description(self, label, ie_result, descfn):
+ ''' Write description and returns True = written, False = skip, None = error '''
+ if not self.params.get('writedescription'):
+ return False
+ elif not descfn:
+ self.write_debug(f'Skipping writing {label} description')
+ return False
+ elif not self._ensure_dir_exists(descfn):
+ return None
+ elif not self.params.get('overwrites', True) and os.path.exists(descfn):
+ self.to_screen(f'[info] {label.title()} description is already present')
+ elif ie_result.get('description') is None:
+ self.report_warning(f'There\'s no {label} description to write')
+ return False
+ else:
+ try:
+ self.to_screen(f'[info] Writing {label} description to: {descfn}')
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(ie_result['description'])
+ except (OSError, IOError):
+ self.report_error(f'Cannot write {label} description file {descfn}')
+ return None
+ return True
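+ # The tri-state return (True = written, False = skipped, None = error)
+ # lets callers abort only on real failures, as process_info does above:
+ #
+ #     if self._write_description('video', info_dict, descfn) is None:
+ #         return  # error was already reported; 'descfn' is illustrative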
+
+ def _write_subtitles(self, info_dict, filename):
+ ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
+ ret = []
+ subtitles = info_dict.get('requested_subtitles')
+ if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
+ # subtitle download errors are already handled by the relevant IE,
+ # so processing silently continues with IEs that lack subtitle support
+ return ret
+
+ sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
+ if not sub_filename_base:
+ self.to_screen('[info] Skipping writing video subtitles')
+ return ret
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
+ if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
+ self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
+ sub_info['filepath'] = sub_filename
+ ret.append((sub_filename, sub_filename_final))
+ continue
- for t in thumbnails:
- thumb_ext = determine_ext(t['url'], 'jpg')
- suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
- thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
- t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
+ self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/ytdl-org/youtube-dl/issues/10268
+ with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ sub_info['filepath'] = sub_filename
+ ret.append((sub_filename, sub_filename_final))
+ continue
+ except (OSError, IOError):
+ self.report_error(f'Cannot write video subtitles file {sub_filename}')
+ return None
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
- self.to_screen('[%s] %s: Thumbnail %sis already present' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ try:
+ sub_copy = sub_info.copy()
+ sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
+ self.dl(sub_filename, sub_copy, subtitle=True)
+ sub_info['filepath'] = sub_filename
+ ret.append((sub_filename, sub_filename_final))
+ except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
+ self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
+ continue
+ return ret
+
+ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
+ ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
+ write_all = self.params.get('write_all_thumbnails', False)
+ thumbnails, ret = [], []
+ if write_all or self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails') or []
+ multiple = write_all and len(thumbnails) > 1
+
+ if thumb_filename_base is None:
+ thumb_filename_base = filename
+ if thumbnails and not thumb_filename_base:
+ self.write_debug(f'Skipping writing {label} thumbnail')
+ return ret
+
+ for t in thumbnails[::-1]:
+ thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
+ thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
+ thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
+ thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
+
+ if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
+ ret.append((thumb_filename, thumb_filename_final))
+ t['filepath'] = thumb_filename
+ self.to_screen(f'[info] {thumb_display_id.title()} is already present')
else:
- self.to_screen('[%s] %s: Downloading thumbnail %s...' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ self.to_screen(f'[info] Downloading {thumb_display_id} ...')
try:
uf = self.urlopen(t['url'])
+ self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
- self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_warning('Unable to download thumbnail "%s": %s' %
- (t['url'], error_to_compat_str(err)))
+ ret.append((thumb_filename, thumb_filename_final))
+ t['filepath'] = thumb_filename
+ except network_exceptions as err:
+ self.report_warning(f'Unable to download {thumb_display_id}: {err}')
+ if ret and not write_all:
+ break
+ return ret
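+ # Note the reversed iteration above: extractors list thumbnails
+ # worst-to-best, so the loop tries the best candidate first and, unless
+ # write_all is set, the break stops after the first successful write.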
diff --git a/hypervideo_dl/__init__.py b/hypervideo_dl/__init__.py
index 70c53fc..d8b7de5 100644
--- a/hypervideo_dl/__init__.py
+++ b/hypervideo_dl/__init__.py
@@ -1,46 +1,59 @@
-#!/usr/bin/env python
+#!/usr/bin/python
# coding: utf-8
-from __future__ import unicode_literals
-
__license__ = 'CC0-1.0'
import codecs
import io
+import itertools
import os
import random
+import re
import sys
-
from .options import (
parseOpts,
)
from .compat import (
compat_getpass,
- compat_shlex_split,
+ compat_shlex_quote,
workaround_optparse_bug9161,
)
+from .cookies import SUPPORTED_BROWSERS
from .utils import (
DateRange,
decodeOption,
- DEFAULT_OUTTMPL,
DownloadError,
+ error_to_compat_str,
+ ExistingVideoReached,
expand_path,
match_filter_func,
MaxDownloadsReached,
+ parse_duration,
preferredencoding,
read_batch_urls,
+ RejectedVideoReached,
+ render_table,
SameFileError,
setproctitle,
std_headers,
write_string,
- render_table,
)
from .downloader import (
FileDownloader,
)
from .extractor import gen_extractors, list_extractors
+from .extractor.common import InfoExtractor
from .extractor.adobepass import MSO_INFO
+from .postprocessor import (
+ FFmpegExtractAudioPP,
+ FFmpegSubtitlesConvertorPP,
+ FFmpegThumbnailsConvertorPP,
+ FFmpegVideoConvertorPP,
+ FFmpegVideoRemuxerPP,
+ MetadataFromFieldPP,
+ MetadataParserPP,
+)
from .YoutubeDL import YoutubeDL
@@ -55,6 +68,7 @@ def _real_main(argv=None):
setproctitle('hypervideo')
parser, opts, args = parseOpts(argv)
+ warnings = []
# Set user agent
if opts.user_agent is not None:
@@ -65,14 +79,7 @@ def _real_main(argv=None):
std_headers['Referer'] = opts.referer
# Custom HTTP headers
- if opts.headers is not None:
- for h in opts.headers:
- if ':' not in h:
- parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
- key, value = h.split(':', 1)
- if opts.verbose:
- write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
- std_headers[key] = value
+ std_headers.update(opts.headers)
# Dump user agent
if opts.dump_user_agent:
@@ -100,14 +107,14 @@ def _real_main(argv=None):
if opts.list_extractors:
for ie in list_extractors(opts.age_limit):
- write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
+ write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout)
matchedUrls = [url for url in all_urls if ie.suitable(url)]
for mu in matchedUrls:
write_string(' ' + mu + '\n', out=sys.stdout)
sys.exit(0)
if opts.list_extractor_descriptions:
for ie in list_extractors(opts.age_limit):
- if not ie._WORKING:
+ if not ie.working():
continue
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
if desc is False:
@@ -130,16 +137,12 @@ def _real_main(argv=None):
parser.error('account username missing\n')
if opts.ap_password is not None and opts.ap_username is None:
parser.error('TV Provider account username missing\n')
- if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
- parser.error('using output template conflicts with using title, video ID or auto number')
if opts.autonumber_size is not None:
if opts.autonumber_size <= 0:
parser.error('auto number size must be positive')
if opts.autonumber_start is not None:
if opts.autonumber_start < 0:
parser.error('auto number start must be positive or 0')
- if opts.usetitle and opts.useid:
- parser.error('using title conflicts with using video ID')
if opts.username is not None and opts.password is None:
opts.password = compat_getpass('Type account password and press [Return]: ')
if opts.ap_username is not None and opts.ap_password is None:
@@ -149,6 +152,11 @@ def _real_main(argv=None):
if numeric_limit is None:
parser.error('invalid rate limit specified')
opts.ratelimit = numeric_limit
+ if opts.throttledratelimit is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.throttledratelimit)
+ if numeric_limit is None:
+ parser.error('invalid rate limit specified')
+ opts.throttledratelimit = numeric_limit
if opts.min_filesize is not None:
numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
if numeric_limit is None:
@@ -171,22 +179,34 @@ def _real_main(argv=None):
parser.error('max sleep interval must be greater than or equal to min sleep interval')
else:
opts.max_sleep_interval = opts.sleep_interval
+ if opts.sleep_interval_subtitles is not None:
+ if opts.sleep_interval_subtitles < 0:
+ parser.error('subtitles sleep interval must be positive or 0')
+ if opts.sleep_interval_requests is not None:
+ if opts.sleep_interval_requests < 0:
+ parser.error('requests sleep interval must be positive or 0')
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
+ if opts.overwrites: # --yes-overwrites implies --no-continue
+ opts.continue_dl = False
+ if opts.concurrent_fragment_downloads <= 0:
+ raise ValueError('Concurrent fragments must be positive')
- def parse_retries(retries):
+ def parse_retries(retries, name=''):
if retries in ('inf', 'infinite'):
parsed_retries = float('inf')
else:
try:
parsed_retries = int(retries)
except (TypeError, ValueError):
- parser.error('invalid retry count specified')
+ parser.error('invalid %sretry count specified' % name)
return parsed_retries
if opts.retries is not None:
opts.retries = parse_retries(opts.retries)
if opts.fragment_retries is not None:
- opts.fragment_retries = parse_retries(opts.fragment_retries)
+ opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ')
+ if opts.extractor_retries is not None:
+ opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
if numeric_buffersize is None:
@@ -202,56 +222,239 @@ def _real_main(argv=None):
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
raise ValueError('Playlist end must be greater than playlist start')
if opts.extractaudio:
- if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
+ if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS):
parser.error('invalid audio format specified')
if opts.audioquality:
opts.audioquality = opts.audioquality.strip('k').strip('K')
if not opts.audioquality.isdigit():
parser.error('invalid audio quality specified')
if opts.recodevideo is not None:
- if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
- parser.error('invalid video recode format specified')
+ opts.recodevideo = opts.recodevideo.replace(' ', '')
+ if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo):
+ parser.error('invalid video recode format specified')
+ if opts.remuxvideo is not None:
+ opts.remuxvideo = opts.remuxvideo.replace(' ', '')
+ if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo):
+ parser.error('invalid video remux format specified')
if opts.convertsubtitles is not None:
- if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
+ if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
parser.error('invalid subtitle format specified')
+ if opts.convertthumbnails is not None:
+ if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS:
+ parser.error('invalid thumbnail format specified')
+
+ if opts.cookiesfrombrowser is not None:
+ opts.cookiesfrombrowser = [
+ part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)]
+ if opts.cookiesfrombrowser[0].lower() not in SUPPORTED_BROWSERS:
+ parser.error('unsupported browser specified for cookies')
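+ # e.g. an assumed '--cookies-from-browser chrome:Profile 1' yields
+ # ['chrome', 'Profile 1']; splitting on the first ':' only keeps any
+ # colons inside the profile path intact.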
if opts.date is not None:
date = DateRange.day(opts.date)
else:
date = DateRange(opts.dateafter, opts.datebefore)
- # Do not download videos when there are audio-only formats
+ compat_opts = opts.compat_opts
+
+ def _unused_compat_opt(name):
+ if name not in compat_opts:
+ return False
+ compat_opts.discard(name)
+ compat_opts.update(['*%s' % name])
+ return True
+
+ def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
+ attr = getattr(opts, opt_name)
+ if compat_name in compat_opts:
+ if attr is None:
+ setattr(opts, opt_name, not default)
+ return True
+ else:
+ if remove_compat:
+ _unused_compat_opt(compat_name)
+ return False
+ elif attr is None:
+ setattr(opts, opt_name, default)
+ return None
+
+ set_default_compat('abort-on-error', 'ignoreerrors', 'only_download')
+ set_default_compat('no-playlist-metafiles', 'allow_playlist_files')
+ set_default_compat('no-clean-infojson', 'clean_infojson')
+ if 'format-sort' in compat_opts:
+ opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default)
+ _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
+ _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
+ if _video_multistreams_set is False and _audio_multistreams_set is False:
+ _unused_compat_opt('multistreams')
+ outtmpl_default = opts.outtmpl.get('default')
+ if 'filename' in compat_opts:
+ if outtmpl_default is None:
+ outtmpl_default = '%(title)s-%(id)s.%(ext)s'
+ opts.outtmpl.update({'default': outtmpl_default})
+ else:
+ _unused_compat_opt('filename')
+
+ def validate_outtmpl(tmpl, msg):
+ err = YoutubeDL.validate_outtmpl(tmpl)
+ if err:
+ parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err)))
+
+ for k, tmpl in opts.outtmpl.items():
+ validate_outtmpl(tmpl, f'{k} output template')
+ opts.forceprint = opts.forceprint or []
+ for tmpl in opts.forceprint or []:
+ validate_outtmpl(tmpl, 'print template')
+ validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
+ for k, tmpl in opts.progress_template.items():
+ k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'
+ validate_outtmpl(tmpl, f'{k} template')
+
if opts.extractaudio and not opts.keepvideo and opts.format is None:
opts.format = 'bestaudio/best'
- # --all-sub automatically sets --write-sub if --write-auto-sub is not given
- # this was the old behaviour if only --all-sub was given.
- if opts.allsubtitles and not opts.writeautomaticsub:
- opts.writesubtitles = True
-
- outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
- or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
- or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
- or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
- or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
- or (opts.useid and '%(id)s.%(ext)s')
- or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
- or DEFAULT_OUTTMPL)
- if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
+ if outtmpl_default is not None and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
- ' template'.format(outtmpl))
+ ' template'.format(outtmpl_default))
+
+ for f in opts.format_sort:
+ if re.match(InfoExtractor.FormatSort.regex, f) is None:
+ parser.error('invalid format sort string "%s" specified' % f)
+
+ def metadataparser_actions(f):
+ if isinstance(f, str):
+ cmd = '--parse-metadata %s' % compat_shlex_quote(f)
+ try:
+ actions = [MetadataFromFieldPP.to_action(f)]
+ except Exception as err:
+ parser.error(f'{cmd} is invalid; {err}')
+ else:
+ cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
+ actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
- any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+ for action in actions:
+ try:
+ MetadataParserPP.validate_action(*action)
+ except Exception as err:
+ parser.error(f'{cmd} is invalid; {err}')
+ yield action
+
+ if opts.parse_metadata is None:
+ opts.parse_metadata = []
+ if opts.metafromtitle is not None:
+ opts.parse_metadata.append('title:%s' % opts.metafromtitle)
+ opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata)))
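As a hedged illustration of the conversion above: a FROM:TO string such as 'title:%(artist)s - %(title)s' is handed to MetadataFromFieldPP.to_action and becomes one parse action, while the deprecated --metadata-from-title value is first rewritten into the same 'title:...' shape:

    # --metadata-from-title '%(artist)s - %(title)s' is equivalent to
    opts.parse_metadata.append('title:%(artist)s - %(title)s')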
+
+ any_getting = opts.forceprint or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
any_printing = opts.print_json
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+ # If JSON is not printed anywhere, but comments are requested, save it to file
+ printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json
+ if opts.getcomments and not printing_json:
+ opts.writeinfojson = True
+
+ if opts.no_sponsorblock:
+ opts.sponsorblock_mark = set()
+ opts.sponsorblock_remove = set()
+ sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
+
+ if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None:
+ opts.addchapters = True
+ opts.remove_chapters = opts.remove_chapters or []
+
+ def report_conflict(arg1, arg2):
+ warnings.append('%s is ignored since %s was given' % (arg2, arg1))
+
+ if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False:
+ if opts.sponskrub:
+ if opts.remove_chapters:
+ report_conflict('--remove-chapters', '--sponskrub')
+ if opts.sponsorblock_mark:
+ report_conflict('--sponsorblock-mark', '--sponskrub')
+ if opts.sponsorblock_remove:
+ report_conflict('--sponsorblock-remove', '--sponskrub')
+ opts.sponskrub = False
+ if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False:
+ report_conflict('--split-chapter', '--sponskrub-cut')
+ opts.sponskrub_cut = False
+
+ if opts.remuxvideo and opts.recodevideo:
+ report_conflict('--recode-video', '--remux-video')
+ opts.remuxvideo = False
+
+ if opts.allow_unplayable_formats:
+ if opts.extractaudio:
+ report_conflict('--allow-unplayable-formats', '--extract-audio')
+ opts.extractaudio = False
+ if opts.remuxvideo:
+ report_conflict('--allow-unplayable-formats', '--remux-video')
+ opts.remuxvideo = False
+ if opts.recodevideo:
+ report_conflict('--allow-unplayable-formats', '--recode-video')
+ opts.recodevideo = False
+ if opts.addmetadata:
+ report_conflict('--allow-unplayable-formats', '--add-metadata')
+ opts.addmetadata = False
+ if opts.embedsubtitles:
+ report_conflict('--allow-unplayable-formats', '--embed-subs')
+ opts.embedsubtitles = False
+ if opts.embedthumbnail:
+ report_conflict('--allow-unplayable-formats', '--embed-thumbnail')
+ opts.embedthumbnail = False
+ if opts.xattrs:
+ report_conflict('--allow-unplayable-formats', '--xattrs')
+ opts.xattrs = False
+ if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'):
+ report_conflict('--allow-unplayable-formats', '--fixup')
+ opts.fixup = 'never'
+ if opts.remove_chapters:
+ report_conflict('--allow-unplayable-formats', '--remove-chapters')
+ opts.remove_chapters = []
+ if opts.sponsorblock_remove:
+ report_conflict('--allow-unplayable-formats', '--sponsorblock-remove')
+ opts.sponsorblock_remove = set()
+ if opts.sponskrub:
+ report_conflict('--allow-unplayable-formats', '--sponskrub')
+ opts.sponskrub = False
+
# PostProcessors
- postprocessors = []
- if opts.metafromtitle:
+ postprocessors = list(opts.add_postprocessors)
+ if sponsorblock_query:
postprocessors.append({
- 'key': 'MetadataFromTitle',
- 'titleformat': opts.metafromtitle
+ 'key': 'SponsorBlock',
+ 'categories': sponsorblock_query,
+ 'api': opts.sponsorblock_api,
+ # Run this immediately after extraction is complete
+ 'when': 'pre_process'
+ })
+ if opts.parse_metadata:
+ postprocessors.append({
+ 'key': 'MetadataParser',
+ 'actions': opts.parse_metadata,
+ # Run this immediately after extraction is complete
+ 'when': 'pre_process'
+ })
+ if opts.convertsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegSubtitlesConvertor',
+ 'format': opts.convertsubtitles,
+ # Run this before the actual video download
+ 'when': 'before_dl'
+ })
+ if opts.convertthumbnails:
+ postprocessors.append({
+ 'key': 'FFmpegThumbnailsConvertor',
+ 'format': opts.convertthumbnails,
+ # Run this before the actual video download
+ 'when': 'before_dl'
+ })
+ # Must be after all other before_dl
+ if opts.exec_before_dl_cmd:
+ postprocessors.append({
+ 'key': 'Exec',
+ 'exec_cmd': opts.exec_before_dl_cmd,
+ 'when': 'before_dl'
})
if opts.extractaudio:
postprocessors.append({
@@ -260,61 +463,129 @@ def _real_main(argv=None):
'preferredquality': opts.audioquality,
'nopostoverwrites': opts.nopostoverwrites,
})
+ if opts.remuxvideo:
+ postprocessors.append({
+ 'key': 'FFmpegVideoRemuxer',
+ 'preferedformat': opts.remuxvideo,
+ })
if opts.recodevideo:
postprocessors.append({
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
})
+ # If ModifyChapters is going to remove chapters, subtitles must already be in the container.
+ if opts.embedsubtitles:
+ already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts
+ postprocessors.append({
+ 'key': 'FFmpegEmbedSubtitle',
+ # already_have_subtitle = True prevents the file from being deleted after embedding
+ 'already_have_subtitle': already_have_subtitle
+ })
+ if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts:
+ opts.writesubtitles = True
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ # this was the old behaviour if only --all-sub was given.
+ if opts.allsubtitles and not opts.writeautomaticsub:
+ opts.writesubtitles = True
+ # ModifyChapters must run before FFmpegMetadataPP
+ remove_chapters_patterns, remove_ranges = [], []
+ for regex in opts.remove_chapters:
+ if regex.startswith('*'):
+ dur = list(map(parse_duration, regex[1:].split('-')))
+ if len(dur) == 2 and all(t is not None for t in dur):
+ remove_ranges.append(tuple(dur))
+ continue
+            parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end')
+ try:
+ remove_chapters_patterns.append(re.compile(regex))
+ except re.error as err:
+ parser.error(f'invalid --remove-chapters regex {regex!r} - {err}')
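A worked example of the branch above (values hypothetical): --remove-chapters '*1:00-2:30' drops the leading '*', parse_duration maps '1:00' and '2:30' to 60.0 and 150.0, and (60.0, 150.0) is appended to remove_ranges; an argument without the '*' prefix, e.g. 'Sponsor.*', is instead compiled as a regex and matched against chapter titles:

    >>> list(map(parse_duration, '1:00-2:30'.split('-')))
    [60.0, 150.0]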
+ if opts.remove_chapters or sponsorblock_query:
+ postprocessors.append({
+ 'key': 'ModifyChapters',
+ 'remove_chapters_patterns': remove_chapters_patterns,
+ 'remove_sponsor_segments': opts.sponsorblock_remove,
+ 'remove_ranges': remove_ranges,
+ 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
+ 'force_keyframes': opts.force_keyframes_at_cuts
+ })
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
# FFmpegExtractAudioPP as containers before conversion may not support
# metadata (3gp, webm, etc.)
- # And this post-processor should be placed before other metadata
- # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
- # extra metadata. By default ffmpeg preserves metadata applicable for both
+ # By default ffmpeg preserves metadata applicable for both
# source and target containers. From this point the container won't change,
# so metadata can be added here.
- if opts.addmetadata:
- postprocessors.append({'key': 'FFmpegMetadata'})
- if opts.convertsubtitles:
+ if opts.addmetadata or opts.addchapters:
postprocessors.append({
- 'key': 'FFmpegSubtitlesConvertor',
- 'format': opts.convertsubtitles,
+ 'key': 'FFmpegMetadata',
+ 'add_chapters': opts.addchapters,
+ 'add_metadata': opts.addmetadata,
})
- if opts.embedsubtitles:
+ # Note: Deprecated
+ # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
+ # but must be below EmbedSubtitle and FFmpegMetadata
+ # See https://github.com/hypervideo/hypervideo/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29
+ # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
+ if opts.sponskrub is not False:
postprocessors.append({
- 'key': 'FFmpegEmbedSubtitle',
+ 'key': 'SponSkrub',
+ 'path': opts.sponskrub_path,
+ 'args': opts.sponskrub_args,
+ 'cut': opts.sponskrub_cut,
+ 'force': opts.sponskrub_force,
+ 'ignoreerror': opts.sponskrub is None,
})
if opts.embedthumbnail:
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
postprocessors.append({
'key': 'EmbedThumbnail',
+ # already_have_thumbnail = True prevents the file from being deleted after embedding
'already_have_thumbnail': already_have_thumbnail
})
if not already_have_thumbnail:
opts.writethumbnail = True
- # XAttrMetadataPP should be run after post-processors that may change file
- # contents
+ opts.outtmpl['pl_thumbnail'] = ''
+ if opts.split_chapters:
+ postprocessors.append({
+ 'key': 'FFmpegSplitChapters',
+ 'force_keyframes': opts.force_keyframes_at_cuts,
+ })
+ # XAttrMetadataPP should be run after post-processors that may change file contents
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})
- # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
- # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
+ # Exec must be the last PP
if opts.exec_cmd:
postprocessors.append({
- 'key': 'ExecAfterDownload',
+ 'key': 'Exec',
'exec_cmd': opts.exec_cmd,
+ # Run this only after the files have been moved to their final locations
+ 'when': 'after_move'
})
- external_downloader_args = None
- if opts.external_downloader_args:
- external_downloader_args = compat_shlex_split(opts.external_downloader_args)
- postprocessor_args = None
- if opts.postprocessor_args:
- postprocessor_args = compat_shlex_split(opts.postprocessor_args)
+
+ def report_args_compat(arg, name):
+ warnings.append('%s given without specifying name. The arguments will be given to all %s' % (arg, name))
+
+ if 'default' in opts.external_downloader_args:
+ report_args_compat('--downloader-args', 'external downloaders')
+
+ if 'default-compat' in opts.postprocessor_args and 'default' not in opts.postprocessor_args:
+ report_args_compat('--post-processor-args', 'post-processors')
+ opts.postprocessor_args.setdefault('sponskrub', [])
+ opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat']
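A hedged example of the compat path above (command name assumed from this project): a bare --post-processor-args with no post-processor name parses into the 'default-compat' key, triggers the warning, and is then copied to 'default' so it still reaches every post-processor, whereas the named form targets one PP and stays silent:

    hypervideo --post-processor-args "-threads 2" URL      # warns, applies to all post-processors
    hypervideo --post-processor-args "SponSkrub:-v" URL    # applies to SponSkrub only, no warning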
+
+ final_ext = (
+ opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
+ else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS
+ else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best')
+ else None)
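Reading the chain above top to bottom: --recode-video mkv gives final_ext 'mkv'; otherwise --remux-video webm gives 'webm'; otherwise --extract-audio --audio-format mp3 gives 'mp3' (with 'best' deliberately excluded, since the real extension is unknown until conversion); otherwise final_ext stays None so later code cannot mistake an unset target container for a real one.

    # e.g. with --extract-audio --audio-format mp3 and no recode/remux:
    # opts.recodevideo is None, opts.remuxvideo is None  ->  final_ext == 'mp3'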
+
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))
ydl_opts = {
'usenetrc': opts.usenetrc,
+ 'netrc_location': opts.netrc_location,
'username': opts.username,
'password': opts.password,
'twofactor': opts.twofactor,
@@ -332,45 +603,68 @@ def _real_main(argv=None):
'forceduration': opts.getduration,
'forcefilename': opts.getfilename,
'forceformat': opts.getformat,
+ 'forceprint': opts.forceprint,
'forcejson': opts.dumpjson or opts.print_json,
'dump_single_json': opts.dump_single_json,
- 'simulate': opts.simulate or any_getting,
+ 'force_write_download_archive': opts.force_write_download_archive,
+ 'simulate': (any_getting or None) if opts.simulate is None else opts.simulate,
'skip_download': opts.skip_download,
'format': opts.format,
+ 'allow_unplayable_formats': opts.allow_unplayable_formats,
+ 'ignore_no_formats_error': opts.ignore_no_formats_error,
+ 'format_sort': opts.format_sort,
+ 'format_sort_force': opts.format_sort_force,
+ 'allow_multiple_video_streams': opts.allow_multiple_video_streams,
+ 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams,
+ 'check_formats': opts.check_formats,
'listformats': opts.listformats,
- 'outtmpl': outtmpl,
+ 'listformats_table': opts.listformats_table,
+ 'outtmpl': opts.outtmpl,
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
+ 'paths': opts.paths,
'autonumber_size': opts.autonumber_size,
'autonumber_start': opts.autonumber_start,
'restrictfilenames': opts.restrictfilenames,
+ 'windowsfilenames': opts.windowsfilenames,
'ignoreerrors': opts.ignoreerrors,
'force_generic_extractor': opts.force_generic_extractor,
'ratelimit': opts.ratelimit,
- 'nooverwrites': opts.nooverwrites,
+ 'throttledratelimit': opts.throttledratelimit,
+ 'overwrites': opts.overwrites,
'retries': opts.retries,
'fragment_retries': opts.fragment_retries,
+ 'extractor_retries': opts.extractor_retries,
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
'keep_fragments': opts.keep_fragments,
+ 'concurrent_fragment_downloads': opts.concurrent_fragment_downloads,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'http_chunk_size': opts.http_chunk_size,
'continuedl': opts.continue_dl,
- 'noprogress': opts.noprogress,
+ 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
'progress_with_newline': opts.progress_with_newline,
+ 'progress_template': opts.progress_template,
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
'playlistreverse': opts.playlist_reverse,
'playlistrandom': opts.playlist_random,
'noplaylist': opts.noplaylist,
- 'logtostderr': opts.outtmpl == '-',
+ 'logtostderr': outtmpl_default == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
'updatetime': opts.updatetime,
'writedescription': opts.writedescription,
'writeannotations': opts.writeannotations,
'writeinfojson': opts.writeinfojson,
+ 'allow_playlist_files': opts.allow_playlist_files,
+ 'clean_infojson': opts.clean_infojson,
+ 'getcomments': opts.getcomments,
'writethumbnail': opts.writethumbnail,
'write_all_thumbnails': opts.write_all_thumbnails,
+ 'writelink': opts.writelink,
+ 'writeurllink': opts.writeurllink,
+ 'writewebloclink': opts.writewebloclink,
+ 'writedesktoplink': opts.writedesktoplink,
'writesubtitles': opts.writesubtitles,
'writeautomaticsub': opts.writeautomaticsub,
'allsubtitles': opts.allsubtitles,
@@ -381,6 +675,7 @@ def _real_main(argv=None):
'rejecttitle': decodeOption(opts.rejecttitle),
'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats,
+ 'trim_file_name': opts.trim_file_name,
'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages,
'write_pages': opts.write_pages,
@@ -395,7 +690,11 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': download_archive_fn,
+ 'break_on_existing': opts.break_on_existing,
+ 'break_on_reject': opts.break_on_reject,
+ 'skip_playlist_after_errors': opts.skip_playlist_after_errors,
'cookiefile': opts.cookiefile,
+ 'cookiesfrombrowser': opts.cookiesfrombrowser,
'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure,
'proxy': opts.proxy,
@@ -405,17 +704,23 @@ def _real_main(argv=None):
'prefer_ffmpeg': opts.prefer_ffmpeg,
'include_ads': opts.include_ads,
'default_search': opts.default_search,
+ 'dynamic_mpd': opts.dynamic_mpd,
+ 'extractor_args': opts.extractor_args,
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+ 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest,
'encoding': opts.encoding,
'extract_flat': opts.extract_flat,
'mark_watched': opts.mark_watched,
'merge_output_format': opts.merge_output_format,
+ 'final_ext': final_ext,
'postprocessors': postprocessors,
'fixup': opts.fixup,
'source_address': opts.source_address,
'call_home': opts.call_home,
+ 'sleep_interval_requests': opts.sleep_interval_requests,
'sleep_interval': opts.sleep_interval,
'max_sleep_interval': opts.max_sleep_interval,
+ 'sleep_interval_subtitles': opts.sleep_interval_subtitles,
'external_downloader': opts.external_downloader,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
@@ -425,28 +730,27 @@ def _real_main(argv=None):
'ffmpeg_location': opts.ffmpeg_location,
'hls_prefer_native': opts.hls_prefer_native,
'hls_use_mpegts': opts.hls_use_mpegts,
- 'external_downloader_args': external_downloader_args,
- 'postprocessor_args': postprocessor_args,
+ 'hls_split_discontinuity': opts.hls_split_discontinuity,
+ 'external_downloader_args': opts.external_downloader_args,
+ 'postprocessor_args': opts.postprocessor_args,
'cn_verification_proxy': opts.cn_verification_proxy,
'geo_verification_proxy': opts.geo_verification_proxy,
- 'config_location': opts.config_location,
'geo_bypass': opts.geo_bypass,
'geo_bypass_country': opts.geo_bypass_country,
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
- # just for deprecation check
- 'autonumber': opts.autonumber if opts.autonumber is True else None,
- 'usetitle': opts.usetitle if opts.usetitle is True else None,
+ 'warnings': warnings,
+ 'compat_opts': compat_opts,
}
with YoutubeDL(ydl_opts) as ydl:
+ actual_use = len(all_urls) or opts.load_info_filename
# Remove cache dir
if opts.rm_cachedir:
ydl.cache.remove()
# Maybe do nothing
- if (len(all_urls) < 1) and (opts.load_info_filename is None):
-
+ if not actual_use:
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
parser.error(
'You must provide at least one URL.\n'
@@ -457,8 +761,8 @@ def _real_main(argv=None):
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
else:
retcode = ydl.download(all_urls)
- except MaxDownloadsReached:
- ydl.to_screen('--max-download limit reached, aborting.')
+ except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
+ ydl.to_screen('Aborting remaining downloads')
retcode = 101
sys.exit(retcode)
@@ -473,6 +777,11 @@ def main(argv=None):
sys.exit('ERROR: fixed output name but more than one file to download')
except KeyboardInterrupt:
sys.exit('\nERROR: Interrupted by user')
+    except BrokenPipeError as err:
+ # https://docs.python.org/3/library/signal.html#note-on-sigpipe
+ devnull = os.open(os.devnull, os.O_WRONLY)
+ os.dup2(devnull, sys.stdout.fileno())
+        sys.exit(f'\nERROR: {err}')
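Per the CPython note linked above, redirecting stdout to os.devnull before exiting prevents a second BrokenPipeError when the interpreter flushes the stream during shutdown; the error message itself still reaches the user because sys.exit writes it to stderr.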
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/hypervideo_dl/__main__.py b/hypervideo_dl/__main__.py
index e3b35e2..49765e4 100755
--- a/hypervideo_dl/__main__.py
+++ b/hypervideo_dl/__main__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
# Execute with
diff --git a/hypervideo_dl/aes.py b/hypervideo_dl/aes.py
index 461bb6d..60cdeb7 100644
--- a/hypervideo_dl/aes.py
+++ b/hypervideo_dl/aes.py
@@ -2,36 +2,68 @@ from __future__ import unicode_literals
from math import ceil
-from .compat import compat_b64decode
+from .compat import compat_b64decode, compat_pycrypto_AES
from .utils import bytes_to_intlist, intlist_to_bytes
+
+if compat_pycrypto_AES:
+ def aes_cbc_decrypt_bytes(data, key, iv):
+ """ Decrypt bytes with AES-CBC using pycryptodome """
+ return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data)
+
+ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+ """ Decrypt bytes with AES-GCM using pycryptodome """
+ return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
+
+else:
+ def aes_cbc_decrypt_bytes(data, key, iv):
+ """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
+ return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv))))
+
+ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+ """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
+ return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
+
+
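Both branches above export the same byte-level helpers, so callers never need to check for pycryptodome themselves. A minimal usage sketch (key and IV values hypothetical; note the output is raw, with any block padding left in place):

    from hypervideo_dl.aes import aes_cbc_decrypt_bytes

    plaintext = aes_cbc_decrypt_bytes(ciphertext, b'0123456789abcdef', b'\x00' * 16)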
BLOCK_SIZE_BYTES = 16
-def aes_ctr_decrypt(data, key, counter):
+def aes_ctr_decrypt(data, key, iv):
"""
Decrypt with aes in counter mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
- @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block)
- returns the next counter block
+ @param {int[]} iv 16-Byte initialization vector
@returns {int[]} decrypted data
"""
+ return aes_ctr_encrypt(data, key, iv)
+
+
+def aes_ctr_encrypt(data, key, iv):
+ """
+ Encrypt with aes in counter mode
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte initialization vector
+ @returns {int[]} encrypted data
+ """
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+ counter = iter_vector(iv)
- decrypted_data = []
+ encrypted_data = []
for i in range(block_count):
- counter_block = counter.next_value()
+ counter_block = next(counter)
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
block += [0] * (BLOCK_SIZE_BYTES - len(block))
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
- decrypted_data += xor(block, cipher_counter_block)
- decrypted_data = decrypted_data[:len(data)]
+ encrypted_data += xor(block, cipher_counter_block)
+ encrypted_data = encrypted_data[:len(data)]
- return decrypted_data
+ return encrypted_data
def aes_cbc_decrypt(data, key, iv):
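Because CTR mode XORs the data with a keystream, decryption is the same operation as encryption, hence the one-line aes_ctr_decrypt above. A round-trip sketch using the module's int-list API (key and IV values hypothetical):

    from hypervideo_dl.aes import aes_ctr_decrypt, aes_ctr_encrypt
    from hypervideo_dl.utils import bytes_to_intlist, intlist_to_bytes

    key = bytes_to_intlist(b'0123456789abcdef')   # 16-byte key
    iv = bytes_to_intlist(b'\x00' * 16)           # 16-byte initial counter block
    data = bytes_to_intlist(b'attack at dawn')

    out = aes_ctr_encrypt(data, key, iv)
    assert intlist_to_bytes(aes_ctr_decrypt(out, key, iv)) == b'attack at dawn'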
@@ -88,39 +120,47 @@ def aes_cbc_encrypt(data, key, iv):
return encrypted_data
-def key_expansion(data):
+def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
"""
- Generate key schedule
+    Decrypt with aes in GCM mode and check authenticity using tag
- @param {int[]} data 16/24/32-Byte cipher key
- @returns {int[]} 176/208/240-Byte expanded key
+ @param {int[]} data cipher
+ @param {int[]} key 16-Byte cipher key
+ @param {int[]} tag authentication tag
+ @param {int[]} nonce IV (recommended 12-Byte)
+ @returns {int[]} decrypted data
"""
- data = data[:] # copy
- rcon_iteration = 1
- key_size_bytes = len(data)
- expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
- while len(data) < expanded_key_size_bytes:
- temp = data[-4:]
- temp = key_schedule_core(temp, rcon_iteration)
- rcon_iteration += 1
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ # XXX: check aes, gcm param
- for _ in range(3):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
- if key_size_bytes == 32:
- temp = data[-4:]
- temp = sub_bytes(temp)
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ if len(nonce) == 12:
+ j0 = nonce + [0, 0, 0, 1]
+ else:
+ fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
+ ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
+ j0 = ghash(hash_subkey, ghash_in)
- for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
- data = data[:expanded_key_size_bytes]
+ # TODO: add nonce support to aes_ctr_decrypt
- return data
+ # nonce_ctr = j0[:12]
+ iv_ctr = inc(j0)
+
+ decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
+    pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
+    s_tag = ghash(
+        hash_subkey,
+        data
+        + [0] * pad_len  # pad ciphertext to a whole number of blocks
+        + bytes_to_intlist((0 * 8).to_bytes(8, 'big')  # length of associated data
+        + ((len(data) * 8).to_bytes(8, 'big')))  # length of data
+    )
+
+ if tag != aes_ctr_encrypt(s_tag, key, j0):
+ raise ValueError("Mismatching authentication tag")
+
+ return decrypted_data
def aes_encrypt(data, expanded_key):
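A hedged sketch of how the GCM routine is typically driven from its byte-level wrapper, with the 16-byte tag split off the end of a combined payload (that layout is an assumption of this sketch, not something the function requires):

    from hypervideo_dl.aes import aes_gcm_decrypt_and_verify_bytes

    ciphertext, tag = payload[:-16], payload[-16:]  # payload, key, nonce are hypothetical
    plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, tag, nonce)
    # raises ValueError if the authentication tag does not verify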
@@ -138,7 +178,7 @@ def aes_encrypt(data, expanded_key):
data = sub_bytes(data)
data = shift_rows(data)
if i != rounds:
- data = mix_columns(data)
+ data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX))
data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
return data
@@ -157,7 +197,7 @@ def aes_decrypt(data, expanded_key):
for i in range(rounds, 0, -1):
data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
if i != rounds:
- data = mix_columns_inv(data)
+ data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
data = shift_rows_inv(data)
data = sub_bytes_inv(data)
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
@@ -189,15 +229,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
nonce = data[:NONCE_LENGTH_BYTES]
cipher = data[NONCE_LENGTH_BYTES:]
- class Counter(object):
- __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
-
- def next_value(self):
- temp = self.__value
- self.__value = inc(self.__value)
- return temp
-
- decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+ decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
plaintext = intlist_to_bytes(decrypted_data)
return plaintext
@@ -278,6 +310,47 @@ RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
+def key_expansion(data):
+ """
+ Generate key schedule
+
+ @param {int[]} data 16/24/32-Byte cipher key
+ @returns {int[]} 176/208/240-Byte expanded key
+ """
+ data = data[:] # copy
+ rcon_iteration = 1
+ key_size_bytes = len(data)
+ expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+ while len(data) < expanded_key_size_bytes:
+ temp = data[-4:]
+ temp = key_schedule_core(temp, rcon_iteration)
+ rcon_iteration += 1
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ if key_size_bytes == 32:
+ temp = data[-4:]
+ temp = sub_bytes(temp)
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ data = data[:expanded_key_size_bytes]
+
+ return data
+
+
+def iter_vector(iv):
+ while True:
+ yield iv
+ iv = inc(iv)
+
+
def sub_bytes(data):
return [SBOX[x] for x in data]
@@ -302,48 +375,36 @@ def xor(data1, data2):
return [x ^ y for x, y in zip(data1, data2)]
-def rijndael_mul(a, b):
- if(a == 0 or b == 0):
- return 0
- return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
+def iter_mix_columns(data, matrix):
+ for i in (0, 4, 8, 12):
+ for row in matrix:
+ mixed = 0
+ for j in range(4):
+                # XOR doubles as both addition and subtraction in GF(2^8)
+                mixed ^= (0 if data[i + j] == 0 or row[j] == 0 else
+                          RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF])
+ yield mixed
-def mix_column(data, matrix):
- data_mixed = []
- for row in range(4):
- mixed = 0
- for column in range(4):
- # xor is (+) and (-)
- mixed ^= rijndael_mul(data[column], matrix[row][column])
- data_mixed.append(mixed)
- return data_mixed
-
-
-def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
- data_mixed = []
- for i in range(4):
- column = data[i * 4: (i + 1) * 4]
- data_mixed += mix_column(column, matrix)
- return data_mixed
+def shift_rows(data):
+ return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
-def mix_columns_inv(data):
- return mix_columns(data, MIX_COLUMN_MATRIX_INV)
+def shift_rows_inv(data):
+ return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
-def shift_rows(data):
+def shift_block(data):
data_shifted = []
- for column in range(4):
- for row in range(4):
- data_shifted.append(data[((column + row) & 0b11) * 4 + row])
- return data_shifted
+ bit = 0
+ for n in data:
+ if bit:
+ n |= 0x100
+ bit = n & 1
+ n >>= 1
+ data_shifted.append(n)
-def shift_rows_inv(data):
- data_shifted = []
- for column in range(4):
- for row in range(4):
- data_shifted.append(data[((column - row) & 0b11) * 4 + row])
return data_shifted
@@ -358,4 +419,50 @@ def inc(data):
return data
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
+def block_product(block_x, block_y):
+ # NIST SP 800-38D, Algorithm 1
+
+ if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
+ raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
+
+ block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
+ block_v = block_y[:]
+ block_z = [0] * BLOCK_SIZE_BYTES
+
+ for i in block_x:
+ for bit in range(7, -1, -1):
+ if i & (1 << bit):
+ block_z = xor(block_z, block_v)
+
+ do_xor = block_v[-1] & 1
+ block_v = shift_block(block_v)
+ if do_xor:
+ block_v = xor(block_v, block_r)
+
+ return block_z
+
+
+def ghash(subkey, data):
+ # NIST SP 800-38D, Algorithm 2
+
+ if len(data) % BLOCK_SIZE_BYTES:
+ raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
+
+ last_y = [0] * BLOCK_SIZE_BYTES
+ for i in range(0, len(data), BLOCK_SIZE_BYTES):
+        block = data[i:i + BLOCK_SIZE_BYTES]
+ last_y = block_product(xor(last_y, block), subkey)
+
+ return last_y
+
+
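A small consistency check that follows directly from the two NIST SP 800-38D routines above: for a single 16-byte block X, GHASH reduces to one GF(2^128) product (values hypothetical):

    X = [0] * 15 + [1]
    H = [0x42] + [0] * 15
    assert ghash(H, X) == block_product(X, H)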
+__all__ = [
+ 'aes_ctr_decrypt',
+ 'aes_cbc_decrypt',
+ 'aes_cbc_decrypt_bytes',
+ 'aes_decrypt_text',
+ 'aes_encrypt',
+ 'aes_gcm_decrypt_and_verify',
+ 'aes_gcm_decrypt_and_verify_bytes',
+ 'key_expansion'
+]
diff --git a/hypervideo_dl/cache.py b/hypervideo_dl/cache.py
index 81cd297..24acb1b 100644
--- a/hypervideo_dl/cache.py
+++ b/hypervideo_dl/cache.py
@@ -50,6 +50,7 @@ class Cache(object):
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
+ self._ydl.write_debug(f'Saving {section}.{key} to cache')
write_json_file(data, fn)
except Exception:
tb = traceback.format_exc()
@@ -66,6 +67,7 @@ class Cache(object):
try:
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+ self._ydl.write_debug(f'Loading {section}.{key} from cache')
return json.load(cachef)
except ValueError:
try:
diff --git a/hypervideo_dl/compat.py b/hypervideo_dl/compat.py
index 97ab37a..5e0e5d8 100644
--- a/hypervideo_dl/compat.py
+++ b/hypervideo_dl/compat.py
@@ -1,2526 +1,42 @@
# coding: utf-8
-from __future__ import unicode_literals
+import asyncio
import base64
-import binascii
-import collections
import ctypes
-import email
import getpass
-import io
+import html
+import html.parser
+import http
+import http.client
+import http.cookiejar
+import http.cookies
+import http.server
import itertools
import optparse
import os
-import platform
import re
import shlex
import shutil
import socket
import struct
-import subprocess
import sys
-import xml.etree.ElementTree
+import tokenize
+import urllib
+import xml.etree.ElementTree as etree
+from subprocess import DEVNULL
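With Python 2 support gone, the try/except ImportError shims removed below collapse into plain aliases. A minimal sketch of the replacement pattern (alias names assumed from the compat_* convention used throughout):

    import http.cookiejar
    import urllib.request

    compat_cookiejar = http.cookiejar        # Python 2 'cookielib' fallback dropped
    compat_urllib_request = urllib.request   # Python 2 'urllib2' fallback dropped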
-try:
- import urllib.request as compat_urllib_request
-except ImportError: # Python 2
- import urllib2 as compat_urllib_request
-
-try:
- import urllib.error as compat_urllib_error
-except ImportError: # Python 2
- import urllib2 as compat_urllib_error
-
-try:
- import urllib.parse as compat_urllib_parse
-except ImportError: # Python 2
- import urllib as compat_urllib_parse
-
-try:
- from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
- from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
- import urllib.parse as compat_urlparse
-except ImportError: # Python 2
- import urlparse as compat_urlparse
-
-try:
- import urllib.response as compat_urllib_response
-except ImportError: # Python 2
- import urllib as compat_urllib_response
-
-try:
- import http.cookiejar as compat_cookiejar
-except ImportError: # Python 2
- import cookielib as compat_cookiejar
-
-if sys.version_info[0] == 2:
- class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
- def __init__(self, version, name, value, *args, **kwargs):
- if isinstance(name, compat_str):
- name = name.encode()
- if isinstance(value, compat_str):
- value = value.encode()
- compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
-else:
- compat_cookiejar_Cookie = compat_cookiejar.Cookie
-
-try:
- import http.cookies as compat_cookies
-except ImportError: # Python 2
- import Cookie as compat_cookies
-
-if sys.version_info[0] == 2:
- class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
- def load(self, rawdata):
- if isinstance(rawdata, compat_str):
- rawdata = str(rawdata)
- return super(compat_cookies_SimpleCookie, self).load(rawdata)
-else:
- compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
-
-try:
- import html.entities as compat_html_entities
-except ImportError: # Python 2
- import htmlentitydefs as compat_html_entities
-
-try: # Python >= 3.3
- compat_html_entities_html5 = compat_html_entities.html5
-except AttributeError:
- # Copied from CPython 3.5.1 html/entities.py
- compat_html_entities_html5 = {
- 'Aacute': '\xc1',
- 'aacute': '\xe1',
- 'Aacute;': '\xc1',
- 'aacute;': '\xe1',
- 'Abreve;': '\u0102',
- 'abreve;': '\u0103',
- 'ac;': '\u223e',
- 'acd;': '\u223f',
- 'acE;': '\u223e\u0333',
- 'Acirc': '\xc2',
- 'acirc': '\xe2',
- 'Acirc;': '\xc2',
- 'acirc;': '\xe2',
- 'acute': '\xb4',
- 'acute;': '\xb4',
- 'Acy;': '\u0410',
- 'acy;': '\u0430',
- 'AElig': '\xc6',
- 'aelig': '\xe6',
- 'AElig;': '\xc6',
- 'aelig;': '\xe6',
- 'af;': '\u2061',
- 'Afr;': '\U0001d504',
- 'afr;': '\U0001d51e',
- 'Agrave': '\xc0',
- 'agrave': '\xe0',
- 'Agrave;': '\xc0',
- 'agrave;': '\xe0',
- 'alefsym;': '\u2135',
- 'aleph;': '\u2135',
- 'Alpha;': '\u0391',
- 'alpha;': '\u03b1',
- 'Amacr;': '\u0100',
- 'amacr;': '\u0101',
- 'amalg;': '\u2a3f',
- 'AMP': '&',
- 'amp': '&',
- 'AMP;': '&',
- 'amp;': '&',
- 'And;': '\u2a53',
- 'and;': '\u2227',
- 'andand;': '\u2a55',
- 'andd;': '\u2a5c',
- 'andslope;': '\u2a58',
- 'andv;': '\u2a5a',
- 'ang;': '\u2220',
- 'ange;': '\u29a4',
- 'angle;': '\u2220',
- 'angmsd;': '\u2221',
- 'angmsdaa;': '\u29a8',
- 'angmsdab;': '\u29a9',
- 'angmsdac;': '\u29aa',
- 'angmsdad;': '\u29ab',
- 'angmsdae;': '\u29ac',
- 'angmsdaf;': '\u29ad',
- 'angmsdag;': '\u29ae',
- 'angmsdah;': '\u29af',
- 'angrt;': '\u221f',
- 'angrtvb;': '\u22be',
- 'angrtvbd;': '\u299d',
- 'angsph;': '\u2222',
- 'angst;': '\xc5',
- 'angzarr;': '\u237c',
- 'Aogon;': '\u0104',
- 'aogon;': '\u0105',
- 'Aopf;': '\U0001d538',
- 'aopf;': '\U0001d552',
- 'ap;': '\u2248',
- 'apacir;': '\u2a6f',
- 'apE;': '\u2a70',
- 'ape;': '\u224a',
- 'apid;': '\u224b',
- 'apos;': "'",
- 'ApplyFunction;': '\u2061',
- 'approx;': '\u2248',
- 'approxeq;': '\u224a',
- 'Aring': '\xc5',
- 'aring': '\xe5',
- 'Aring;': '\xc5',
- 'aring;': '\xe5',
- 'Ascr;': '\U0001d49c',
- 'ascr;': '\U0001d4b6',
- 'Assign;': '\u2254',
- 'ast;': '*',
- 'asymp;': '\u2248',
- 'asympeq;': '\u224d',
- 'Atilde': '\xc3',
- 'atilde': '\xe3',
- 'Atilde;': '\xc3',
- 'atilde;': '\xe3',
- 'Auml': '\xc4',
- 'auml': '\xe4',
- 'Auml;': '\xc4',
- 'auml;': '\xe4',
- 'awconint;': '\u2233',
- 'awint;': '\u2a11',
- 'backcong;': '\u224c',
- 'backepsilon;': '\u03f6',
- 'backprime;': '\u2035',
- 'backsim;': '\u223d',
- 'backsimeq;': '\u22cd',
- 'Backslash;': '\u2216',
- 'Barv;': '\u2ae7',
- 'barvee;': '\u22bd',
- 'Barwed;': '\u2306',
- 'barwed;': '\u2305',
- 'barwedge;': '\u2305',
- 'bbrk;': '\u23b5',
- 'bbrktbrk;': '\u23b6',
- 'bcong;': '\u224c',
- 'Bcy;': '\u0411',
- 'bcy;': '\u0431',
- 'bdquo;': '\u201e',
- 'becaus;': '\u2235',
- 'Because;': '\u2235',
- 'because;': '\u2235',
- 'bemptyv;': '\u29b0',
- 'bepsi;': '\u03f6',
- 'bernou;': '\u212c',
- 'Bernoullis;': '\u212c',
- 'Beta;': '\u0392',
- 'beta;': '\u03b2',
- 'beth;': '\u2136',
- 'between;': '\u226c',
- 'Bfr;': '\U0001d505',
- 'bfr;': '\U0001d51f',
- 'bigcap;': '\u22c2',
- 'bigcirc;': '\u25ef',
- 'bigcup;': '\u22c3',
- 'bigodot;': '\u2a00',
- 'bigoplus;': '\u2a01',
- 'bigotimes;': '\u2a02',
- 'bigsqcup;': '\u2a06',
- 'bigstar;': '\u2605',
- 'bigtriangledown;': '\u25bd',
- 'bigtriangleup;': '\u25b3',
- 'biguplus;': '\u2a04',
- 'bigvee;': '\u22c1',
- 'bigwedge;': '\u22c0',
- 'bkarow;': '\u290d',
- 'blacklozenge;': '\u29eb',
- 'blacksquare;': '\u25aa',
- 'blacktriangle;': '\u25b4',
- 'blacktriangledown;': '\u25be',
- 'blacktriangleleft;': '\u25c2',
- 'blacktriangleright;': '\u25b8',
- 'blank;': '\u2423',
- 'blk12;': '\u2592',
- 'blk14;': '\u2591',
- 'blk34;': '\u2593',
- 'block;': '\u2588',
- 'bne;': '=\u20e5',
- 'bnequiv;': '\u2261\u20e5',
- 'bNot;': '\u2aed',
- 'bnot;': '\u2310',
- 'Bopf;': '\U0001d539',
- 'bopf;': '\U0001d553',
- 'bot;': '\u22a5',
- 'bottom;': '\u22a5',
- 'bowtie;': '\u22c8',
- 'boxbox;': '\u29c9',
- 'boxDL;': '\u2557',
- 'boxDl;': '\u2556',
- 'boxdL;': '\u2555',
- 'boxdl;': '\u2510',
- 'boxDR;': '\u2554',
- 'boxDr;': '\u2553',
- 'boxdR;': '\u2552',
- 'boxdr;': '\u250c',
- 'boxH;': '\u2550',
- 'boxh;': '\u2500',
- 'boxHD;': '\u2566',
- 'boxHd;': '\u2564',
- 'boxhD;': '\u2565',
- 'boxhd;': '\u252c',
- 'boxHU;': '\u2569',
- 'boxHu;': '\u2567',
- 'boxhU;': '\u2568',
- 'boxhu;': '\u2534',
- 'boxminus;': '\u229f',
- 'boxplus;': '\u229e',
- 'boxtimes;': '\u22a0',
- 'boxUL;': '\u255d',
- 'boxUl;': '\u255c',
- 'boxuL;': '\u255b',
- 'boxul;': '\u2518',
- 'boxUR;': '\u255a',
- 'boxUr;': '\u2559',
- 'boxuR;': '\u2558',
- 'boxur;': '\u2514',
- 'boxV;': '\u2551',
- 'boxv;': '\u2502',
- 'boxVH;': '\u256c',
- 'boxVh;': '\u256b',
- 'boxvH;': '\u256a',
- 'boxvh;': '\u253c',
- 'boxVL;': '\u2563',
- 'boxVl;': '\u2562',
- 'boxvL;': '\u2561',
- 'boxvl;': '\u2524',
- 'boxVR;': '\u2560',
- 'boxVr;': '\u255f',
- 'boxvR;': '\u255e',
- 'boxvr;': '\u251c',
- 'bprime;': '\u2035',
- 'Breve;': '\u02d8',
- 'breve;': '\u02d8',
- 'brvbar': '\xa6',
- 'brvbar;': '\xa6',
- 'Bscr;': '\u212c',
- 'bscr;': '\U0001d4b7',
- 'bsemi;': '\u204f',
- 'bsim;': '\u223d',
- 'bsime;': '\u22cd',
- 'bsol;': '\\',
- 'bsolb;': '\u29c5',
- 'bsolhsub;': '\u27c8',
- 'bull;': '\u2022',
- 'bullet;': '\u2022',
- 'bump;': '\u224e',
- 'bumpE;': '\u2aae',
- 'bumpe;': '\u224f',
- 'Bumpeq;': '\u224e',
- 'bumpeq;': '\u224f',
- 'Cacute;': '\u0106',
- 'cacute;': '\u0107',
- 'Cap;': '\u22d2',
- 'cap;': '\u2229',
- 'capand;': '\u2a44',
- 'capbrcup;': '\u2a49',
- 'capcap;': '\u2a4b',
- 'capcup;': '\u2a47',
- 'capdot;': '\u2a40',
- 'CapitalDifferentialD;': '\u2145',
- 'caps;': '\u2229\ufe00',
- 'caret;': '\u2041',
- 'caron;': '\u02c7',
- 'Cayleys;': '\u212d',
- 'ccaps;': '\u2a4d',
- 'Ccaron;': '\u010c',
- 'ccaron;': '\u010d',
- 'Ccedil': '\xc7',
- 'ccedil': '\xe7',
- 'Ccedil;': '\xc7',
- 'ccedil;': '\xe7',
- 'Ccirc;': '\u0108',
- 'ccirc;': '\u0109',
- 'Cconint;': '\u2230',
- 'ccups;': '\u2a4c',
- 'ccupssm;': '\u2a50',
- 'Cdot;': '\u010a',
- 'cdot;': '\u010b',
- 'cedil': '\xb8',
- 'cedil;': '\xb8',
- 'Cedilla;': '\xb8',
- 'cemptyv;': '\u29b2',
- 'cent': '\xa2',
- 'cent;': '\xa2',
- 'CenterDot;': '\xb7',
- 'centerdot;': '\xb7',
- 'Cfr;': '\u212d',
- 'cfr;': '\U0001d520',
- 'CHcy;': '\u0427',
- 'chcy;': '\u0447',
- 'check;': '\u2713',
- 'checkmark;': '\u2713',
- 'Chi;': '\u03a7',
- 'chi;': '\u03c7',
- 'cir;': '\u25cb',
- 'circ;': '\u02c6',
- 'circeq;': '\u2257',
- 'circlearrowleft;': '\u21ba',
- 'circlearrowright;': '\u21bb',
- 'circledast;': '\u229b',
- 'circledcirc;': '\u229a',
- 'circleddash;': '\u229d',
- 'CircleDot;': '\u2299',
- 'circledR;': '\xae',
- 'circledS;': '\u24c8',
- 'CircleMinus;': '\u2296',
- 'CirclePlus;': '\u2295',
- 'CircleTimes;': '\u2297',
- 'cirE;': '\u29c3',
- 'cire;': '\u2257',
- 'cirfnint;': '\u2a10',
- 'cirmid;': '\u2aef',
- 'cirscir;': '\u29c2',
- 'ClockwiseContourIntegral;': '\u2232',
- 'CloseCurlyDoubleQuote;': '\u201d',
- 'CloseCurlyQuote;': '\u2019',
- 'clubs;': '\u2663',
- 'clubsuit;': '\u2663',
- 'Colon;': '\u2237',
- 'colon;': ':',
- 'Colone;': '\u2a74',
- 'colone;': '\u2254',
- 'coloneq;': '\u2254',
- 'comma;': ',',
- 'commat;': '@',
- 'comp;': '\u2201',
- 'compfn;': '\u2218',
- 'complement;': '\u2201',
- 'complexes;': '\u2102',
- 'cong;': '\u2245',
- 'congdot;': '\u2a6d',
- 'Congruent;': '\u2261',
- 'Conint;': '\u222f',
- 'conint;': '\u222e',
- 'ContourIntegral;': '\u222e',
- 'Copf;': '\u2102',
- 'copf;': '\U0001d554',
- 'coprod;': '\u2210',
- 'Coproduct;': '\u2210',
- 'COPY': '\xa9',
- 'copy': '\xa9',
- 'COPY;': '\xa9',
- 'copy;': '\xa9',
- 'copysr;': '\u2117',
- 'CounterClockwiseContourIntegral;': '\u2233',
- 'crarr;': '\u21b5',
- 'Cross;': '\u2a2f',
- 'cross;': '\u2717',
- 'Cscr;': '\U0001d49e',
- 'cscr;': '\U0001d4b8',
- 'csub;': '\u2acf',
- 'csube;': '\u2ad1',
- 'csup;': '\u2ad0',
- 'csupe;': '\u2ad2',
- 'ctdot;': '\u22ef',
- 'cudarrl;': '\u2938',
- 'cudarrr;': '\u2935',
- 'cuepr;': '\u22de',
- 'cuesc;': '\u22df',
- 'cularr;': '\u21b6',
- 'cularrp;': '\u293d',
- 'Cup;': '\u22d3',
- 'cup;': '\u222a',
- 'cupbrcap;': '\u2a48',
- 'CupCap;': '\u224d',
- 'cupcap;': '\u2a46',
- 'cupcup;': '\u2a4a',
- 'cupdot;': '\u228d',
- 'cupor;': '\u2a45',
- 'cups;': '\u222a\ufe00',
- 'curarr;': '\u21b7',
- 'curarrm;': '\u293c',
- 'curlyeqprec;': '\u22de',
- 'curlyeqsucc;': '\u22df',
- 'curlyvee;': '\u22ce',
- 'curlywedge;': '\u22cf',
- 'curren': '\xa4',
- 'curren;': '\xa4',
- 'curvearrowleft;': '\u21b6',
- 'curvearrowright;': '\u21b7',
- 'cuvee;': '\u22ce',
- 'cuwed;': '\u22cf',
- 'cwconint;': '\u2232',
- 'cwint;': '\u2231',
- 'cylcty;': '\u232d',
- 'Dagger;': '\u2021',
- 'dagger;': '\u2020',
- 'daleth;': '\u2138',
- 'Darr;': '\u21a1',
- 'dArr;': '\u21d3',
- 'darr;': '\u2193',
- 'dash;': '\u2010',
- 'Dashv;': '\u2ae4',
- 'dashv;': '\u22a3',
- 'dbkarow;': '\u290f',
- 'dblac;': '\u02dd',
- 'Dcaron;': '\u010e',
- 'dcaron;': '\u010f',
- 'Dcy;': '\u0414',
- 'dcy;': '\u0434',
- 'DD;': '\u2145',
- 'dd;': '\u2146',
- 'ddagger;': '\u2021',
- 'ddarr;': '\u21ca',
- 'DDotrahd;': '\u2911',
- 'ddotseq;': '\u2a77',
- 'deg': '\xb0',
- 'deg;': '\xb0',
- 'Del;': '\u2207',
- 'Delta;': '\u0394',
- 'delta;': '\u03b4',
- 'demptyv;': '\u29b1',
- 'dfisht;': '\u297f',
- 'Dfr;': '\U0001d507',
- 'dfr;': '\U0001d521',
- 'dHar;': '\u2965',
- 'dharl;': '\u21c3',
- 'dharr;': '\u21c2',
- 'DiacriticalAcute;': '\xb4',
- 'DiacriticalDot;': '\u02d9',
- 'DiacriticalDoubleAcute;': '\u02dd',
- 'DiacriticalGrave;': '`',
- 'DiacriticalTilde;': '\u02dc',
- 'diam;': '\u22c4',
- 'Diamond;': '\u22c4',
- 'diamond;': '\u22c4',
- 'diamondsuit;': '\u2666',
- 'diams;': '\u2666',
- 'die;': '\xa8',
- 'DifferentialD;': '\u2146',
- 'digamma;': '\u03dd',
- 'disin;': '\u22f2',
- 'div;': '\xf7',
- 'divide': '\xf7',
- 'divide;': '\xf7',
- 'divideontimes;': '\u22c7',
- 'divonx;': '\u22c7',
- 'DJcy;': '\u0402',
- 'djcy;': '\u0452',
- 'dlcorn;': '\u231e',
- 'dlcrop;': '\u230d',
- 'dollar;': '$',
- 'Dopf;': '\U0001d53b',
- 'dopf;': '\U0001d555',
- 'Dot;': '\xa8',
- 'dot;': '\u02d9',
- 'DotDot;': '\u20dc',
- 'doteq;': '\u2250',
- 'doteqdot;': '\u2251',
- 'DotEqual;': '\u2250',
- 'dotminus;': '\u2238',
- 'dotplus;': '\u2214',
- 'dotsquare;': '\u22a1',
- 'doublebarwedge;': '\u2306',
- 'DoubleContourIntegral;': '\u222f',
- 'DoubleDot;': '\xa8',
- 'DoubleDownArrow;': '\u21d3',
- 'DoubleLeftArrow;': '\u21d0',
- 'DoubleLeftRightArrow;': '\u21d4',
- 'DoubleLeftTee;': '\u2ae4',
- 'DoubleLongLeftArrow;': '\u27f8',
- 'DoubleLongLeftRightArrow;': '\u27fa',
- 'DoubleLongRightArrow;': '\u27f9',
- 'DoubleRightArrow;': '\u21d2',
- 'DoubleRightTee;': '\u22a8',
- 'DoubleUpArrow;': '\u21d1',
- 'DoubleUpDownArrow;': '\u21d5',
- 'DoubleVerticalBar;': '\u2225',
- 'DownArrow;': '\u2193',
- 'Downarrow;': '\u21d3',
- 'downarrow;': '\u2193',
- 'DownArrowBar;': '\u2913',
- 'DownArrowUpArrow;': '\u21f5',
- 'DownBreve;': '\u0311',
- 'downdownarrows;': '\u21ca',
- 'downharpoonleft;': '\u21c3',
- 'downharpoonright;': '\u21c2',
- 'DownLeftRightVector;': '\u2950',
- 'DownLeftTeeVector;': '\u295e',
- 'DownLeftVector;': '\u21bd',
- 'DownLeftVectorBar;': '\u2956',
- 'DownRightTeeVector;': '\u295f',
- 'DownRightVector;': '\u21c1',
- 'DownRightVectorBar;': '\u2957',
- 'DownTee;': '\u22a4',
- 'DownTeeArrow;': '\u21a7',
- 'drbkarow;': '\u2910',
- 'drcorn;': '\u231f',
- 'drcrop;': '\u230c',
- 'Dscr;': '\U0001d49f',
- 'dscr;': '\U0001d4b9',
- 'DScy;': '\u0405',
- 'dscy;': '\u0455',
- 'dsol;': '\u29f6',
- 'Dstrok;': '\u0110',
- 'dstrok;': '\u0111',
- 'dtdot;': '\u22f1',
- 'dtri;': '\u25bf',
- 'dtrif;': '\u25be',
- 'duarr;': '\u21f5',
- 'duhar;': '\u296f',
- 'dwangle;': '\u29a6',
- 'DZcy;': '\u040f',
- 'dzcy;': '\u045f',
- 'dzigrarr;': '\u27ff',
- 'Eacute': '\xc9',
- 'eacute': '\xe9',
- 'Eacute;': '\xc9',
- 'eacute;': '\xe9',
- 'easter;': '\u2a6e',
- 'Ecaron;': '\u011a',
- 'ecaron;': '\u011b',
- 'ecir;': '\u2256',
- 'Ecirc': '\xca',
- 'ecirc': '\xea',
- 'Ecirc;': '\xca',
- 'ecirc;': '\xea',
- 'ecolon;': '\u2255',
- 'Ecy;': '\u042d',
- 'ecy;': '\u044d',
- 'eDDot;': '\u2a77',
- 'Edot;': '\u0116',
- 'eDot;': '\u2251',
- 'edot;': '\u0117',
- 'ee;': '\u2147',
- 'efDot;': '\u2252',
- 'Efr;': '\U0001d508',
- 'efr;': '\U0001d522',
- 'eg;': '\u2a9a',
- 'Egrave': '\xc8',
- 'egrave': '\xe8',
- 'Egrave;': '\xc8',
- 'egrave;': '\xe8',
- 'egs;': '\u2a96',
- 'egsdot;': '\u2a98',
- 'el;': '\u2a99',
- 'Element;': '\u2208',
- 'elinters;': '\u23e7',
- 'ell;': '\u2113',
- 'els;': '\u2a95',
- 'elsdot;': '\u2a97',
- 'Emacr;': '\u0112',
- 'emacr;': '\u0113',
- 'empty;': '\u2205',
- 'emptyset;': '\u2205',
- 'EmptySmallSquare;': '\u25fb',
- 'emptyv;': '\u2205',
- 'EmptyVerySmallSquare;': '\u25ab',
- 'emsp13;': '\u2004',
- 'emsp14;': '\u2005',
- 'emsp;': '\u2003',
- 'ENG;': '\u014a',
- 'eng;': '\u014b',
- 'ensp;': '\u2002',
- 'Eogon;': '\u0118',
- 'eogon;': '\u0119',
- 'Eopf;': '\U0001d53c',
- 'eopf;': '\U0001d556',
- 'epar;': '\u22d5',
- 'eparsl;': '\u29e3',
- 'eplus;': '\u2a71',
- 'epsi;': '\u03b5',
- 'Epsilon;': '\u0395',
- 'epsilon;': '\u03b5',
- 'epsiv;': '\u03f5',
- 'eqcirc;': '\u2256',
- 'eqcolon;': '\u2255',
- 'eqsim;': '\u2242',
- 'eqslantgtr;': '\u2a96',
- 'eqslantless;': '\u2a95',
- 'Equal;': '\u2a75',
- 'equals;': '=',
- 'EqualTilde;': '\u2242',
- 'equest;': '\u225f',
- 'Equilibrium;': '\u21cc',
- 'equiv;': '\u2261',
- 'equivDD;': '\u2a78',
- 'eqvparsl;': '\u29e5',
- 'erarr;': '\u2971',
- 'erDot;': '\u2253',
- 'Escr;': '\u2130',
- 'escr;': '\u212f',
- 'esdot;': '\u2250',
- 'Esim;': '\u2a73',
- 'esim;': '\u2242',
- 'Eta;': '\u0397',
- 'eta;': '\u03b7',
- 'ETH': '\xd0',
- 'eth': '\xf0',
- 'ETH;': '\xd0',
- 'eth;': '\xf0',
- 'Euml': '\xcb',
- 'euml': '\xeb',
- 'Euml;': '\xcb',
- 'euml;': '\xeb',
- 'euro;': '\u20ac',
- 'excl;': '!',
- 'exist;': '\u2203',
- 'Exists;': '\u2203',
- 'expectation;': '\u2130',
- 'ExponentialE;': '\u2147',
- 'exponentiale;': '\u2147',
- 'fallingdotseq;': '\u2252',
- 'Fcy;': '\u0424',
- 'fcy;': '\u0444',
- 'female;': '\u2640',
- 'ffilig;': '\ufb03',
- 'fflig;': '\ufb00',
- 'ffllig;': '\ufb04',
- 'Ffr;': '\U0001d509',
- 'ffr;': '\U0001d523',
- 'filig;': '\ufb01',
- 'FilledSmallSquare;': '\u25fc',
- 'FilledVerySmallSquare;': '\u25aa',
- 'fjlig;': 'fj',
- 'flat;': '\u266d',
- 'fllig;': '\ufb02',
- 'fltns;': '\u25b1',
- 'fnof;': '\u0192',
- 'Fopf;': '\U0001d53d',
- 'fopf;': '\U0001d557',
- 'ForAll;': '\u2200',
- 'forall;': '\u2200',
- 'fork;': '\u22d4',
- 'forkv;': '\u2ad9',
- 'Fouriertrf;': '\u2131',
- 'fpartint;': '\u2a0d',
- 'frac12': '\xbd',
- 'frac12;': '\xbd',
- 'frac13;': '\u2153',
- 'frac14': '\xbc',
- 'frac14;': '\xbc',
- 'frac15;': '\u2155',
- 'frac16;': '\u2159',
- 'frac18;': '\u215b',
- 'frac23;': '\u2154',
- 'frac25;': '\u2156',
- 'frac34': '\xbe',
- 'frac34;': '\xbe',
- 'frac35;': '\u2157',
- 'frac38;': '\u215c',
- 'frac45;': '\u2158',
- 'frac56;': '\u215a',
- 'frac58;': '\u215d',
- 'frac78;': '\u215e',
- 'frasl;': '\u2044',
- 'frown;': '\u2322',
- 'Fscr;': '\u2131',
- 'fscr;': '\U0001d4bb',
- 'gacute;': '\u01f5',
- 'Gamma;': '\u0393',
- 'gamma;': '\u03b3',
- 'Gammad;': '\u03dc',
- 'gammad;': '\u03dd',
- 'gap;': '\u2a86',
- 'Gbreve;': '\u011e',
- 'gbreve;': '\u011f',
- 'Gcedil;': '\u0122',
- 'Gcirc;': '\u011c',
- 'gcirc;': '\u011d',
- 'Gcy;': '\u0413',
- 'gcy;': '\u0433',
- 'Gdot;': '\u0120',
- 'gdot;': '\u0121',
- 'gE;': '\u2267',
- 'ge;': '\u2265',
- 'gEl;': '\u2a8c',
- 'gel;': '\u22db',
- 'geq;': '\u2265',
- 'geqq;': '\u2267',
- 'geqslant;': '\u2a7e',
- 'ges;': '\u2a7e',
- 'gescc;': '\u2aa9',
- 'gesdot;': '\u2a80',
- 'gesdoto;': '\u2a82',
- 'gesdotol;': '\u2a84',
- 'gesl;': '\u22db\ufe00',
- 'gesles;': '\u2a94',
- 'Gfr;': '\U0001d50a',
- 'gfr;': '\U0001d524',
- 'Gg;': '\u22d9',
- 'gg;': '\u226b',
- 'ggg;': '\u22d9',
- 'gimel;': '\u2137',
- 'GJcy;': '\u0403',
- 'gjcy;': '\u0453',
- 'gl;': '\u2277',
- 'gla;': '\u2aa5',
- 'glE;': '\u2a92',
- 'glj;': '\u2aa4',
- 'gnap;': '\u2a8a',
- 'gnapprox;': '\u2a8a',
- 'gnE;': '\u2269',
- 'gne;': '\u2a88',
- 'gneq;': '\u2a88',
- 'gneqq;': '\u2269',
- 'gnsim;': '\u22e7',
- 'Gopf;': '\U0001d53e',
- 'gopf;': '\U0001d558',
- 'grave;': '`',
- 'GreaterEqual;': '\u2265',
- 'GreaterEqualLess;': '\u22db',
- 'GreaterFullEqual;': '\u2267',
- 'GreaterGreater;': '\u2aa2',
- 'GreaterLess;': '\u2277',
- 'GreaterSlantEqual;': '\u2a7e',
- 'GreaterTilde;': '\u2273',
- 'Gscr;': '\U0001d4a2',
- 'gscr;': '\u210a',
- 'gsim;': '\u2273',
- 'gsime;': '\u2a8e',
- 'gsiml;': '\u2a90',
- 'GT': '>',
- 'gt': '>',
- 'GT;': '>',
- 'Gt;': '\u226b',
- 'gt;': '>',
- 'gtcc;': '\u2aa7',
- 'gtcir;': '\u2a7a',
- 'gtdot;': '\u22d7',
- 'gtlPar;': '\u2995',
- 'gtquest;': '\u2a7c',
- 'gtrapprox;': '\u2a86',
- 'gtrarr;': '\u2978',
- 'gtrdot;': '\u22d7',
- 'gtreqless;': '\u22db',
- 'gtreqqless;': '\u2a8c',
- 'gtrless;': '\u2277',
- 'gtrsim;': '\u2273',
- 'gvertneqq;': '\u2269\ufe00',
- 'gvnE;': '\u2269\ufe00',
- 'Hacek;': '\u02c7',
- 'hairsp;': '\u200a',
- 'half;': '\xbd',
- 'hamilt;': '\u210b',
- 'HARDcy;': '\u042a',
- 'hardcy;': '\u044a',
- 'hArr;': '\u21d4',
- 'harr;': '\u2194',
- 'harrcir;': '\u2948',
- 'harrw;': '\u21ad',
- 'Hat;': '^',
- 'hbar;': '\u210f',
- 'Hcirc;': '\u0124',
- 'hcirc;': '\u0125',
- 'hearts;': '\u2665',
- 'heartsuit;': '\u2665',
- 'hellip;': '\u2026',
- 'hercon;': '\u22b9',
- 'Hfr;': '\u210c',
- 'hfr;': '\U0001d525',
- 'HilbertSpace;': '\u210b',
- 'hksearow;': '\u2925',
- 'hkswarow;': '\u2926',
- 'hoarr;': '\u21ff',
- 'homtht;': '\u223b',
- 'hookleftarrow;': '\u21a9',
- 'hookrightarrow;': '\u21aa',
- 'Hopf;': '\u210d',
- 'hopf;': '\U0001d559',
- 'horbar;': '\u2015',
- 'HorizontalLine;': '\u2500',
- 'Hscr;': '\u210b',
- 'hscr;': '\U0001d4bd',
- 'hslash;': '\u210f',
- 'Hstrok;': '\u0126',
- 'hstrok;': '\u0127',
- 'HumpDownHump;': '\u224e',
- 'HumpEqual;': '\u224f',
- 'hybull;': '\u2043',
- 'hyphen;': '\u2010',
- 'Iacute': '\xcd',
- 'iacute': '\xed',
- 'Iacute;': '\xcd',
- 'iacute;': '\xed',
- 'ic;': '\u2063',
- 'Icirc': '\xce',
- 'icirc': '\xee',
- 'Icirc;': '\xce',
- 'icirc;': '\xee',
- 'Icy;': '\u0418',
- 'icy;': '\u0438',
- 'Idot;': '\u0130',
- 'IEcy;': '\u0415',
- 'iecy;': '\u0435',
- 'iexcl': '\xa1',
- 'iexcl;': '\xa1',
- 'iff;': '\u21d4',
- 'Ifr;': '\u2111',
- 'ifr;': '\U0001d526',
- 'Igrave': '\xcc',
- 'igrave': '\xec',
- 'Igrave;': '\xcc',
- 'igrave;': '\xec',
- 'ii;': '\u2148',
- 'iiiint;': '\u2a0c',
- 'iiint;': '\u222d',
- 'iinfin;': '\u29dc',
- 'iiota;': '\u2129',
- 'IJlig;': '\u0132',
- 'ijlig;': '\u0133',
- 'Im;': '\u2111',
- 'Imacr;': '\u012a',
- 'imacr;': '\u012b',
- 'image;': '\u2111',
- 'ImaginaryI;': '\u2148',
- 'imagline;': '\u2110',
- 'imagpart;': '\u2111',
- 'imath;': '\u0131',
- 'imof;': '\u22b7',
- 'imped;': '\u01b5',
- 'Implies;': '\u21d2',
- 'in;': '\u2208',
- 'incare;': '\u2105',
- 'infin;': '\u221e',
- 'infintie;': '\u29dd',
- 'inodot;': '\u0131',
- 'Int;': '\u222c',
- 'int;': '\u222b',
- 'intcal;': '\u22ba',
- 'integers;': '\u2124',
- 'Integral;': '\u222b',
- 'intercal;': '\u22ba',
- 'Intersection;': '\u22c2',
- 'intlarhk;': '\u2a17',
- 'intprod;': '\u2a3c',
- 'InvisibleComma;': '\u2063',
- 'InvisibleTimes;': '\u2062',
- 'IOcy;': '\u0401',
- 'iocy;': '\u0451',
- 'Iogon;': '\u012e',
- 'iogon;': '\u012f',
- 'Iopf;': '\U0001d540',
- 'iopf;': '\U0001d55a',
- 'Iota;': '\u0399',
- 'iota;': '\u03b9',
- 'iprod;': '\u2a3c',
- 'iquest': '\xbf',
- 'iquest;': '\xbf',
- 'Iscr;': '\u2110',
- 'iscr;': '\U0001d4be',
- 'isin;': '\u2208',
- 'isindot;': '\u22f5',
- 'isinE;': '\u22f9',
- 'isins;': '\u22f4',
- 'isinsv;': '\u22f3',
- 'isinv;': '\u2208',
- 'it;': '\u2062',
- 'Itilde;': '\u0128',
- 'itilde;': '\u0129',
- 'Iukcy;': '\u0406',
- 'iukcy;': '\u0456',
- 'Iuml': '\xcf',
- 'iuml': '\xef',
- 'Iuml;': '\xcf',
- 'iuml;': '\xef',
- 'Jcirc;': '\u0134',
- 'jcirc;': '\u0135',
- 'Jcy;': '\u0419',
- 'jcy;': '\u0439',
- 'Jfr;': '\U0001d50d',
- 'jfr;': '\U0001d527',
- 'jmath;': '\u0237',
- 'Jopf;': '\U0001d541',
- 'jopf;': '\U0001d55b',
- 'Jscr;': '\U0001d4a5',
- 'jscr;': '\U0001d4bf',
- 'Jsercy;': '\u0408',
- 'jsercy;': '\u0458',
- 'Jukcy;': '\u0404',
- 'jukcy;': '\u0454',
- 'Kappa;': '\u039a',
- 'kappa;': '\u03ba',
- 'kappav;': '\u03f0',
- 'Kcedil;': '\u0136',
- 'kcedil;': '\u0137',
- 'Kcy;': '\u041a',
- 'kcy;': '\u043a',
- 'Kfr;': '\U0001d50e',
- 'kfr;': '\U0001d528',
- 'kgreen;': '\u0138',
- 'KHcy;': '\u0425',
- 'khcy;': '\u0445',
- 'KJcy;': '\u040c',
- 'kjcy;': '\u045c',
- 'Kopf;': '\U0001d542',
- 'kopf;': '\U0001d55c',
- 'Kscr;': '\U0001d4a6',
- 'kscr;': '\U0001d4c0',
- 'lAarr;': '\u21da',
- 'Lacute;': '\u0139',
- 'lacute;': '\u013a',
- 'laemptyv;': '\u29b4',
- 'lagran;': '\u2112',
- 'Lambda;': '\u039b',
- 'lambda;': '\u03bb',
- 'Lang;': '\u27ea',
- 'lang;': '\u27e8',
- 'langd;': '\u2991',
- 'langle;': '\u27e8',
- 'lap;': '\u2a85',
- 'Laplacetrf;': '\u2112',
- 'laquo': '\xab',
- 'laquo;': '\xab',
- 'Larr;': '\u219e',
- 'lArr;': '\u21d0',
- 'larr;': '\u2190',
- 'larrb;': '\u21e4',
- 'larrbfs;': '\u291f',
- 'larrfs;': '\u291d',
- 'larrhk;': '\u21a9',
- 'larrlp;': '\u21ab',
- 'larrpl;': '\u2939',
- 'larrsim;': '\u2973',
- 'larrtl;': '\u21a2',
- 'lat;': '\u2aab',
- 'lAtail;': '\u291b',
- 'latail;': '\u2919',
- 'late;': '\u2aad',
- 'lates;': '\u2aad\ufe00',
- 'lBarr;': '\u290e',
- 'lbarr;': '\u290c',
- 'lbbrk;': '\u2772',
- 'lbrace;': '{',
- 'lbrack;': '[',
- 'lbrke;': '\u298b',
- 'lbrksld;': '\u298f',
- 'lbrkslu;': '\u298d',
- 'Lcaron;': '\u013d',
- 'lcaron;': '\u013e',
- 'Lcedil;': '\u013b',
- 'lcedil;': '\u013c',
- 'lceil;': '\u2308',
- 'lcub;': '{',
- 'Lcy;': '\u041b',
- 'lcy;': '\u043b',
- 'ldca;': '\u2936',
- 'ldquo;': '\u201c',
- 'ldquor;': '\u201e',
- 'ldrdhar;': '\u2967',
- 'ldrushar;': '\u294b',
- 'ldsh;': '\u21b2',
- 'lE;': '\u2266',
- 'le;': '\u2264',
- 'LeftAngleBracket;': '\u27e8',
- 'LeftArrow;': '\u2190',
- 'Leftarrow;': '\u21d0',
- 'leftarrow;': '\u2190',
- 'LeftArrowBar;': '\u21e4',
- 'LeftArrowRightArrow;': '\u21c6',
- 'leftarrowtail;': '\u21a2',
- 'LeftCeiling;': '\u2308',
- 'LeftDoubleBracket;': '\u27e6',
- 'LeftDownTeeVector;': '\u2961',
- 'LeftDownVector;': '\u21c3',
- 'LeftDownVectorBar;': '\u2959',
- 'LeftFloor;': '\u230a',
- 'leftharpoondown;': '\u21bd',
- 'leftharpoonup;': '\u21bc',
- 'leftleftarrows;': '\u21c7',
- 'LeftRightArrow;': '\u2194',
- 'Leftrightarrow;': '\u21d4',
- 'leftrightarrow;': '\u2194',
- 'leftrightarrows;': '\u21c6',
- 'leftrightharpoons;': '\u21cb',
- 'leftrightsquigarrow;': '\u21ad',
- 'LeftRightVector;': '\u294e',
- 'LeftTee;': '\u22a3',
- 'LeftTeeArrow;': '\u21a4',
- 'LeftTeeVector;': '\u295a',
- 'leftthreetimes;': '\u22cb',
- 'LeftTriangle;': '\u22b2',
- 'LeftTriangleBar;': '\u29cf',
- 'LeftTriangleEqual;': '\u22b4',
- 'LeftUpDownVector;': '\u2951',
- 'LeftUpTeeVector;': '\u2960',
- 'LeftUpVector;': '\u21bf',
- 'LeftUpVectorBar;': '\u2958',
- 'LeftVector;': '\u21bc',
- 'LeftVectorBar;': '\u2952',
- 'lEg;': '\u2a8b',
- 'leg;': '\u22da',
- 'leq;': '\u2264',
- 'leqq;': '\u2266',
- 'leqslant;': '\u2a7d',
- 'les;': '\u2a7d',
- 'lescc;': '\u2aa8',
- 'lesdot;': '\u2a7f',
- 'lesdoto;': '\u2a81',
- 'lesdotor;': '\u2a83',
- 'lesg;': '\u22da\ufe00',
- 'lesges;': '\u2a93',
- 'lessapprox;': '\u2a85',
- 'lessdot;': '\u22d6',
- 'lesseqgtr;': '\u22da',
- 'lesseqqgtr;': '\u2a8b',
- 'LessEqualGreater;': '\u22da',
- 'LessFullEqual;': '\u2266',
- 'LessGreater;': '\u2276',
- 'lessgtr;': '\u2276',
- 'LessLess;': '\u2aa1',
- 'lesssim;': '\u2272',
- 'LessSlantEqual;': '\u2a7d',
- 'LessTilde;': '\u2272',
- 'lfisht;': '\u297c',
- 'lfloor;': '\u230a',
- 'Lfr;': '\U0001d50f',
- 'lfr;': '\U0001d529',
- 'lg;': '\u2276',
- 'lgE;': '\u2a91',
- 'lHar;': '\u2962',
- 'lhard;': '\u21bd',
- 'lharu;': '\u21bc',
- 'lharul;': '\u296a',
- 'lhblk;': '\u2584',
- 'LJcy;': '\u0409',
- 'ljcy;': '\u0459',
- 'Ll;': '\u22d8',
- 'll;': '\u226a',
- 'llarr;': '\u21c7',
- 'llcorner;': '\u231e',
- 'Lleftarrow;': '\u21da',
- 'llhard;': '\u296b',
- 'lltri;': '\u25fa',
- 'Lmidot;': '\u013f',
- 'lmidot;': '\u0140',
- 'lmoust;': '\u23b0',
- 'lmoustache;': '\u23b0',
- 'lnap;': '\u2a89',
- 'lnapprox;': '\u2a89',
- 'lnE;': '\u2268',
- 'lne;': '\u2a87',
- 'lneq;': '\u2a87',
- 'lneqq;': '\u2268',
- 'lnsim;': '\u22e6',
- 'loang;': '\u27ec',
- 'loarr;': '\u21fd',
- 'lobrk;': '\u27e6',
- 'LongLeftArrow;': '\u27f5',
- 'Longleftarrow;': '\u27f8',
- 'longleftarrow;': '\u27f5',
- 'LongLeftRightArrow;': '\u27f7',
- 'Longleftrightarrow;': '\u27fa',
- 'longleftrightarrow;': '\u27f7',
- 'longmapsto;': '\u27fc',
- 'LongRightArrow;': '\u27f6',
- 'Longrightarrow;': '\u27f9',
- 'longrightarrow;': '\u27f6',
- 'looparrowleft;': '\u21ab',
- 'looparrowright;': '\u21ac',
- 'lopar;': '\u2985',
- 'Lopf;': '\U0001d543',
- 'lopf;': '\U0001d55d',
- 'loplus;': '\u2a2d',
- 'lotimes;': '\u2a34',
- 'lowast;': '\u2217',
- 'lowbar;': '_',
- 'LowerLeftArrow;': '\u2199',
- 'LowerRightArrow;': '\u2198',
- 'loz;': '\u25ca',
- 'lozenge;': '\u25ca',
- 'lozf;': '\u29eb',
- 'lpar;': '(',
- 'lparlt;': '\u2993',
- 'lrarr;': '\u21c6',
- 'lrcorner;': '\u231f',
- 'lrhar;': '\u21cb',
- 'lrhard;': '\u296d',
- 'lrm;': '\u200e',
- 'lrtri;': '\u22bf',
- 'lsaquo;': '\u2039',
- 'Lscr;': '\u2112',
- 'lscr;': '\U0001d4c1',
- 'Lsh;': '\u21b0',
- 'lsh;': '\u21b0',
- 'lsim;': '\u2272',
- 'lsime;': '\u2a8d',
- 'lsimg;': '\u2a8f',
- 'lsqb;': '[',
- 'lsquo;': '\u2018',
- 'lsquor;': '\u201a',
- 'Lstrok;': '\u0141',
- 'lstrok;': '\u0142',
- 'LT': '<',
- 'lt': '<',
- 'LT;': '<',
- 'Lt;': '\u226a',
- 'lt;': '<',
- 'ltcc;': '\u2aa6',
- 'ltcir;': '\u2a79',
- 'ltdot;': '\u22d6',
- 'lthree;': '\u22cb',
- 'ltimes;': '\u22c9',
- 'ltlarr;': '\u2976',
- 'ltquest;': '\u2a7b',
- 'ltri;': '\u25c3',
- 'ltrie;': '\u22b4',
- 'ltrif;': '\u25c2',
- 'ltrPar;': '\u2996',
- 'lurdshar;': '\u294a',
- 'luruhar;': '\u2966',
- 'lvertneqq;': '\u2268\ufe00',
- 'lvnE;': '\u2268\ufe00',
- 'macr': '\xaf',
- 'macr;': '\xaf',
- 'male;': '\u2642',
- 'malt;': '\u2720',
- 'maltese;': '\u2720',
- 'Map;': '\u2905',
- 'map;': '\u21a6',
- 'mapsto;': '\u21a6',
- 'mapstodown;': '\u21a7',
- 'mapstoleft;': '\u21a4',
- 'mapstoup;': '\u21a5',
- 'marker;': '\u25ae',
- 'mcomma;': '\u2a29',
- 'Mcy;': '\u041c',
- 'mcy;': '\u043c',
- 'mdash;': '\u2014',
- 'mDDot;': '\u223a',
- 'measuredangle;': '\u2221',
- 'MediumSpace;': '\u205f',
- 'Mellintrf;': '\u2133',
- 'Mfr;': '\U0001d510',
- 'mfr;': '\U0001d52a',
- 'mho;': '\u2127',
- 'micro': '\xb5',
- 'micro;': '\xb5',
- 'mid;': '\u2223',
- 'midast;': '*',
- 'midcir;': '\u2af0',
- 'middot': '\xb7',
- 'middot;': '\xb7',
- 'minus;': '\u2212',
- 'minusb;': '\u229f',
- 'minusd;': '\u2238',
- 'minusdu;': '\u2a2a',
- 'MinusPlus;': '\u2213',
- 'mlcp;': '\u2adb',
- 'mldr;': '\u2026',
- 'mnplus;': '\u2213',
- 'models;': '\u22a7',
- 'Mopf;': '\U0001d544',
- 'mopf;': '\U0001d55e',
- 'mp;': '\u2213',
- 'Mscr;': '\u2133',
- 'mscr;': '\U0001d4c2',
- 'mstpos;': '\u223e',
- 'Mu;': '\u039c',
- 'mu;': '\u03bc',
- 'multimap;': '\u22b8',
- 'mumap;': '\u22b8',
- 'nabla;': '\u2207',
- 'Nacute;': '\u0143',
- 'nacute;': '\u0144',
- 'nang;': '\u2220\u20d2',
- 'nap;': '\u2249',
- 'napE;': '\u2a70\u0338',
- 'napid;': '\u224b\u0338',
- 'napos;': '\u0149',
- 'napprox;': '\u2249',
- 'natur;': '\u266e',
- 'natural;': '\u266e',
- 'naturals;': '\u2115',
- 'nbsp': '\xa0',
- 'nbsp;': '\xa0',
- 'nbump;': '\u224e\u0338',
- 'nbumpe;': '\u224f\u0338',
- 'ncap;': '\u2a43',
- 'Ncaron;': '\u0147',
- 'ncaron;': '\u0148',
- 'Ncedil;': '\u0145',
- 'ncedil;': '\u0146',
- 'ncong;': '\u2247',
- 'ncongdot;': '\u2a6d\u0338',
- 'ncup;': '\u2a42',
- 'Ncy;': '\u041d',
- 'ncy;': '\u043d',
- 'ndash;': '\u2013',
- 'ne;': '\u2260',
- 'nearhk;': '\u2924',
- 'neArr;': '\u21d7',
- 'nearr;': '\u2197',
- 'nearrow;': '\u2197',
- 'nedot;': '\u2250\u0338',
- 'NegativeMediumSpace;': '\u200b',
- 'NegativeThickSpace;': '\u200b',
- 'NegativeThinSpace;': '\u200b',
- 'NegativeVeryThinSpace;': '\u200b',
- 'nequiv;': '\u2262',
- 'nesear;': '\u2928',
- 'nesim;': '\u2242\u0338',
- 'NestedGreaterGreater;': '\u226b',
- 'NestedLessLess;': '\u226a',
- 'NewLine;': '\n',
- 'nexist;': '\u2204',
- 'nexists;': '\u2204',
- 'Nfr;': '\U0001d511',
- 'nfr;': '\U0001d52b',
- 'ngE;': '\u2267\u0338',
- 'nge;': '\u2271',
- 'ngeq;': '\u2271',
- 'ngeqq;': '\u2267\u0338',
- 'ngeqslant;': '\u2a7e\u0338',
- 'nges;': '\u2a7e\u0338',
- 'nGg;': '\u22d9\u0338',
- 'ngsim;': '\u2275',
- 'nGt;': '\u226b\u20d2',
- 'ngt;': '\u226f',
- 'ngtr;': '\u226f',
- 'nGtv;': '\u226b\u0338',
- 'nhArr;': '\u21ce',
- 'nharr;': '\u21ae',
- 'nhpar;': '\u2af2',
- 'ni;': '\u220b',
- 'nis;': '\u22fc',
- 'nisd;': '\u22fa',
- 'niv;': '\u220b',
- 'NJcy;': '\u040a',
- 'njcy;': '\u045a',
- 'nlArr;': '\u21cd',
- 'nlarr;': '\u219a',
- 'nldr;': '\u2025',
- 'nlE;': '\u2266\u0338',
- 'nle;': '\u2270',
- 'nLeftarrow;': '\u21cd',
- 'nleftarrow;': '\u219a',
- 'nLeftrightarrow;': '\u21ce',
- 'nleftrightarrow;': '\u21ae',
- 'nleq;': '\u2270',
- 'nleqq;': '\u2266\u0338',
- 'nleqslant;': '\u2a7d\u0338',
- 'nles;': '\u2a7d\u0338',
- 'nless;': '\u226e',
- 'nLl;': '\u22d8\u0338',
- 'nlsim;': '\u2274',
- 'nLt;': '\u226a\u20d2',
- 'nlt;': '\u226e',
- 'nltri;': '\u22ea',
- 'nltrie;': '\u22ec',
- 'nLtv;': '\u226a\u0338',
- 'nmid;': '\u2224',
- 'NoBreak;': '\u2060',
- 'NonBreakingSpace;': '\xa0',
- 'Nopf;': '\u2115',
- 'nopf;': '\U0001d55f',
- 'not': '\xac',
- 'Not;': '\u2aec',
- 'not;': '\xac',
- 'NotCongruent;': '\u2262',
- 'NotCupCap;': '\u226d',
- 'NotDoubleVerticalBar;': '\u2226',
- 'NotElement;': '\u2209',
- 'NotEqual;': '\u2260',
- 'NotEqualTilde;': '\u2242\u0338',
- 'NotExists;': '\u2204',
- 'NotGreater;': '\u226f',
- 'NotGreaterEqual;': '\u2271',
- 'NotGreaterFullEqual;': '\u2267\u0338',
- 'NotGreaterGreater;': '\u226b\u0338',
- 'NotGreaterLess;': '\u2279',
- 'NotGreaterSlantEqual;': '\u2a7e\u0338',
- 'NotGreaterTilde;': '\u2275',
- 'NotHumpDownHump;': '\u224e\u0338',
- 'NotHumpEqual;': '\u224f\u0338',
- 'notin;': '\u2209',
- 'notindot;': '\u22f5\u0338',
- 'notinE;': '\u22f9\u0338',
- 'notinva;': '\u2209',
- 'notinvb;': '\u22f7',
- 'notinvc;': '\u22f6',
- 'NotLeftTriangle;': '\u22ea',
- 'NotLeftTriangleBar;': '\u29cf\u0338',
- 'NotLeftTriangleEqual;': '\u22ec',
- 'NotLess;': '\u226e',
- 'NotLessEqual;': '\u2270',
- 'NotLessGreater;': '\u2278',
- 'NotLessLess;': '\u226a\u0338',
- 'NotLessSlantEqual;': '\u2a7d\u0338',
- 'NotLessTilde;': '\u2274',
- 'NotNestedGreaterGreater;': '\u2aa2\u0338',
- 'NotNestedLessLess;': '\u2aa1\u0338',
- 'notni;': '\u220c',
- 'notniva;': '\u220c',
- 'notnivb;': '\u22fe',
- 'notnivc;': '\u22fd',
- 'NotPrecedes;': '\u2280',
- 'NotPrecedesEqual;': '\u2aaf\u0338',
- 'NotPrecedesSlantEqual;': '\u22e0',
- 'NotReverseElement;': '\u220c',
- 'NotRightTriangle;': '\u22eb',
- 'NotRightTriangleBar;': '\u29d0\u0338',
- 'NotRightTriangleEqual;': '\u22ed',
- 'NotSquareSubset;': '\u228f\u0338',
- 'NotSquareSubsetEqual;': '\u22e2',
- 'NotSquareSuperset;': '\u2290\u0338',
- 'NotSquareSupersetEqual;': '\u22e3',
- 'NotSubset;': '\u2282\u20d2',
- 'NotSubsetEqual;': '\u2288',
- 'NotSucceeds;': '\u2281',
- 'NotSucceedsEqual;': '\u2ab0\u0338',
- 'NotSucceedsSlantEqual;': '\u22e1',
- 'NotSucceedsTilde;': '\u227f\u0338',
- 'NotSuperset;': '\u2283\u20d2',
- 'NotSupersetEqual;': '\u2289',
- 'NotTilde;': '\u2241',
- 'NotTildeEqual;': '\u2244',
- 'NotTildeFullEqual;': '\u2247',
- 'NotTildeTilde;': '\u2249',
- 'NotVerticalBar;': '\u2224',
- 'npar;': '\u2226',
- 'nparallel;': '\u2226',
- 'nparsl;': '\u2afd\u20e5',
- 'npart;': '\u2202\u0338',
- 'npolint;': '\u2a14',
- 'npr;': '\u2280',
- 'nprcue;': '\u22e0',
- 'npre;': '\u2aaf\u0338',
- 'nprec;': '\u2280',
- 'npreceq;': '\u2aaf\u0338',
- 'nrArr;': '\u21cf',
- 'nrarr;': '\u219b',
- 'nrarrc;': '\u2933\u0338',
- 'nrarrw;': '\u219d\u0338',
- 'nRightarrow;': '\u21cf',
- 'nrightarrow;': '\u219b',
- 'nrtri;': '\u22eb',
- 'nrtrie;': '\u22ed',
- 'nsc;': '\u2281',
- 'nsccue;': '\u22e1',
- 'nsce;': '\u2ab0\u0338',
- 'Nscr;': '\U0001d4a9',
- 'nscr;': '\U0001d4c3',
- 'nshortmid;': '\u2224',
- 'nshortparallel;': '\u2226',
- 'nsim;': '\u2241',
- 'nsime;': '\u2244',
- 'nsimeq;': '\u2244',
- 'nsmid;': '\u2224',
- 'nspar;': '\u2226',
- 'nsqsube;': '\u22e2',
- 'nsqsupe;': '\u22e3',
- 'nsub;': '\u2284',
- 'nsubE;': '\u2ac5\u0338',
- 'nsube;': '\u2288',
- 'nsubset;': '\u2282\u20d2',
- 'nsubseteq;': '\u2288',
- 'nsubseteqq;': '\u2ac5\u0338',
- 'nsucc;': '\u2281',
- 'nsucceq;': '\u2ab0\u0338',
- 'nsup;': '\u2285',
- 'nsupE;': '\u2ac6\u0338',
- 'nsupe;': '\u2289',
- 'nsupset;': '\u2283\u20d2',
- 'nsupseteq;': '\u2289',
- 'nsupseteqq;': '\u2ac6\u0338',
- 'ntgl;': '\u2279',
- 'Ntilde': '\xd1',
- 'ntilde': '\xf1',
- 'Ntilde;': '\xd1',
- 'ntilde;': '\xf1',
- 'ntlg;': '\u2278',
- 'ntriangleleft;': '\u22ea',
- 'ntrianglelefteq;': '\u22ec',
- 'ntriangleright;': '\u22eb',
- 'ntrianglerighteq;': '\u22ed',
- 'Nu;': '\u039d',
- 'nu;': '\u03bd',
- 'num;': '#',
- 'numero;': '\u2116',
- 'numsp;': '\u2007',
- 'nvap;': '\u224d\u20d2',
- 'nVDash;': '\u22af',
- 'nVdash;': '\u22ae',
- 'nvDash;': '\u22ad',
- 'nvdash;': '\u22ac',
- 'nvge;': '\u2265\u20d2',
- 'nvgt;': '>\u20d2',
- 'nvHarr;': '\u2904',
- 'nvinfin;': '\u29de',
- 'nvlArr;': '\u2902',
- 'nvle;': '\u2264\u20d2',
- 'nvlt;': '<\u20d2',
- 'nvltrie;': '\u22b4\u20d2',
- 'nvrArr;': '\u2903',
- 'nvrtrie;': '\u22b5\u20d2',
- 'nvsim;': '\u223c\u20d2',
- 'nwarhk;': '\u2923',
- 'nwArr;': '\u21d6',
- 'nwarr;': '\u2196',
- 'nwarrow;': '\u2196',
- 'nwnear;': '\u2927',
- 'Oacute': '\xd3',
- 'oacute': '\xf3',
- 'Oacute;': '\xd3',
- 'oacute;': '\xf3',
- 'oast;': '\u229b',
- 'ocir;': '\u229a',
- 'Ocirc': '\xd4',
- 'ocirc': '\xf4',
- 'Ocirc;': '\xd4',
- 'ocirc;': '\xf4',
- 'Ocy;': '\u041e',
- 'ocy;': '\u043e',
- 'odash;': '\u229d',
- 'Odblac;': '\u0150',
- 'odblac;': '\u0151',
- 'odiv;': '\u2a38',
- 'odot;': '\u2299',
- 'odsold;': '\u29bc',
- 'OElig;': '\u0152',
- 'oelig;': '\u0153',
- 'ofcir;': '\u29bf',
- 'Ofr;': '\U0001d512',
- 'ofr;': '\U0001d52c',
- 'ogon;': '\u02db',
- 'Ograve': '\xd2',
- 'ograve': '\xf2',
- 'Ograve;': '\xd2',
- 'ograve;': '\xf2',
- 'ogt;': '\u29c1',
- 'ohbar;': '\u29b5',
- 'ohm;': '\u03a9',
- 'oint;': '\u222e',
- 'olarr;': '\u21ba',
- 'olcir;': '\u29be',
- 'olcross;': '\u29bb',
- 'oline;': '\u203e',
- 'olt;': '\u29c0',
- 'Omacr;': '\u014c',
- 'omacr;': '\u014d',
- 'Omega;': '\u03a9',
- 'omega;': '\u03c9',
- 'Omicron;': '\u039f',
- 'omicron;': '\u03bf',
- 'omid;': '\u29b6',
- 'ominus;': '\u2296',
- 'Oopf;': '\U0001d546',
- 'oopf;': '\U0001d560',
- 'opar;': '\u29b7',
- 'OpenCurlyDoubleQuote;': '\u201c',
- 'OpenCurlyQuote;': '\u2018',
- 'operp;': '\u29b9',
- 'oplus;': '\u2295',
- 'Or;': '\u2a54',
- 'or;': '\u2228',
- 'orarr;': '\u21bb',
- 'ord;': '\u2a5d',
- 'order;': '\u2134',
- 'orderof;': '\u2134',
- 'ordf': '\xaa',
- 'ordf;': '\xaa',
- 'ordm': '\xba',
- 'ordm;': '\xba',
- 'origof;': '\u22b6',
- 'oror;': '\u2a56',
- 'orslope;': '\u2a57',
- 'orv;': '\u2a5b',
- 'oS;': '\u24c8',
- 'Oscr;': '\U0001d4aa',
- 'oscr;': '\u2134',
- 'Oslash': '\xd8',
- 'oslash': '\xf8',
- 'Oslash;': '\xd8',
- 'oslash;': '\xf8',
- 'osol;': '\u2298',
- 'Otilde': '\xd5',
- 'otilde': '\xf5',
- 'Otilde;': '\xd5',
- 'otilde;': '\xf5',
- 'Otimes;': '\u2a37',
- 'otimes;': '\u2297',
- 'otimesas;': '\u2a36',
- 'Ouml': '\xd6',
- 'ouml': '\xf6',
- 'Ouml;': '\xd6',
- 'ouml;': '\xf6',
- 'ovbar;': '\u233d',
- 'OverBar;': '\u203e',
- 'OverBrace;': '\u23de',
- 'OverBracket;': '\u23b4',
- 'OverParenthesis;': '\u23dc',
- 'par;': '\u2225',
- 'para': '\xb6',
- 'para;': '\xb6',
- 'parallel;': '\u2225',
- 'parsim;': '\u2af3',
- 'parsl;': '\u2afd',
- 'part;': '\u2202',
- 'PartialD;': '\u2202',
- 'Pcy;': '\u041f',
- 'pcy;': '\u043f',
- 'percnt;': '%',
- 'period;': '.',
- 'permil;': '\u2030',
- 'perp;': '\u22a5',
- 'pertenk;': '\u2031',
- 'Pfr;': '\U0001d513',
- 'pfr;': '\U0001d52d',
- 'Phi;': '\u03a6',
- 'phi;': '\u03c6',
- 'phiv;': '\u03d5',
- 'phmmat;': '\u2133',
- 'phone;': '\u260e',
- 'Pi;': '\u03a0',
- 'pi;': '\u03c0',
- 'pitchfork;': '\u22d4',
- 'piv;': '\u03d6',
- 'planck;': '\u210f',
- 'planckh;': '\u210e',
- 'plankv;': '\u210f',
- 'plus;': '+',
- 'plusacir;': '\u2a23',
- 'plusb;': '\u229e',
- 'pluscir;': '\u2a22',
- 'plusdo;': '\u2214',
- 'plusdu;': '\u2a25',
- 'pluse;': '\u2a72',
- 'PlusMinus;': '\xb1',
- 'plusmn': '\xb1',
- 'plusmn;': '\xb1',
- 'plussim;': '\u2a26',
- 'plustwo;': '\u2a27',
- 'pm;': '\xb1',
- 'Poincareplane;': '\u210c',
- 'pointint;': '\u2a15',
- 'Popf;': '\u2119',
- 'popf;': '\U0001d561',
- 'pound': '\xa3',
- 'pound;': '\xa3',
- 'Pr;': '\u2abb',
- 'pr;': '\u227a',
- 'prap;': '\u2ab7',
- 'prcue;': '\u227c',
- 'prE;': '\u2ab3',
- 'pre;': '\u2aaf',
- 'prec;': '\u227a',
- 'precapprox;': '\u2ab7',
- 'preccurlyeq;': '\u227c',
- 'Precedes;': '\u227a',
- 'PrecedesEqual;': '\u2aaf',
- 'PrecedesSlantEqual;': '\u227c',
- 'PrecedesTilde;': '\u227e',
- 'preceq;': '\u2aaf',
- 'precnapprox;': '\u2ab9',
- 'precneqq;': '\u2ab5',
- 'precnsim;': '\u22e8',
- 'precsim;': '\u227e',
- 'Prime;': '\u2033',
- 'prime;': '\u2032',
- 'primes;': '\u2119',
- 'prnap;': '\u2ab9',
- 'prnE;': '\u2ab5',
- 'prnsim;': '\u22e8',
- 'prod;': '\u220f',
- 'Product;': '\u220f',
- 'profalar;': '\u232e',
- 'profline;': '\u2312',
- 'profsurf;': '\u2313',
- 'prop;': '\u221d',
- 'Proportion;': '\u2237',
- 'Proportional;': '\u221d',
- 'propto;': '\u221d',
- 'prsim;': '\u227e',
- 'prurel;': '\u22b0',
- 'Pscr;': '\U0001d4ab',
- 'pscr;': '\U0001d4c5',
- 'Psi;': '\u03a8',
- 'psi;': '\u03c8',
- 'puncsp;': '\u2008',
- 'Qfr;': '\U0001d514',
- 'qfr;': '\U0001d52e',
- 'qint;': '\u2a0c',
- 'Qopf;': '\u211a',
- 'qopf;': '\U0001d562',
- 'qprime;': '\u2057',
- 'Qscr;': '\U0001d4ac',
- 'qscr;': '\U0001d4c6',
- 'quaternions;': '\u210d',
- 'quatint;': '\u2a16',
- 'quest;': '?',
- 'questeq;': '\u225f',
- 'QUOT': '"',
- 'quot': '"',
- 'QUOT;': '"',
- 'quot;': '"',
- 'rAarr;': '\u21db',
- 'race;': '\u223d\u0331',
- 'Racute;': '\u0154',
- 'racute;': '\u0155',
- 'radic;': '\u221a',
- 'raemptyv;': '\u29b3',
- 'Rang;': '\u27eb',
- 'rang;': '\u27e9',
- 'rangd;': '\u2992',
- 'range;': '\u29a5',
- 'rangle;': '\u27e9',
- 'raquo': '\xbb',
- 'raquo;': '\xbb',
- 'Rarr;': '\u21a0',
- 'rArr;': '\u21d2',
- 'rarr;': '\u2192',
- 'rarrap;': '\u2975',
- 'rarrb;': '\u21e5',
- 'rarrbfs;': '\u2920',
- 'rarrc;': '\u2933',
- 'rarrfs;': '\u291e',
- 'rarrhk;': '\u21aa',
- 'rarrlp;': '\u21ac',
- 'rarrpl;': '\u2945',
- 'rarrsim;': '\u2974',
- 'Rarrtl;': '\u2916',
- 'rarrtl;': '\u21a3',
- 'rarrw;': '\u219d',
- 'rAtail;': '\u291c',
- 'ratail;': '\u291a',
- 'ratio;': '\u2236',
- 'rationals;': '\u211a',
- 'RBarr;': '\u2910',
- 'rBarr;': '\u290f',
- 'rbarr;': '\u290d',
- 'rbbrk;': '\u2773',
- 'rbrace;': '}',
- 'rbrack;': ']',
- 'rbrke;': '\u298c',
- 'rbrksld;': '\u298e',
- 'rbrkslu;': '\u2990',
- 'Rcaron;': '\u0158',
- 'rcaron;': '\u0159',
- 'Rcedil;': '\u0156',
- 'rcedil;': '\u0157',
- 'rceil;': '\u2309',
- 'rcub;': '}',
- 'Rcy;': '\u0420',
- 'rcy;': '\u0440',
- 'rdca;': '\u2937',
- 'rdldhar;': '\u2969',
- 'rdquo;': '\u201d',
- 'rdquor;': '\u201d',
- 'rdsh;': '\u21b3',
- 'Re;': '\u211c',
- 'real;': '\u211c',
- 'realine;': '\u211b',
- 'realpart;': '\u211c',
- 'reals;': '\u211d',
- 'rect;': '\u25ad',
- 'REG': '\xae',
- 'reg': '\xae',
- 'REG;': '\xae',
- 'reg;': '\xae',
- 'ReverseElement;': '\u220b',
- 'ReverseEquilibrium;': '\u21cb',
- 'ReverseUpEquilibrium;': '\u296f',
- 'rfisht;': '\u297d',
- 'rfloor;': '\u230b',
- 'Rfr;': '\u211c',
- 'rfr;': '\U0001d52f',
- 'rHar;': '\u2964',
- 'rhard;': '\u21c1',
- 'rharu;': '\u21c0',
- 'rharul;': '\u296c',
- 'Rho;': '\u03a1',
- 'rho;': '\u03c1',
- 'rhov;': '\u03f1',
- 'RightAngleBracket;': '\u27e9',
- 'RightArrow;': '\u2192',
- 'Rightarrow;': '\u21d2',
- 'rightarrow;': '\u2192',
- 'RightArrowBar;': '\u21e5',
- 'RightArrowLeftArrow;': '\u21c4',
- 'rightarrowtail;': '\u21a3',
- 'RightCeiling;': '\u2309',
- 'RightDoubleBracket;': '\u27e7',
- 'RightDownTeeVector;': '\u295d',
- 'RightDownVector;': '\u21c2',
- 'RightDownVectorBar;': '\u2955',
- 'RightFloor;': '\u230b',
- 'rightharpoondown;': '\u21c1',
- 'rightharpoonup;': '\u21c0',
- 'rightleftarrows;': '\u21c4',
- 'rightleftharpoons;': '\u21cc',
- 'rightrightarrows;': '\u21c9',
- 'rightsquigarrow;': '\u219d',
- 'RightTee;': '\u22a2',
- 'RightTeeArrow;': '\u21a6',
- 'RightTeeVector;': '\u295b',
- 'rightthreetimes;': '\u22cc',
- 'RightTriangle;': '\u22b3',
- 'RightTriangleBar;': '\u29d0',
- 'RightTriangleEqual;': '\u22b5',
- 'RightUpDownVector;': '\u294f',
- 'RightUpTeeVector;': '\u295c',
- 'RightUpVector;': '\u21be',
- 'RightUpVectorBar;': '\u2954',
- 'RightVector;': '\u21c0',
- 'RightVectorBar;': '\u2953',
- 'ring;': '\u02da',
- 'risingdotseq;': '\u2253',
- 'rlarr;': '\u21c4',
- 'rlhar;': '\u21cc',
- 'rlm;': '\u200f',
- 'rmoust;': '\u23b1',
- 'rmoustache;': '\u23b1',
- 'rnmid;': '\u2aee',
- 'roang;': '\u27ed',
- 'roarr;': '\u21fe',
- 'robrk;': '\u27e7',
- 'ropar;': '\u2986',
- 'Ropf;': '\u211d',
- 'ropf;': '\U0001d563',
- 'roplus;': '\u2a2e',
- 'rotimes;': '\u2a35',
- 'RoundImplies;': '\u2970',
- 'rpar;': ')',
- 'rpargt;': '\u2994',
- 'rppolint;': '\u2a12',
- 'rrarr;': '\u21c9',
- 'Rrightarrow;': '\u21db',
- 'rsaquo;': '\u203a',
- 'Rscr;': '\u211b',
- 'rscr;': '\U0001d4c7',
- 'Rsh;': '\u21b1',
- 'rsh;': '\u21b1',
- 'rsqb;': ']',
- 'rsquo;': '\u2019',
- 'rsquor;': '\u2019',
- 'rthree;': '\u22cc',
- 'rtimes;': '\u22ca',
- 'rtri;': '\u25b9',
- 'rtrie;': '\u22b5',
- 'rtrif;': '\u25b8',
- 'rtriltri;': '\u29ce',
- 'RuleDelayed;': '\u29f4',
- 'ruluhar;': '\u2968',
- 'rx;': '\u211e',
- 'Sacute;': '\u015a',
- 'sacute;': '\u015b',
- 'sbquo;': '\u201a',
- 'Sc;': '\u2abc',
- 'sc;': '\u227b',
- 'scap;': '\u2ab8',
- 'Scaron;': '\u0160',
- 'scaron;': '\u0161',
- 'sccue;': '\u227d',
- 'scE;': '\u2ab4',
- 'sce;': '\u2ab0',
- 'Scedil;': '\u015e',
- 'scedil;': '\u015f',
- 'Scirc;': '\u015c',
- 'scirc;': '\u015d',
- 'scnap;': '\u2aba',
- 'scnE;': '\u2ab6',
- 'scnsim;': '\u22e9',
- 'scpolint;': '\u2a13',
- 'scsim;': '\u227f',
- 'Scy;': '\u0421',
- 'scy;': '\u0441',
- 'sdot;': '\u22c5',
- 'sdotb;': '\u22a1',
- 'sdote;': '\u2a66',
- 'searhk;': '\u2925',
- 'seArr;': '\u21d8',
- 'searr;': '\u2198',
- 'searrow;': '\u2198',
- 'sect': '\xa7',
- 'sect;': '\xa7',
- 'semi;': ';',
- 'seswar;': '\u2929',
- 'setminus;': '\u2216',
- 'setmn;': '\u2216',
- 'sext;': '\u2736',
- 'Sfr;': '\U0001d516',
- 'sfr;': '\U0001d530',
- 'sfrown;': '\u2322',
- 'sharp;': '\u266f',
- 'SHCHcy;': '\u0429',
- 'shchcy;': '\u0449',
- 'SHcy;': '\u0428',
- 'shcy;': '\u0448',
- 'ShortDownArrow;': '\u2193',
- 'ShortLeftArrow;': '\u2190',
- 'shortmid;': '\u2223',
- 'shortparallel;': '\u2225',
- 'ShortRightArrow;': '\u2192',
- 'ShortUpArrow;': '\u2191',
- 'shy': '\xad',
- 'shy;': '\xad',
- 'Sigma;': '\u03a3',
- 'sigma;': '\u03c3',
- 'sigmaf;': '\u03c2',
- 'sigmav;': '\u03c2',
- 'sim;': '\u223c',
- 'simdot;': '\u2a6a',
- 'sime;': '\u2243',
- 'simeq;': '\u2243',
- 'simg;': '\u2a9e',
- 'simgE;': '\u2aa0',
- 'siml;': '\u2a9d',
- 'simlE;': '\u2a9f',
- 'simne;': '\u2246',
- 'simplus;': '\u2a24',
- 'simrarr;': '\u2972',
- 'slarr;': '\u2190',
- 'SmallCircle;': '\u2218',
- 'smallsetminus;': '\u2216',
- 'smashp;': '\u2a33',
- 'smeparsl;': '\u29e4',
- 'smid;': '\u2223',
- 'smile;': '\u2323',
- 'smt;': '\u2aaa',
- 'smte;': '\u2aac',
- 'smtes;': '\u2aac\ufe00',
- 'SOFTcy;': '\u042c',
- 'softcy;': '\u044c',
- 'sol;': '/',
- 'solb;': '\u29c4',
- 'solbar;': '\u233f',
- 'Sopf;': '\U0001d54a',
- 'sopf;': '\U0001d564',
- 'spades;': '\u2660',
- 'spadesuit;': '\u2660',
- 'spar;': '\u2225',
- 'sqcap;': '\u2293',
- 'sqcaps;': '\u2293\ufe00',
- 'sqcup;': '\u2294',
- 'sqcups;': '\u2294\ufe00',
- 'Sqrt;': '\u221a',
- 'sqsub;': '\u228f',
- 'sqsube;': '\u2291',
- 'sqsubset;': '\u228f',
- 'sqsubseteq;': '\u2291',
- 'sqsup;': '\u2290',
- 'sqsupe;': '\u2292',
- 'sqsupset;': '\u2290',
- 'sqsupseteq;': '\u2292',
- 'squ;': '\u25a1',
- 'Square;': '\u25a1',
- 'square;': '\u25a1',
- 'SquareIntersection;': '\u2293',
- 'SquareSubset;': '\u228f',
- 'SquareSubsetEqual;': '\u2291',
- 'SquareSuperset;': '\u2290',
- 'SquareSupersetEqual;': '\u2292',
- 'SquareUnion;': '\u2294',
- 'squarf;': '\u25aa',
- 'squf;': '\u25aa',
- 'srarr;': '\u2192',
- 'Sscr;': '\U0001d4ae',
- 'sscr;': '\U0001d4c8',
- 'ssetmn;': '\u2216',
- 'ssmile;': '\u2323',
- 'sstarf;': '\u22c6',
- 'Star;': '\u22c6',
- 'star;': '\u2606',
- 'starf;': '\u2605',
- 'straightepsilon;': '\u03f5',
- 'straightphi;': '\u03d5',
- 'strns;': '\xaf',
- 'Sub;': '\u22d0',
- 'sub;': '\u2282',
- 'subdot;': '\u2abd',
- 'subE;': '\u2ac5',
- 'sube;': '\u2286',
- 'subedot;': '\u2ac3',
- 'submult;': '\u2ac1',
- 'subnE;': '\u2acb',
- 'subne;': '\u228a',
- 'subplus;': '\u2abf',
- 'subrarr;': '\u2979',
- 'Subset;': '\u22d0',
- 'subset;': '\u2282',
- 'subseteq;': '\u2286',
- 'subseteqq;': '\u2ac5',
- 'SubsetEqual;': '\u2286',
- 'subsetneq;': '\u228a',
- 'subsetneqq;': '\u2acb',
- 'subsim;': '\u2ac7',
- 'subsub;': '\u2ad5',
- 'subsup;': '\u2ad3',
- 'succ;': '\u227b',
- 'succapprox;': '\u2ab8',
- 'succcurlyeq;': '\u227d',
- 'Succeeds;': '\u227b',
- 'SucceedsEqual;': '\u2ab0',
- 'SucceedsSlantEqual;': '\u227d',
- 'SucceedsTilde;': '\u227f',
- 'succeq;': '\u2ab0',
- 'succnapprox;': '\u2aba',
- 'succneqq;': '\u2ab6',
- 'succnsim;': '\u22e9',
- 'succsim;': '\u227f',
- 'SuchThat;': '\u220b',
- 'Sum;': '\u2211',
- 'sum;': '\u2211',
- 'sung;': '\u266a',
- 'sup1': '\xb9',
- 'sup1;': '\xb9',
- 'sup2': '\xb2',
- 'sup2;': '\xb2',
- 'sup3': '\xb3',
- 'sup3;': '\xb3',
- 'Sup;': '\u22d1',
- 'sup;': '\u2283',
- 'supdot;': '\u2abe',
- 'supdsub;': '\u2ad8',
- 'supE;': '\u2ac6',
- 'supe;': '\u2287',
- 'supedot;': '\u2ac4',
- 'Superset;': '\u2283',
- 'SupersetEqual;': '\u2287',
- 'suphsol;': '\u27c9',
- 'suphsub;': '\u2ad7',
- 'suplarr;': '\u297b',
- 'supmult;': '\u2ac2',
- 'supnE;': '\u2acc',
- 'supne;': '\u228b',
- 'supplus;': '\u2ac0',
- 'Supset;': '\u22d1',
- 'supset;': '\u2283',
- 'supseteq;': '\u2287',
- 'supseteqq;': '\u2ac6',
- 'supsetneq;': '\u228b',
- 'supsetneqq;': '\u2acc',
- 'supsim;': '\u2ac8',
- 'supsub;': '\u2ad4',
- 'supsup;': '\u2ad6',
- 'swarhk;': '\u2926',
- 'swArr;': '\u21d9',
- 'swarr;': '\u2199',
- 'swarrow;': '\u2199',
- 'swnwar;': '\u292a',
- 'szlig': '\xdf',
- 'szlig;': '\xdf',
- 'Tab;': '\t',
- 'target;': '\u2316',
- 'Tau;': '\u03a4',
- 'tau;': '\u03c4',
- 'tbrk;': '\u23b4',
- 'Tcaron;': '\u0164',
- 'tcaron;': '\u0165',
- 'Tcedil;': '\u0162',
- 'tcedil;': '\u0163',
- 'Tcy;': '\u0422',
- 'tcy;': '\u0442',
- 'tdot;': '\u20db',
- 'telrec;': '\u2315',
- 'Tfr;': '\U0001d517',
- 'tfr;': '\U0001d531',
- 'there4;': '\u2234',
- 'Therefore;': '\u2234',
- 'therefore;': '\u2234',
- 'Theta;': '\u0398',
- 'theta;': '\u03b8',
- 'thetasym;': '\u03d1',
- 'thetav;': '\u03d1',
- 'thickapprox;': '\u2248',
- 'thicksim;': '\u223c',
- 'ThickSpace;': '\u205f\u200a',
- 'thinsp;': '\u2009',
- 'ThinSpace;': '\u2009',
- 'thkap;': '\u2248',
- 'thksim;': '\u223c',
- 'THORN': '\xde',
- 'thorn': '\xfe',
- 'THORN;': '\xde',
- 'thorn;': '\xfe',
- 'Tilde;': '\u223c',
- 'tilde;': '\u02dc',
- 'TildeEqual;': '\u2243',
- 'TildeFullEqual;': '\u2245',
- 'TildeTilde;': '\u2248',
- 'times': '\xd7',
- 'times;': '\xd7',
- 'timesb;': '\u22a0',
- 'timesbar;': '\u2a31',
- 'timesd;': '\u2a30',
- 'tint;': '\u222d',
- 'toea;': '\u2928',
- 'top;': '\u22a4',
- 'topbot;': '\u2336',
- 'topcir;': '\u2af1',
- 'Topf;': '\U0001d54b',
- 'topf;': '\U0001d565',
- 'topfork;': '\u2ada',
- 'tosa;': '\u2929',
- 'tprime;': '\u2034',
- 'TRADE;': '\u2122',
- 'trade;': '\u2122',
- 'triangle;': '\u25b5',
- 'triangledown;': '\u25bf',
- 'triangleleft;': '\u25c3',
- 'trianglelefteq;': '\u22b4',
- 'triangleq;': '\u225c',
- 'triangleright;': '\u25b9',
- 'trianglerighteq;': '\u22b5',
- 'tridot;': '\u25ec',
- 'trie;': '\u225c',
- 'triminus;': '\u2a3a',
- 'TripleDot;': '\u20db',
- 'triplus;': '\u2a39',
- 'trisb;': '\u29cd',
- 'tritime;': '\u2a3b',
- 'trpezium;': '\u23e2',
- 'Tscr;': '\U0001d4af',
- 'tscr;': '\U0001d4c9',
- 'TScy;': '\u0426',
- 'tscy;': '\u0446',
- 'TSHcy;': '\u040b',
- 'tshcy;': '\u045b',
- 'Tstrok;': '\u0166',
- 'tstrok;': '\u0167',
- 'twixt;': '\u226c',
- 'twoheadleftarrow;': '\u219e',
- 'twoheadrightarrow;': '\u21a0',
- 'Uacute': '\xda',
- 'uacute': '\xfa',
- 'Uacute;': '\xda',
- 'uacute;': '\xfa',
- 'Uarr;': '\u219f',
- 'uArr;': '\u21d1',
- 'uarr;': '\u2191',
- 'Uarrocir;': '\u2949',
- 'Ubrcy;': '\u040e',
- 'ubrcy;': '\u045e',
- 'Ubreve;': '\u016c',
- 'ubreve;': '\u016d',
- 'Ucirc': '\xdb',
- 'ucirc': '\xfb',
- 'Ucirc;': '\xdb',
- 'ucirc;': '\xfb',
- 'Ucy;': '\u0423',
- 'ucy;': '\u0443',
- 'udarr;': '\u21c5',
- 'Udblac;': '\u0170',
- 'udblac;': '\u0171',
- 'udhar;': '\u296e',
- 'ufisht;': '\u297e',
- 'Ufr;': '\U0001d518',
- 'ufr;': '\U0001d532',
- 'Ugrave': '\xd9',
- 'ugrave': '\xf9',
- 'Ugrave;': '\xd9',
- 'ugrave;': '\xf9',
- 'uHar;': '\u2963',
- 'uharl;': '\u21bf',
- 'uharr;': '\u21be',
- 'uhblk;': '\u2580',
- 'ulcorn;': '\u231c',
- 'ulcorner;': '\u231c',
- 'ulcrop;': '\u230f',
- 'ultri;': '\u25f8',
- 'Umacr;': '\u016a',
- 'umacr;': '\u016b',
- 'uml': '\xa8',
- 'uml;': '\xa8',
- 'UnderBar;': '_',
- 'UnderBrace;': '\u23df',
- 'UnderBracket;': '\u23b5',
- 'UnderParenthesis;': '\u23dd',
- 'Union;': '\u22c3',
- 'UnionPlus;': '\u228e',
- 'Uogon;': '\u0172',
- 'uogon;': '\u0173',
- 'Uopf;': '\U0001d54c',
- 'uopf;': '\U0001d566',
- 'UpArrow;': '\u2191',
- 'Uparrow;': '\u21d1',
- 'uparrow;': '\u2191',
- 'UpArrowBar;': '\u2912',
- 'UpArrowDownArrow;': '\u21c5',
- 'UpDownArrow;': '\u2195',
- 'Updownarrow;': '\u21d5',
- 'updownarrow;': '\u2195',
- 'UpEquilibrium;': '\u296e',
- 'upharpoonleft;': '\u21bf',
- 'upharpoonright;': '\u21be',
- 'uplus;': '\u228e',
- 'UpperLeftArrow;': '\u2196',
- 'UpperRightArrow;': '\u2197',
- 'Upsi;': '\u03d2',
- 'upsi;': '\u03c5',
- 'upsih;': '\u03d2',
- 'Upsilon;': '\u03a5',
- 'upsilon;': '\u03c5',
- 'UpTee;': '\u22a5',
- 'UpTeeArrow;': '\u21a5',
- 'upuparrows;': '\u21c8',
- 'urcorn;': '\u231d',
- 'urcorner;': '\u231d',
- 'urcrop;': '\u230e',
- 'Uring;': '\u016e',
- 'uring;': '\u016f',
- 'urtri;': '\u25f9',
- 'Uscr;': '\U0001d4b0',
- 'uscr;': '\U0001d4ca',
- 'utdot;': '\u22f0',
- 'Utilde;': '\u0168',
- 'utilde;': '\u0169',
- 'utri;': '\u25b5',
- 'utrif;': '\u25b4',
- 'uuarr;': '\u21c8',
- 'Uuml': '\xdc',
- 'uuml': '\xfc',
- 'Uuml;': '\xdc',
- 'uuml;': '\xfc',
- 'uwangle;': '\u29a7',
- 'vangrt;': '\u299c',
- 'varepsilon;': '\u03f5',
- 'varkappa;': '\u03f0',
- 'varnothing;': '\u2205',
- 'varphi;': '\u03d5',
- 'varpi;': '\u03d6',
- 'varpropto;': '\u221d',
- 'vArr;': '\u21d5',
- 'varr;': '\u2195',
- 'varrho;': '\u03f1',
- 'varsigma;': '\u03c2',
- 'varsubsetneq;': '\u228a\ufe00',
- 'varsubsetneqq;': '\u2acb\ufe00',
- 'varsupsetneq;': '\u228b\ufe00',
- 'varsupsetneqq;': '\u2acc\ufe00',
- 'vartheta;': '\u03d1',
- 'vartriangleleft;': '\u22b2',
- 'vartriangleright;': '\u22b3',
- 'Vbar;': '\u2aeb',
- 'vBar;': '\u2ae8',
- 'vBarv;': '\u2ae9',
- 'Vcy;': '\u0412',
- 'vcy;': '\u0432',
- 'VDash;': '\u22ab',
- 'Vdash;': '\u22a9',
- 'vDash;': '\u22a8',
- 'vdash;': '\u22a2',
- 'Vdashl;': '\u2ae6',
- 'Vee;': '\u22c1',
- 'vee;': '\u2228',
- 'veebar;': '\u22bb',
- 'veeeq;': '\u225a',
- 'vellip;': '\u22ee',
- 'Verbar;': '\u2016',
- 'verbar;': '|',
- 'Vert;': '\u2016',
- 'vert;': '|',
- 'VerticalBar;': '\u2223',
- 'VerticalLine;': '|',
- 'VerticalSeparator;': '\u2758',
- 'VerticalTilde;': '\u2240',
- 'VeryThinSpace;': '\u200a',
- 'Vfr;': '\U0001d519',
- 'vfr;': '\U0001d533',
- 'vltri;': '\u22b2',
- 'vnsub;': '\u2282\u20d2',
- 'vnsup;': '\u2283\u20d2',
- 'Vopf;': '\U0001d54d',
- 'vopf;': '\U0001d567',
- 'vprop;': '\u221d',
- 'vrtri;': '\u22b3',
- 'Vscr;': '\U0001d4b1',
- 'vscr;': '\U0001d4cb',
- 'vsubnE;': '\u2acb\ufe00',
- 'vsubne;': '\u228a\ufe00',
- 'vsupnE;': '\u2acc\ufe00',
- 'vsupne;': '\u228b\ufe00',
- 'Vvdash;': '\u22aa',
- 'vzigzag;': '\u299a',
- 'Wcirc;': '\u0174',
- 'wcirc;': '\u0175',
- 'wedbar;': '\u2a5f',
- 'Wedge;': '\u22c0',
- 'wedge;': '\u2227',
- 'wedgeq;': '\u2259',
- 'weierp;': '\u2118',
- 'Wfr;': '\U0001d51a',
- 'wfr;': '\U0001d534',
- 'Wopf;': '\U0001d54e',
- 'wopf;': '\U0001d568',
- 'wp;': '\u2118',
- 'wr;': '\u2240',
- 'wreath;': '\u2240',
- 'Wscr;': '\U0001d4b2',
- 'wscr;': '\U0001d4cc',
- 'xcap;': '\u22c2',
- 'xcirc;': '\u25ef',
- 'xcup;': '\u22c3',
- 'xdtri;': '\u25bd',
- 'Xfr;': '\U0001d51b',
- 'xfr;': '\U0001d535',
- 'xhArr;': '\u27fa',
- 'xharr;': '\u27f7',
- 'Xi;': '\u039e',
- 'xi;': '\u03be',
- 'xlArr;': '\u27f8',
- 'xlarr;': '\u27f5',
- 'xmap;': '\u27fc',
- 'xnis;': '\u22fb',
- 'xodot;': '\u2a00',
- 'Xopf;': '\U0001d54f',
- 'xopf;': '\U0001d569',
- 'xoplus;': '\u2a01',
- 'xotime;': '\u2a02',
- 'xrArr;': '\u27f9',
- 'xrarr;': '\u27f6',
- 'Xscr;': '\U0001d4b3',
- 'xscr;': '\U0001d4cd',
- 'xsqcup;': '\u2a06',
- 'xuplus;': '\u2a04',
- 'xutri;': '\u25b3',
- 'xvee;': '\u22c1',
- 'xwedge;': '\u22c0',
- 'Yacute': '\xdd',
- 'yacute': '\xfd',
- 'Yacute;': '\xdd',
- 'yacute;': '\xfd',
- 'YAcy;': '\u042f',
- 'yacy;': '\u044f',
- 'Ycirc;': '\u0176',
- 'ycirc;': '\u0177',
- 'Ycy;': '\u042b',
- 'ycy;': '\u044b',
- 'yen': '\xa5',
- 'yen;': '\xa5',
- 'Yfr;': '\U0001d51c',
- 'yfr;': '\U0001d536',
- 'YIcy;': '\u0407',
- 'yicy;': '\u0457',
- 'Yopf;': '\U0001d550',
- 'yopf;': '\U0001d56a',
- 'Yscr;': '\U0001d4b4',
- 'yscr;': '\U0001d4ce',
- 'YUcy;': '\u042e',
- 'yucy;': '\u044e',
- 'yuml': '\xff',
- 'Yuml;': '\u0178',
- 'yuml;': '\xff',
- 'Zacute;': '\u0179',
- 'zacute;': '\u017a',
- 'Zcaron;': '\u017d',
- 'zcaron;': '\u017e',
- 'Zcy;': '\u0417',
- 'zcy;': '\u0437',
- 'Zdot;': '\u017b',
- 'zdot;': '\u017c',
- 'zeetrf;': '\u2128',
- 'ZeroWidthSpace;': '\u200b',
- 'Zeta;': '\u0396',
- 'zeta;': '\u03b6',
- 'Zfr;': '\u2128',
- 'zfr;': '\U0001d537',
- 'ZHcy;': '\u0416',
- 'zhcy;': '\u0436',
- 'zigrarr;': '\u21dd',
- 'Zopf;': '\u2124',
- 'zopf;': '\U0001d56b',
- 'Zscr;': '\U0001d4b5',
- 'zscr;': '\U0001d4cf',
- 'zwj;': '\u200d',
- 'zwnj;': '\u200c',
- }
-
-try:
- import http.client as compat_http_client
-except ImportError: # Python 2
- import httplib as compat_http_client
-
-try:
- from urllib.error import HTTPError as compat_HTTPError
-except ImportError: # Python 2
- from urllib2 import HTTPError as compat_HTTPError
+# HTMLParseError was deprecated in Python 3.3 and removed in Python 3.5.
+# Introducing a dummy exception for Python >= 3.5 allows compatible and
+# uniform cross-version exception handling
+class compat_HTMLParseError(Exception):
+ pass
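+
+# e.g. callers can catch the same name on every supported version
+# (a minimal sketch; `page` is a placeholder for some HTML input):
+#     try:
+#         compat_HTMLParser().feed(page)
+#     except compat_HTMLParseError:
+#         pass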
-try:
- from urllib.request import urlretrieve as compat_urlretrieve
-except ImportError: # Python 2
- from urllib import urlretrieve as compat_urlretrieve
-try:
- from html.parser import HTMLParser as compat_HTMLParser
-except ImportError: # Python 2
- from HTMLParser import HTMLParser as compat_HTMLParser
-
-try: # Python 2
- from HTMLParser import HTMLParseError as compat_HTMLParseError
-except ImportError: # Python <3.4
- try:
- from html.parser import HTMLParseError as compat_HTMLParseError
- except ImportError: # Python >3.4
-
- # HTMLParseError has been deprecated in Python 3.3 and removed in
- # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
- # and uniform cross-version exception handling
- class compat_HTMLParseError(Exception):
- pass
-
-try:
- from subprocess import DEVNULL
- compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
- compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
-
-try:
- import http.server as compat_http_server
-except ImportError:
- import BaseHTTPServer as compat_http_server
-
-try:
- compat_str = unicode # Python 2
-except NameError:
- compat_str = str
-
-try:
- from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
- from urllib.parse import unquote as compat_urllib_parse_unquote
- from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
-except ImportError: # Python 2
- _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
- else re.compile(r'([\x00-\x7f]+)'))
-
- # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
- # implementations from cpython 3.4.3's stdlib. Python 2's version
- # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
-
- def compat_urllib_parse_unquote_to_bytes(string):
- """unquote_to_bytes('abc%20def') -> b'abc def'."""
- # Note: strings are encoded as UTF-8. This is only an issue if it contains
- # unescaped non-ASCII characters, which URIs should not.
- if not string:
- # Is it a string-like object?
- string.split
- return b''
- if isinstance(string, compat_str):
- string = string.encode('utf-8')
- bits = string.split(b'%')
- if len(bits) == 1:
- return string
- res = [bits[0]]
- append = res.append
- for item in bits[1:]:
- try:
- append(compat_urllib_parse._hextochr[item[:2]])
- append(item[2:])
- except KeyError:
- append(b'%')
- append(item)
- return b''.join(res)
-
- def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
- """Replace %xx escapes by their single-character equivalent. The optional
- encoding and errors parameters specify how to decode percent-encoded
- sequences into Unicode characters, as accepted by the bytes.decode()
- method.
- By default, percent-encoded sequences are decoded with UTF-8, and invalid
- sequences are replaced by a placeholder character.
-
- unquote('abc%20def') -> 'abc def'.
- """
- if '%' not in string:
- string.split
- return string
- if encoding is None:
- encoding = 'utf-8'
- if errors is None:
- errors = 'replace'
- bits = _asciire.split(string)
- res = [bits[0]]
- append = res.append
- for i in range(1, len(bits), 2):
- append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
- append(bits[i + 1])
- return ''.join(res)
-
- def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
- """Like unquote(), but also replace plus signs by spaces, as required for
- unquoting HTML form values.
-
- unquote_plus('%7e/abc+def') -> '~/abc def'
- """
- string = string.replace('+', ' ')
- return compat_urllib_parse_unquote(string, encoding, errors)
-
-try:
- from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError: # Python 2
- # Python 2 will choke in urlencode on mixture of byte and unicode strings.
- # Possible solutions are to either port it from python 3 with all
- # the friends or manually ensure input query contains only byte strings.
- # We will stick with latter thus recursively encoding the whole query.
- def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
- def encode_elem(e):
- if isinstance(e, dict):
- e = encode_dict(e)
- elif isinstance(e, (list, tuple,)):
- list_e = encode_list(e)
- e = tuple(list_e) if isinstance(e, tuple) else list_e
- elif isinstance(e, compat_str):
- e = e.encode(encoding)
- return e
-
- def encode_dict(d):
- return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
-
- def encode_list(l):
- return [encode_elem(e) for e in l]
-
- return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
-
-try:
- from urllib.request import DataHandler as compat_urllib_request_DataHandler
-except ImportError: # Python < 3.4
- # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
- class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
- def data_open(self, req):
- # data URLs as specified in RFC 2397.
- #
- # ignores POSTed data
- #
- # syntax:
- # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
- # mediatype := [ type "/" subtype ] *( ";" parameter )
- # data := *urlchar
- # parameter := attribute "=" value
- url = req.get_full_url()
-
- scheme, data = url.split(':', 1)
- mediatype, data = data.split(',', 1)
-
- # even base64 encoded data URLs might be quoted so unquote in any case:
- data = compat_urllib_parse_unquote_to_bytes(data)
- if mediatype.endswith(';base64'):
- data = binascii.a2b_base64(data)
- mediatype = mediatype[:-7]
-
- if not mediatype:
- mediatype = 'text/plain;charset=US-ASCII'
-
- headers = email.message_from_string(
- 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
-
- return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
-
-try:
- compat_basestring = basestring # Python 2
-except NameError:
- compat_basestring = str
-
-try:
- compat_chr = unichr # Python 2
-except NameError:
- compat_chr = chr
-
-try:
- from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError: # Python 2.6
- from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
-
-etree = xml.etree.ElementTree
+# a plain alias (compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE)
+# will not work, since ctypes.WINFUNCTYPE does not exist on UNIX machines
+def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
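+
+# e.g. on Windows (illustrative; any C prototype works the same way):
+#     prototype = compat_ctypes_WINFUNCTYPE(ctypes.c_int, ctypes.c_wchar_p)
+# deferring the attribute lookup to call time means merely importing this
+# module on UNIX does not raise AttributeError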
class _TreeBuilder(etree.TreeBuilder):
@@ -2528,126 +44,8 @@ class _TreeBuilder(etree.TreeBuilder):
pass
-try:
- # xml.etree.ElementTree.Element is a method in Python <=2.6 and
- # the following will crash with:
- # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
- isinstance(None, xml.etree.ElementTree.Element)
- from xml.etree.ElementTree import Element as compat_etree_Element
-except TypeError: # Python <=2.6
- from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
-
-if sys.version_info[0] >= 3:
- def compat_etree_fromstring(text):
- return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
-else:
- # python 2.x tries to encode unicode strings with ascii (see the
- # XMLParser._fixtext method)
- try:
- _etree_iter = etree.Element.iter
- except AttributeError: # Python <=2.6
- def _etree_iter(root):
- for el in root.findall('*'):
- yield el
- for sub in _etree_iter(el):
- yield sub
-
- # on 2.6 XML doesn't have a parser argument, function copied from CPython
- # 2.7 source
- def _XML(text, parser=None):
- if not parser:
- parser = etree.XMLParser(target=_TreeBuilder())
- parser.feed(text)
- return parser.close()
-
- def _element_factory(*args, **kwargs):
- el = etree.Element(*args, **kwargs)
- for k, v in el.items():
- if isinstance(v, bytes):
- el.set(k, v.decode('utf-8'))
- return el
-
- def compat_etree_fromstring(text):
- doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
- for el in _etree_iter(doc):
- if el.text is not None and isinstance(el.text, bytes):
- el.text = el.text.decode('utf-8')
- return doc
-
-if hasattr(etree, 'register_namespace'):
- compat_etree_register_namespace = etree.register_namespace
-else:
- def compat_etree_register_namespace(prefix, uri):
- """Register a namespace prefix.
- The registry is global, and any existing mapping for either the
- given prefix or the namespace URI will be removed.
- *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
- attributes in this namespace will be serialized with prefix if possible.
- ValueError is raised if prefix is reserved or is invalid.
- """
- if re.match(r"ns\d+$", prefix):
- raise ValueError("Prefix format reserved for internal use")
- for k, v in list(etree._namespace_map.items()):
- if k == uri or v == prefix:
- del etree._namespace_map[k]
- etree._namespace_map[uri] = prefix
-
-if sys.version_info < (2, 7):
- # Here comes the crazy part: In 2.6, if the xpath is a unicode,
- # .//node does not match if a node is a direct child of . !
- def compat_xpath(xpath):
- if isinstance(xpath, compat_str):
- xpath = xpath.encode('ascii')
- return xpath
-else:
- compat_xpath = lambda xpath: xpath
-
-try:
- from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
- # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
- # Python 2's version is apparently totally broken
-
- def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- qs, _coerce_result = qs, compat_str
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
- r = []
- for name_value in pairs:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
- raise ValueError('bad query field: %r' % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
- else:
- continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = compat_urllib_parse_unquote(
- name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = compat_urllib_parse_unquote(
- value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
- return r
-
- def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- parsed_result = {}
- pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
- for name, value in pairs:
- if name in parsed_result:
- parsed_result[name].append(value)
- else:
- parsed_result[name] = [value]
- return parsed_result
+def compat_etree_fromstring(text):
+ return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
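+
+# e.g. compat_etree_fromstring(b'<root a="b"/>').attrib['a'] == 'b';
+# the _TreeBuilder target ignores any doctype declaration in the input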
compat_os_name = os._name if os.name == 'java' else os.name
@@ -2657,29 +55,7 @@ if compat_os_name == 'nt':
def compat_shlex_quote(s):
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
- try:
- from shlex import quote as compat_shlex_quote
- except ImportError: # Python < 3.3
- def compat_shlex_quote(s):
- if re.match(r'^[-_\w./]+$', s):
- return s
- else:
- return "'" + s.replace("'", "'\"'\"'") + "'"
-
-
-try:
- args = shlex.split('中文')
- assert (isinstance(args, list)
- and isinstance(args[0], compat_str)
- and args[0] == '中文')
- compat_shlex_split = shlex.split
-except (AssertionError, UnicodeEncodeError):
- # Working around shlex issue with unicode strings on some python 2
- # versions (see http://bugs.python.org/issue1548891)
- def compat_shlex_split(s, comments=False, posix=True):
- if isinstance(s, compat_str):
- s = s.encode('utf-8')
- return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
+ from shlex import quote as compat_shlex_quote
def compat_ord(c):
@@ -2689,89 +65,8 @@ def compat_ord(c):
return ord(c)
-if sys.version_info >= (3, 0):
- compat_getenv = os.getenv
- compat_expanduser = os.path.expanduser
-
- def compat_setenv(key, value, env=os.environ):
- env[key] = value
-else:
- # Environment variables should be decoded with filesystem encoding.
- # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
-
- def compat_getenv(key, default=None):
- from .utils import get_filesystem_encoding
- env = os.getenv(key, default)
- if env:
- env = env.decode(get_filesystem_encoding())
- return env
-
- def compat_setenv(key, value, env=os.environ):
- def encode(v):
- from .utils import get_filesystem_encoding
- return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
- env[encode(key)] = encode(value)
-
- # HACK: The default implementations of os.path.expanduser from cpython do not decode
- # environment variables with filesystem encoding. We will work around this by
- # providing adjusted implementations.
- # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
- # for different platforms with correct environment variables decoding.
-
- if compat_os_name == 'posix':
- def compat_expanduser(path):
- """Expand ~ and ~user constructions. If user or $HOME is unknown,
- do nothing."""
- if not path.startswith('~'):
- return path
- i = path.find('/', 1)
- if i < 0:
- i = len(path)
- if i == 1:
- if 'HOME' not in os.environ:
- import pwd
- userhome = pwd.getpwuid(os.getuid()).pw_dir
- else:
- userhome = compat_getenv('HOME')
- else:
- import pwd
- try:
- pwent = pwd.getpwnam(path[1:i])
- except KeyError:
- return path
- userhome = pwent.pw_dir
- userhome = userhome.rstrip('/')
- return (userhome + path[i:]) or '/'
- elif compat_os_name in ('nt', 'ce'):
- def compat_expanduser(path):
- """Expand ~ and ~user constructs.
-
- If user or $HOME is unknown, do nothing."""
- if path[:1] != '~':
- return path
- i, n = 1, len(path)
- while i < n and path[i] not in '/\\':
- i = i + 1
-
- if 'HOME' in os.environ:
- userhome = compat_getenv('HOME')
- elif 'USERPROFILE' in os.environ:
- userhome = compat_getenv('USERPROFILE')
- elif 'HOMEPATH' not in os.environ:
- return path
- else:
- try:
- drive = compat_getenv('HOMEDRIVE')
- except KeyError:
- drive = ''
- userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
-
- if i != 1: # ~user
- userhome = os.path.join(os.path.dirname(userhome), path[1:i])
-
- return userhome + path[i:]
- else:
- compat_expanduser = os.path.expanduser
+def compat_setenv(key, value, env=os.environ):
+ env[key] = value
if compat_os_name == 'nt' and sys.version_info < (3, 8):
@@ -2785,78 +80,9 @@ else:
compat_realpath = os.path.realpath
-if sys.version_info < (3, 0):
- def compat_print(s):
- from .utils import preferredencoding
- print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
-else:
- def compat_print(s):
- assert isinstance(s, compat_str)
- print(s)
-
-
-if sys.version_info < (3, 0) and sys.platform == 'win32':
- def compat_getpass(prompt, *args, **kwargs):
- if isinstance(prompt, compat_str):
- from .utils import preferredencoding
- prompt = prompt.encode(preferredencoding())
- return getpass.getpass(prompt, *args, **kwargs)
-else:
- compat_getpass = getpass.getpass
-
-try:
- compat_input = raw_input
-except NameError: # Python 3
- compat_input = input
-
-# Python < 2.6.5 require kwargs to be bytes
-try:
- def _testfunc(x):
- pass
- _testfunc(**{'x': 0})
-except TypeError:
- def compat_kwargs(kwargs):
- return dict((bytes(k), v) for k, v in kwargs.items())
-else:
- compat_kwargs = lambda kwargs: kwargs
-
-
-try:
- compat_numeric_types = (int, float, long, complex)
-except NameError: # Python 3
- compat_numeric_types = (int, float, complex)
-
-
-try:
- compat_integer_types = (int, long)
-except NameError: # Python 3
- compat_integer_types = (int, )
-
-
-if sys.version_info < (2, 7):
- def compat_socket_create_connection(address, timeout, source_address=None):
- host, port = address
- err = None
- for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
- af, socktype, proto, canonname, sa = res
- sock = None
- try:
- sock = socket.socket(af, socktype, proto)
- sock.settimeout(timeout)
- if source_address:
- sock.bind(source_address)
- sock.connect(sa)
- return sock
- except socket.error as _:
- err = _
- if sock is not None:
- sock.close()
- if err is not None:
- raise err
- else:
- raise socket.error('getaddrinfo returns an empty list')
-else:
- compat_socket_create_connection = socket.create_connection
+def compat_print(s):
+ assert isinstance(s, compat_str)
+ print(s)
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
@@ -2880,129 +106,130 @@ def workaround_optparse_bug9161():
optparse.OptionGroup.add_option = _compat_add_option
-if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
- compat_get_terminal_size = shutil.get_terminal_size
-else:
- _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
-
- def compat_get_terminal_size(fallback=(80, 24)):
- columns = compat_getenv('COLUMNS')
- if columns:
- columns = int(columns)
- else:
- columns = None
- lines = compat_getenv('LINES')
- if lines:
- lines = int(lines)
- else:
- lines = None
-
- if columns is None or lines is None or columns <= 0 or lines <= 0:
- try:
- sp = subprocess.Popen(
- ['stty', 'size'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = sp.communicate()
- _lines, _columns = map(int, out.split())
- except Exception:
- _columns, _lines = _terminal_size(*fallback)
-
- if columns is None or columns <= 0:
- columns = _columns
- if lines is None or lines <= 0:
- lines = _lines
- return _terminal_size(columns, lines)
-
try:
- itertools.count(start=0, step=1)
- compat_itertools_count = itertools.count
-except TypeError: # Python 2.6
- def compat_itertools_count(start=0, step=1):
- n = start
- while True:
- yield n
- n += step
-
-if sys.version_info >= (3, 0):
- from tokenize import tokenize as compat_tokenize_tokenize
-else:
- from tokenize import generate_tokens as compat_tokenize_tokenize
+ compat_Pattern = re.Pattern
+except AttributeError:
+ compat_Pattern = type(re.compile(''))
try:
- struct.pack('!I', 0)
-except TypeError:
- # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
- # See https://bugs.python.org/issue19099
- def compat_struct_pack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.pack(spec, *args)
+ compat_Match = re.Match
+except AttributeError:
+ compat_Match = type(re.compile('').match(''))
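+
+# e.g. isinstance(re.compile(''), compat_Pattern) and
+# isinstance(re.match('', ''), compat_Match) are True on every supported version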
- def compat_struct_unpack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.unpack(spec, *args)
- class compat_Struct(struct.Struct):
- def __init__(self, fmt):
- if isinstance(fmt, compat_str):
- fmt = fmt.encode('ascii')
- super(compat_Struct, self).__init__(fmt)
+try:
+ compat_asyncio_run = asyncio.run # >= 3.7
+except AttributeError:
+ def compat_asyncio_run(coro):
+ try:
+ loop = asyncio.get_event_loop()
+ except RuntimeError:
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+        # match asyncio.run() by returning the coroutine's result
+        return loop.run_until_complete(coro)
+
+ asyncio.run = compat_asyncio_run
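+
+# e.g. compat_asyncio_run(coro()) behaves like asyncio.run(coro()) on
+# Python >= 3.7 and falls back to a manually managed event loop below that
+# (`coro` stands for any coroutine function)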
+
+
+# Python 3.8+ does not honor %HOME% on Windows, but this breaks compatibility with youtube-dl
+# See https://github.com/hypervideo/hypervideo/issues/792
+# https://docs.python.org/3/library/os.path.html#os.path.expanduser
+if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ:
+ _userhome = os.environ['HOME']
+
+ def compat_expanduser(path):
+ if not path.startswith('~'):
+ return path
+ i = path.replace('\\', '/', 1).find('/') # ~user
+ if i < 0:
+ i = len(path)
+ userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome
+ return userhome + path[i:]
else:
- compat_struct_pack = struct.pack
- compat_struct_unpack = struct.unpack
- if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
- class compat_Struct(struct.Struct):
- def unpack(self, string):
- if not isinstance(string, buffer): # noqa: F821
- string = buffer(string) # noqa: F821
- return super(compat_Struct, self).unpack(string)
- else:
- compat_Struct = struct.Struct
+ compat_expanduser = os.path.expanduser
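+
+# e.g. with HOME=C:\Users\alice (a hypothetical value), compat_expanduser
+# maps '~/dl' to 'C:\Users\alice/dl' and '~bob/dl' to 'C:\Users\bob/dl'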
try:
- from future_builtins import zip as compat_zip
-except ImportError: # not 2.6+ or is 3.x
+ from Cryptodome.Cipher import AES as compat_pycrypto_AES
+except ImportError:
try:
- from itertools import izip as compat_zip # < 2.5 or 3.x
+ from Crypto.Cipher import AES as compat_pycrypto_AES
except ImportError:
- compat_zip = zip
-
-
-if sys.version_info < (3, 3):
- def compat_b64decode(s, *args, **kwargs):
- if isinstance(s, compat_str):
- s = s.encode('ascii')
- return base64.b64decode(s, *args, **kwargs)
-else:
- compat_b64decode = base64.b64decode
-
-
-if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
- # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
- # names, see the original PyPy issue [1] and the hypervideo one [2].
- # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
- # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
- def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
- real = ctypes.WINFUNCTYPE(*args, **kwargs)
-
- def resf(tpl, *args, **kwargs):
- funcname, dll = tpl
- return real((str(funcname), dll), *args, **kwargs)
-
- return resf
-else:
- def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
- return ctypes.WINFUNCTYPE(*args, **kwargs)
-
+ compat_pycrypto_AES = None
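+
+# downstream code can feature-test the optional dependency, e.g.:
+#     if compat_pycrypto_AES is None:
+#         raise Exception('pycryptodome is required')  # illustrative only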
+
+
+def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.python.org/issue30075
+    if compat_os_name != 'nt':
+        return
+    os.system('')  # invoking any shell command makes the Windows console enable VT processing as a side effect
+
+
+# Deprecated
+
+compat_basestring = str
+compat_chr = chr
+compat_input = input
+compat_integer_types = (int, )
+compat_kwargs = lambda kwargs: kwargs
+compat_numeric_types = (int, float, complex)
+compat_str = str
+compat_xpath = lambda xpath: xpath
+compat_zip = zip
+
+compat_HTMLParser = html.parser.HTMLParser
+compat_HTTPError = urllib.error.HTTPError
+compat_Struct = struct.Struct
+compat_b64decode = base64.b64decode
+compat_cookiejar = http.cookiejar
+compat_cookiejar_Cookie = compat_cookiejar.Cookie
+compat_cookies = http.cookies
+compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+compat_etree_Element = etree.Element
+compat_etree_register_namespace = etree.register_namespace
+compat_get_terminal_size = shutil.get_terminal_size
+compat_getenv = os.getenv
+compat_getpass = getpass.getpass
+compat_html_entities = html.entities
+compat_html_entities_html5 = compat_html_entities.html5
+compat_http_client = http.client
+compat_http_server = http.server
+compat_itertools_count = itertools.count
+compat_parse_qs = urllib.parse.parse_qs
+compat_shlex_split = shlex.split
+compat_socket_create_connection = socket.create_connection
+compat_struct_pack = struct.pack
+compat_struct_unpack = struct.unpack
+compat_subprocess_get_DEVNULL = lambda: DEVNULL
+compat_tokenize_tokenize = tokenize.tokenize
+compat_urllib_error = urllib.error
+compat_urllib_parse = urllib.parse
+compat_urllib_parse_quote = urllib.parse.quote
+compat_urllib_parse_quote_plus = urllib.parse.quote_plus
+compat_urllib_parse_unquote = urllib.parse.unquote
+compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
+compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
+compat_urllib_parse_urlencode = urllib.parse.urlencode
+compat_urllib_parse_urlparse = urllib.parse.urlparse
+compat_urllib_parse_urlunparse = urllib.parse.urlunparse
+compat_urllib_request = urllib.request
+compat_urllib_request_DataHandler = urllib.request.DataHandler
+compat_urllib_response = urllib.response
+compat_urlparse = urllib.parse
+compat_urlretrieve = urllib.request.urlretrieve
+compat_xml_parse_error = etree.ParseError
+
+
+# Set public objects
__all__ = [
'compat_HTMLParseError',
'compat_HTMLParser',
'compat_HTTPError',
+ 'compat_Match',
+ 'compat_Pattern',
'compat_Struct',
+ 'compat_asyncio_run',
'compat_b64decode',
'compat_basestring',
'compat_chr',
@@ -3031,6 +258,7 @@ __all__ = [
'compat_os_name',
'compat_parse_qs',
'compat_print',
+ 'compat_pycrypto_AES',
'compat_realpath',
'compat_setenv',
'compat_shlex_quote',
@@ -3043,11 +271,14 @@ __all__ = [
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',
+ 'compat_urllib_parse_quote',
+ 'compat_urllib_parse_quote_plus',
'compat_urllib_parse_unquote',
'compat_urllib_parse_unquote_plus',
'compat_urllib_parse_unquote_to_bytes',
'compat_urllib_parse_urlencode',
'compat_urllib_parse_urlparse',
+ 'compat_urllib_parse_urlunparse',
'compat_urllib_request',
'compat_urllib_request_DataHandler',
'compat_urllib_response',
@@ -3056,5 +287,6 @@ __all__ = [
'compat_xml_parse_error',
'compat_xpath',
'compat_zip',
+ 'windows_enable_vt_mode',
'workaround_optparse_bug9161',
]
diff --git a/hypervideo_dl/cookies.py b/hypervideo_dl/cookies.py
new file mode 100644
index 0000000..38fbdfa
--- /dev/null
+++ b/hypervideo_dl/cookies.py
@@ -0,0 +1,745 @@
+import ctypes
+import json
+import os
+import shutil
+import struct
+import subprocess
+import sys
+import tempfile
+from datetime import datetime, timedelta, timezone
+from hashlib import pbkdf2_hmac
+
+from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes
+from .compat import (
+ compat_b64decode,
+ compat_cookiejar_Cookie,
+)
+from .utils import (
+ bug_reports_message,
+ expand_path,
+ process_communicate_or_kill,
+ YoutubeDLCookieJar,
+)
+
+try:
+ import sqlite3
+ SQLITE_AVAILABLE = True
+except ImportError:
+    # although sqlite3 is part of the standard library, it is possible to compile Python
+    # without sqlite support. See: https://github.com/hypervideo/hypervideo/issues/544
+ SQLITE_AVAILABLE = False
+
+
+try:
+ import keyring
+ KEYRING_AVAILABLE = True
+ KEYRING_UNAVAILABLE_REASON = f'due to unknown reasons{bug_reports_message()}'
+except ImportError:
+ KEYRING_AVAILABLE = False
+ KEYRING_UNAVAILABLE_REASON = (
+ 'as the `keyring` module is not installed. '
+ 'Please install by running `python3 -m pip install keyring`. '
+ 'Depending on your platform, additional packages may be required '
+ 'to access the keyring; see https://pypi.org/project/keyring')
+except Exception as _err:
+ KEYRING_AVAILABLE = False
+ KEYRING_UNAVAILABLE_REASON = 'as the `keyring` module could not be initialized: %s' % _err
+
+
+CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
+SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
+
+
+class YDLLogger:
+ def __init__(self, ydl=None):
+ self._ydl = ydl
+
+ def debug(self, message):
+ if self._ydl:
+ self._ydl.write_debug(message)
+
+ def info(self, message):
+ if self._ydl:
+ self._ydl.to_screen(f'[Cookies] {message}')
+
+ def warning(self, message, only_once=False):
+ if self._ydl:
+ self._ydl.report_warning(message, only_once)
+
+ def error(self, message):
+ if self._ydl:
+ self._ydl.report_error(message)
+
+
+def load_cookies(cookie_file, browser_specification, ydl):
+ cookie_jars = []
+ if browser_specification is not None:
+ browser_name, profile = _parse_browser_specification(*browser_specification)
+ cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl)))
+
+ if cookie_file is not None:
+ cookie_file = expand_path(cookie_file)
+ jar = YoutubeDLCookieJar(cookie_file)
+ if os.access(cookie_file, os.R_OK):
+ jar.load(ignore_discard=True, ignore_expires=True)
+ cookie_jars.append(jar)
+
+ return _merge_cookie_jars(cookie_jars)
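+
+# e.g. load_cookies('cookies.txt', ('firefox', None), ydl) merges the cookies
+# from cookies.txt with those of the default firefox profile (a sketch; the
+# tuple is unpacked into _parse_browser_specification)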
+
+
+def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger()):
+ if browser_name == 'firefox':
+ return _extract_firefox_cookies(profile, logger)
+ elif browser_name == 'safari':
+ return _extract_safari_cookies(profile, logger)
+ elif browser_name in CHROMIUM_BASED_BROWSERS:
+ return _extract_chrome_cookies(browser_name, profile, logger)
+ else:
+ raise ValueError('unknown browser: {}'.format(browser_name))
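+
+# e.g. extract_cookies_from_browser('chrome') returns a YoutubeDLCookieJar
+# built from the default Chrome profile's cookie database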
+
+
+def _extract_firefox_cookies(profile, logger):
+ logger.info('Extracting cookies from firefox')
+ if not SQLITE_AVAILABLE:
+ logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
+ 'Please use a python interpreter compiled with sqlite3 support')
+ return YoutubeDLCookieJar()
+
+ if profile is None:
+ search_root = _firefox_browser_dir()
+ elif _is_path(profile):
+ search_root = profile
+ else:
+ search_root = os.path.join(_firefox_browser_dir(), profile)
+
+ cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
+ if cookie_database_path is None:
+ raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
+ logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
+
+ with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir:
+ cursor = None
+ try:
+ cursor = _open_database_copy(cookie_database_path, tmpdir)
+ cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
+ jar = YoutubeDLCookieJar()
+ for host, name, value, path, expiry, is_secure in cursor.fetchall():
+ cookie = compat_cookiejar_Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
+ comment=None, comment_url=None, rest={})
+ jar.set_cookie(cookie)
+ logger.info('Extracted {} cookies from firefox'.format(len(jar)))
+ return jar
+ finally:
+ if cursor is not None:
+ cursor.connection.close()
+
+
+def _firefox_browser_dir():
+ if sys.platform in ('linux', 'linux2'):
+ return os.path.expanduser('~/.mozilla/firefox')
+ elif sys.platform == 'win32':
+ return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles')
+ elif sys.platform == 'darwin':
+ return os.path.expanduser('~/Library/Application Support/Firefox')
+ else:
+ raise ValueError('unsupported platform: {}'.format(sys.platform))
+
+
+def _get_chromium_based_browser_settings(browser_name):
+ # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
+ if sys.platform in ('linux', 'linux2'):
+ config = _config_home()
+ browser_dir = {
+ 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
+ 'chrome': os.path.join(config, 'google-chrome'),
+ 'chromium': os.path.join(config, 'chromium'),
+ 'edge': os.path.join(config, 'microsoft-edge'),
+ 'opera': os.path.join(config, 'opera'),
+ 'vivaldi': os.path.join(config, 'vivaldi'),
+ }[browser_name]
+
+ elif sys.platform == 'win32':
+ appdata_local = os.path.expandvars('%LOCALAPPDATA%')
+ appdata_roaming = os.path.expandvars('%APPDATA%')
+ browser_dir = {
+ 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'),
+ 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'),
+ 'chromium': os.path.join(appdata_local, r'Chromium\User Data'),
+ 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'),
+ 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'),
+ 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'),
+ }[browser_name]
+
+ elif sys.platform == 'darwin':
+ appdata = os.path.expanduser('~/Library/Application Support')
+ browser_dir = {
+ 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
+ 'chrome': os.path.join(appdata, 'Google/Chrome'),
+ 'chromium': os.path.join(appdata, 'Chromium'),
+ 'edge': os.path.join(appdata, 'Microsoft Edge'),
+ 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
+ 'vivaldi': os.path.join(appdata, 'Vivaldi'),
+ }[browser_name]
+
+ else:
+ raise ValueError('unsupported platform: {}'.format(sys.platform))
+
+ # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
+ # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
+ keyring_name = {
+ 'brave': 'Brave',
+ 'chrome': 'Chrome',
+ 'chromium': 'Chromium',
+ 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
+ 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
+ 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
+ }[browser_name]
+
+ browsers_without_profiles = {'opera'}
+
+ return {
+ 'browser_dir': browser_dir,
+ 'keyring_name': keyring_name,
+ 'supports_profiles': browser_name not in browsers_without_profiles
+ }
+
+
+def _extract_chrome_cookies(browser_name, profile, logger):
+ logger.info('Extracting cookies from {}'.format(browser_name))
+
+ if not SQLITE_AVAILABLE:
+ logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
+ 'Please use a python interpreter compiled with sqlite3 support').format(browser_name))
+ return YoutubeDLCookieJar()
+
+ config = _get_chromium_based_browser_settings(browser_name)
+
+ if profile is None:
+ search_root = config['browser_dir']
+ elif _is_path(profile):
+ search_root = profile
+ config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
+ else:
+ if config['supports_profiles']:
+ search_root = os.path.join(config['browser_dir'], profile)
+ else:
+ logger.error('{} does not support profiles'.format(browser_name))
+ search_root = config['browser_dir']
+
+ cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies')
+ if cookie_database_path is None:
+ raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
+ logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
+
+ decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger)
+
+ with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir:
+ cursor = None
+ try:
+ cursor = _open_database_copy(cookie_database_path, tmpdir)
+ cursor.connection.text_factory = bytes
+ column_names = _get_column_names(cursor, 'cookies')
+ secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
+ cursor.execute('SELECT host_key, name, value, encrypted_value, path, '
+ 'expires_utc, {} FROM cookies'.format(secure_column))
+ jar = YoutubeDLCookieJar()
+ failed_cookies = 0
+ for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
+ host_key = host_key.decode('utf-8')
+ name = name.decode('utf-8')
+ value = value.decode('utf-8')
+ path = path.decode('utf-8')
+
+ if not value and encrypted_value:
+ value = decryptor.decrypt(encrypted_value)
+ if value is None:
+ failed_cookies += 1
+ continue
+
+ cookie = compat_cookiejar_Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
+ comment=None, comment_url=None, rest={})
+ jar.set_cookie(cookie)
+ if failed_cookies > 0:
+ failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
+ else:
+ failed_message = ''
+ logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
+ return jar
+ finally:
+ if cursor is not None:
+ cursor.connection.close()
+
+
+class ChromeCookieDecryptor:
+ """
+ Overview:
+
+ Linux:
+ - cookies are either v10 or v11
+ - v10: AES-CBC encrypted with a fixed key
+ - v11: AES-CBC encrypted with an OS protected key (keyring)
+ - v11 keys can be stored in various places depending on the active desktop environment [2]
+
+ Mac:
+ - cookies are either v10 or not v10
+ - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
+ - not v10: 'old data' stored as plaintext
+
+ Windows:
+ - cookies are either v10 or not v10
+ - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
+ - not v10: encrypted with DPAPI
+
+ Sources:
+ - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
+ - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
+ - KeyStorageLinux::CreateService
+ """
+
+ def decrypt(self, encrypted_value):
+ raise NotImplementedError
+
+
+def get_cookie_decryptor(browser_root, browser_keyring_name, logger):
+ if sys.platform in ('linux', 'linux2'):
+ return LinuxChromeCookieDecryptor(browser_keyring_name, logger)
+ elif sys.platform == 'darwin':
+ return MacChromeCookieDecryptor(browser_keyring_name, logger)
+ elif sys.platform == 'win32':
+ return WindowsChromeCookieDecryptor(browser_root, logger)
+ else:
+ raise NotImplementedError('Chrome cookie decryption is not supported '
+ 'on this platform: {}'.format(sys.platform))
+
+
+class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_keyring_name, logger):
+ self._logger = logger
+ self._v10_key = self.derive_key(b'peanuts')
+ if KEYRING_AVAILABLE:
+ self._v11_key = self.derive_key(_get_linux_keyring_password(browser_keyring_name))
+ else:
+ self._v11_key = None
+
+ @staticmethod
+ def derive_key(password):
+ # values from
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
+ return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b'v10':
+ return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
+
+ elif version == b'v11':
+ if self._v11_key is None:
+ self._logger.warning(f'cannot decrypt cookie {KEYRING_UNAVAILABLE_REASON}', only_once=True)
+ return None
+ return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
+
+ else:
+ return None
+
+
+class MacChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_keyring_name, logger):
+ self._logger = logger
+ password = _get_mac_keyring_password(browser_keyring_name, logger)
+ self._v10_key = None if password is None else self.derive_key(password)
+
+ @staticmethod
+ def derive_key(password):
+ # values from
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
+ return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b'v10':
+ if self._v10_key is None:
+ self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
+ return None
+
+ return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
+
+ else:
+ # other prefixes are considered 'old data', which is stored as plaintext
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
+ return encrypted_value
+
+
+class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_root, logger):
+ self._logger = logger
+ self._v10_key = _get_windows_v10_key(browser_root, logger)
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b'v10':
+ if self._v10_key is None:
+ self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
+ return None
+
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+ # kNonceLength
+ nonce_length = 96 // 8
+ # boringssl
+ # EVP_AEAD_AES_GCM_TAG_LEN
+ authentication_tag_length = 16
+
+ raw_ciphertext = ciphertext
+ nonce = raw_ciphertext[:nonce_length]
+ ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
+ authentication_tag = raw_ciphertext[-authentication_tag_length:]
+
+ return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
+
+ else:
+ # any other prefix means the data is DPAPI encrypted
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+ return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')
+
+
+def _extract_safari_cookies(profile, logger):
+ if profile is not None:
+ logger.error('safari does not support profiles')
+ if sys.platform != 'darwin':
+ raise ValueError('unsupported platform: {}'.format(sys.platform))
+
+ cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
+
+ if not os.path.isfile(cookies_path):
+ raise FileNotFoundError('could not find safari cookies database')
+
+ with open(cookies_path, 'rb') as f:
+ cookies_data = f.read()
+
+ jar = parse_safari_cookies(cookies_data, logger=logger)
+ logger.info('Extracted {} cookies from safari'.format(len(jar)))
+ return jar
+
+
+class ParserError(Exception):
+ pass
+
+
+class DataParser:
+ def __init__(self, data, logger):
+ self._data = data
+ self.cursor = 0
+ self._logger = logger
+
+ def read_bytes(self, num_bytes):
+ if num_bytes < 0:
+ raise ParserError('invalid read of {} bytes'.format(num_bytes))
+ end = self.cursor + num_bytes
+ if end > len(self._data):
+ raise ParserError('reached end of input')
+ data = self._data[self.cursor:end]
+ self.cursor = end
+ return data
+
+ def expect_bytes(self, expected_value, message):
+ value = self.read_bytes(len(expected_value))
+ if value != expected_value:
+ raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))
+
+ def read_uint(self, big_endian=False):
+ data_format = '>I' if big_endian else '<I'
+ return struct.unpack(data_format, self.read_bytes(4))[0]
+
+ def read_double(self, big_endian=False):
+ data_format = '>d' if big_endian else '<d'
+ return struct.unpack(data_format, self.read_bytes(8))[0]
+
+ def read_cstring(self):
+ buffer = []
+ while True:
+ c = self.read_bytes(1)
+ if c == b'\x00':
+ return b''.join(buffer).decode('utf-8')
+ else:
+ buffer.append(c)
+
+ def skip(self, num_bytes, description='unknown'):
+ if num_bytes > 0:
+ self._logger.debug('skipping {} bytes ({}): {}'.format(
+ num_bytes, description, self.read_bytes(num_bytes)))
+ elif num_bytes < 0:
+ raise ParserError('invalid skip of {} bytes'.format(num_bytes))
+
+ def skip_to(self, offset, description='unknown'):
+ self.skip(offset - self.cursor, description)
+
+ def skip_to_end(self, description='unknown'):
+ self.skip_to(len(self._data), description)
+
+
+def _mac_absolute_time_to_posix(timestamp):
+ return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
+
+
+def _parse_safari_cookies_header(data, logger):
+ p = DataParser(data, logger)
+ p.expect_bytes(b'cook', 'database signature')
+ number_of_pages = p.read_uint(big_endian=True)
+ page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
+ return page_sizes, p.cursor
+
+
+def _parse_safari_cookies_page(data, jar, logger):
+ p = DataParser(data, logger)
+ p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
+ number_of_cookies = p.read_uint()
+ record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
+ if number_of_cookies == 0:
+ logger.debug('a cookies page of size {} has no cookies'.format(len(data)))
+ return
+
+ p.skip_to(record_offsets[0], 'unknown page header field')
+
+ for record_offset in record_offsets:
+ p.skip_to(record_offset, 'space between records')
+ record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
+ p.read_bytes(record_length)
+ p.skip_to_end('space in between pages')
+
+
+def _parse_safari_cookies_record(data, jar, logger):
+ p = DataParser(data, logger)
+ record_size = p.read_uint()
+ p.skip(4, 'unknown record field 1')
+ flags = p.read_uint()
+ is_secure = bool(flags & 0x0001)
+ p.skip(4, 'unknown record field 2')
+ domain_offset = p.read_uint()
+ name_offset = p.read_uint()
+ path_offset = p.read_uint()
+ value_offset = p.read_uint()
+ p.skip(8, 'unknown record field 3')
+ expiration_date = _mac_absolute_time_to_posix(p.read_double())
+ _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841
+
+ try:
+ p.skip_to(domain_offset)
+ domain = p.read_cstring()
+
+ p.skip_to(name_offset)
+ name = p.read_cstring()
+
+ p.skip_to(path_offset)
+ path = p.read_cstring()
+
+ p.skip_to(value_offset)
+ value = p.read_cstring()
+ except UnicodeDecodeError:
+ logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
+ return record_size
+
+ p.skip_to(record_size, 'space at the end of the record')
+
+ cookie = compat_cookiejar_Cookie(
+ version=0, name=name, value=value, port=None, port_specified=False,
+ domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
+ path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
+ comment=None, comment_url=None, rest={})
+ jar.set_cookie(cookie)
+ return record_size
+
+
+def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
+ """
+ References:
+ - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
+ - this data appears to be out of date but the important parts of the database structure are the same
+ - there are a few bytes here and there which are skipped during parsing
+ """
+ if jar is None:
+ jar = YoutubeDLCookieJar()
+ page_sizes, body_start = _parse_safari_cookies_header(data, logger)
+ p = DataParser(data[body_start:], logger)
+ for page_size in page_sizes:
+ _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
+ p.skip_to_end('footer')
+ return jar
+
+
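    # For orientation, the binarycookies layout as decoded by the parsers above
    # (integers are little-endian unless noted; '?' marks fields skipped as unknown):
    #
    #   file   : b'cook' | page count (uint32 BE) | page sizes (uint32 BE each) | pages...
    #   page   : b'\x00\x00\x01\x00' | cookie count | record offsets | records...
    #   record : size | ? | flags (bit 0 = secure) | ? | domain offset | name offset
    #            | path offset | value offset | ? | expiry (double) | creation (double)
    #            | NUL-terminated strings at the given offsets
    #
    # The doubles count seconds from 2001-01-01 UTC (Mac absolute time).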
+def _get_linux_keyring_password(browser_keyring_name):
+ password = keyring.get_password('{} Keys'.format(browser_keyring_name),
+ '{} Safe Storage'.format(browser_keyring_name))
+ if password is None:
+ # this sometimes occurs in KDE because chrome does not check hasEntry and instead
+ # just tries to read the value (for which kwallet returns an empty string), whereas
+ # keyring checks hasEntry first. To verify this, run
+ # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
+ # while starting chrome.
+ # this may be a bug, as the intended behaviour is to generate a random password
+ # and store it, but that doesn't matter here.
+ password = ''
+ return password.encode('utf-8')
+
+
+def _get_mac_keyring_password(browser_keyring_name, logger):
+ if KEYRING_AVAILABLE:
+ logger.debug('using keyring to obtain password')
+ password = keyring.get_password('{} Safe Storage'.format(browser_keyring_name), browser_keyring_name)
+ return password.encode('utf-8')
+ else:
+ logger.debug('using find-generic-password to obtain password')
+ proc = subprocess.Popen(['security', 'find-generic-password',
+ '-w', # write password to stdout
+ '-a', browser_keyring_name, # match 'account'
+ '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service'
+ stdout=subprocess.PIPE,
+ stderr=subprocess.DEVNULL)
+ try:
+ stdout, stderr = process_communicate_or_kill(proc)
+ if stdout[-1:] == b'\n':
+ stdout = stdout[:-1]
+ return stdout
+ except BaseException as e:
+ logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
+ return None
+
+
+def _get_windows_v10_key(browser_root, logger):
+ path = _find_most_recently_used_file(browser_root, 'Local State')
+ if path is None:
+ logger.error('could not find local state file')
+ return None
+ with open(path, 'r') as f:
+ data = json.load(f)
+ try:
+ base64_key = data['os_crypt']['encrypted_key']
+ except KeyError:
+ logger.error('no encrypted key in Local State')
+ return None
+ encrypted_key = compat_b64decode(base64_key)
+ prefix = b'DPAPI'
+ if not encrypted_key.startswith(prefix):
+ logger.error('invalid key')
+ return None
+ return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)
+
+
+def pbkdf2_sha1(password, salt, iterations, key_length):
+ return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
+
+
+def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
+ plaintext = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
+ padding_length = plaintext[-1]
+ try:
+ return plaintext[:-padding_length].decode('utf-8')
+ except UnicodeDecodeError:
+ logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
+ return None
+
+
+def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
+ try:
+ plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
+ except ValueError:
+ logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
+ return None
+
+ try:
+ return plaintext.decode('utf-8')
+ except UnicodeDecodeError:
+ logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
+ return None
+
+
+def _decrypt_windows_dpapi(ciphertext, logger):
+ """
+ References:
+ - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
+ """
+ from ctypes.wintypes import DWORD
+
+ class DATA_BLOB(ctypes.Structure):
+ _fields_ = [('cbData', DWORD),
+ ('pbData', ctypes.POINTER(ctypes.c_char))]
+
+ buffer = ctypes.create_string_buffer(ciphertext)
+ blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
+ blob_out = DATA_BLOB()
+ ret = ctypes.windll.crypt32.CryptUnprotectData(
+ ctypes.byref(blob_in), # pDataIn
+ None, # ppszDataDescr: human readable description of pDataIn
+ None, # pOptionalEntropy: salt?
+ None, # pvReserved: must be NULL
+ None, # pPromptStruct: information about prompts to display
+ 0, # dwFlags
+ ctypes.byref(blob_out) # pDataOut
+ )
+ if not ret:
+ logger.warning('failed to decrypt with DPAPI', only_once=True)
+ return None
+
+ result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
+ ctypes.windll.kernel32.LocalFree(blob_out.pbData)
+ return result
+
+
+def _config_home():
+ return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
+
+
+def _open_database_copy(database_path, tmpdir):
+ # cannot open sqlite databases if they are already in use (e.g. by the browser)
+ database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
+ shutil.copy(database_path, database_copy_path)
+ conn = sqlite3.connect(database_copy_path)
+ return conn.cursor()
+
+
+def _get_column_names(cursor, table_name):
+ table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall()
+ return [row[1].decode('utf-8') for row in table_info]
+
+
+def _find_most_recently_used_file(root, filename):
+ # if there are multiple browser profiles, take the most recently used one
+ paths = []
+ for root, dirs, files in os.walk(root):
+ for file in files:
+ if file == filename:
+ paths.append(os.path.join(root, file))
+ return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
+
+
+def _merge_cookie_jars(jars):
+ output_jar = YoutubeDLCookieJar()
+ for jar in jars:
+ for cookie in jar:
+ output_jar.set_cookie(cookie)
+ if jar.filename is not None:
+ output_jar.filename = jar.filename
+ return output_jar
+
+
+def _is_path(value):
+ return os.path.sep in value
+
+
+def _parse_browser_specification(browser_name, profile=None):
+ browser_name = browser_name.lower()
+ if browser_name not in SUPPORTED_BROWSERS:
+ raise ValueError(f'unsupported browser: "{browser_name}"')
+ if profile is not None and _is_path(profile):
+ profile = os.path.expanduser(profile)
+ return browser_name, profile
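A quick usage sketch of the new module's entry points (illustrative only; the cookies file, browser, and profile below are hypothetical):

    from hypervideo_dl.cookies import extract_cookies_from_browser, load_cookies

    # Extract from an installed browser's default profile ...
    jar = extract_cookies_from_browser('firefox')

    # ... or from a specific profile, given either a profile name or a full path
    jar = extract_cookies_from_browser('chrome', 'Profile 1')

    # load_cookies() merges a cookies.txt file with a browser extraction;
    # the tuple mirrors the (browser, profile) form of --cookies-from-browser
    jar = load_cookies('cookies.txt', ('brave', None), ydl=None)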
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py
index 2e485df..2449c74 100644
--- a/hypervideo_dl/downloader/__init__.py
+++ b/hypervideo_dl/downloader/__init__.py
@@ -1,24 +1,47 @@
from __future__ import unicode_literals
+from ..compat import compat_str
+from ..utils import (
+ determine_protocol,
+ NO_DEFAULT
+)
+
+
+def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
+ info_dict['protocol'] = determine_protocol(info_dict)
+ info_copy = info_dict.copy()
+ info_copy['to_stdout'] = to_stdout
+
+ downloaders = [_get_suitable_downloader(info_copy, proto, params, default)
+ for proto in (protocol or info_copy['protocol']).split('+')]
+ if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params):
+ return FFmpegFD
+ elif len(downloaders) == 1:
+ return downloaders[0]
+ return None
+
+
+# Some of these require get_suitable_downloader
from .common import FileDownloader
+from .dash import DashSegmentsFD
from .f4m import F4mFD
from .hls import HlsFD
from .http import HttpFD
from .rtmp import RtmpFD
-from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
+from .mhtml import MhtmlFD
+from .niconico import NiconicoDmcFD
+from .websocket import WebSocketFragmentFD
+from .youtube_live_chat import YoutubeLiveChatFD
from .external import (
get_external_downloader,
FFmpegFD,
)
-from ..utils import (
- determine_protocol,
-)
-
PROTOCOL_MAP = {
'rtmp': RtmpFD,
+ 'rtmp_ffmpeg': FFmpegFD,
'm3u8_native': HlsFD,
'm3u8': FFmpegFD,
'mms': RtspFD,
@@ -26,36 +49,78 @@ PROTOCOL_MAP = {
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
+ 'mhtml': MhtmlFD,
+ 'niconico_dmc': NiconicoDmcFD,
+ 'websocket_frag': WebSocketFragmentFD,
+ 'youtube_live_chat': YoutubeLiveChatFD,
+ 'youtube_live_chat_replay': YoutubeLiveChatFD,
}
-def get_suitable_downloader(info_dict, params={}):
+def shorten_protocol_name(proto, simplify=False):
+ short_protocol_names = {
+ 'm3u8_native': 'm3u8_n',
+ 'rtmp_ffmpeg': 'rtmp_f',
+ 'http_dash_segments': 'dash',
+ 'niconico_dmc': 'dmc',
+ 'websocket_frag': 'WSfrag',
+ }
+ if simplify:
+ short_protocol_names.update({
+ 'https': 'http',
+ 'ftps': 'ftp',
+ 'm3u8_native': 'm3u8',
+ 'rtmp_ffmpeg': 'rtmp',
+ 'm3u8_frag_urls': 'm3u8',
+ 'dash_frag_urls': 'dash',
+ })
+ return short_protocol_names.get(proto, proto)
+
+
+def _get_suitable_downloader(info_dict, protocol, params, default):
"""Get the downloader class that can handle the info dict."""
- protocol = determine_protocol(info_dict)
- info_dict['protocol'] = protocol
+ if default is NO_DEFAULT:
+ default = HttpFD
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
# return FFmpegFD
- external_downloader = params.get('external_downloader')
- if external_downloader is not None:
+ info_dict['protocol'] = protocol
+ downloaders = params.get('external_downloader')
+ external_downloader = (
+ downloaders if isinstance(downloaders, compat_str) or downloaders is None
+ else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
+
+ if external_downloader is None:
+ if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
+ return FFmpegFD
+ elif external_downloader.lower() != 'native':
ed = get_external_downloader(external_downloader)
- if ed.can_download(info_dict):
+ if ed.can_download(info_dict, external_downloader):
return ed
- if protocol.startswith('m3u8') and info_dict.get('is_live'):
- return FFmpegFD
-
- if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
- return HlsFD
+ if protocol == 'http_dash_segments':
+ if info_dict.get('is_live') and (external_downloader or '').lower() != 'native':
+ return FFmpegFD
- if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
- return FFmpegFD
+ if protocol in ('m3u8', 'm3u8_native'):
+ if info_dict.get('is_live'):
+ return FFmpegFD
+ elif (external_downloader or '').lower() == 'native':
+ return HlsFD
+ elif get_suitable_downloader(
+ info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
+ return HlsFD
+ elif params.get('hls_prefer_native') is True:
+ return HlsFD
+ elif params.get('hls_prefer_native') is False:
+ return FFmpegFD
- return PROTOCOL_MAP.get(protocol, HttpFD)
+ return PROTOCOL_MAP.get(protocol, default)
__all__ = [
- 'get_suitable_downloader',
'FileDownloader',
+ 'get_suitable_downloader',
+ 'shorten_protocol_name',
]
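Roughly, the new selection logic behaves as in this sketch (the info dicts are synthetic and omit the many fields real extractors populate):

    from hypervideo_dl.downloader import get_suitable_downloader, shorten_protocol_name
    from hypervideo_dl.downloader.external import FFmpegFD
    from hypervideo_dl.downloader.http import HttpFD

    # Consistent short names for display
    assert shorten_protocol_name('http_dash_segments') == 'dash'
    assert shorten_protocol_name('m3u8_native', simplify=True) == 'm3u8'

    # Plain HTTP(S) still resolves to the native downloader
    assert get_suitable_downloader({'url': 'https://example.com/v.mp4', 'protocol': 'https'}) is HttpFD

    # Live HLS is delegated to ffmpeg
    assert get_suitable_downloader({'url': 'https://example.com/v.m3u8', 'protocol': 'm3u8', 'is_live': True}) is FFmpegFD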
diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py
index d023168..27ca2cd 100644
--- a/hypervideo_dl/downloader/common.py
+++ b/hypervideo_dl/downloader/common.py
@@ -2,11 +2,9 @@ from __future__ import division, unicode_literals
import os
import re
-import sys
import time
import random
-from ..compat import compat_os_name
from ..utils import (
decodeArgument,
encodeFilename,
@@ -15,6 +13,12 @@ from ..utils import (
shell_quote,
timeconvert,
)
+from ..minicurses import (
+ MultilineLogger,
+ MultilinePrinter,
+ QuietMultilinePrinter,
+ BreaklineStatusPrinter
+)
class FileDownloader(object):
@@ -32,25 +36,28 @@ class FileDownloader(object):
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
+ throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
- external_downloader_args: A list of additional command-line arguments for the
- external downloader.
+ external_downloader_args: A dictionary of downloader keys (in lower case)
+ and a list of additional command-line arguments for the
+ executable. Use 'default' as the name for arguments to be
+ passed to all downloaders. For compatibility with youtube-dl,
+ a single list of args can also be used
hls_use_mpegts: Use the mpegts container for HLS videos.
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
+ progress_template: See YoutubeDL.py
Subclasses of this one must re-define the real_download method.
"""
@@ -63,6 +70,7 @@ class FileDownloader(object):
self.ydl = ydl
self._progress_hooks = []
self.params = params
+ self._prepare_multiline_status()
self.add_progress_hook(self.report_progress)
@staticmethod
@@ -147,10 +155,10 @@ class FileDownloader(object):
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
- self.ydl.to_screen(*args, **kargs)
+ self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)
def to_stderr(self, message):
- self.ydl.to_screen(message)
+ self.ydl.to_stderr(message)
def to_console_title(self, message):
self.ydl.to_console_title(message)
@@ -164,6 +172,9 @@ class FileDownloader(object):
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
+ def write_debug(self, *args, **kargs):
+ self.ydl.write_debug(*args, **kargs)
+
def slow_down(self, start_time, now, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit')
@@ -196,12 +207,12 @@ class FileDownloader(object):
return filename + '.ytdl'
def try_rename(self, old_filename, new_filename):
+ if old_filename == new_filename:
+ return
try:
- if old_filename == new_filename:
- return
- os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+ os.replace(old_filename, new_filename)
except (IOError, OSError) as err:
- self.report_error('unable to rename file: %s' % error_to_compat_str(err))
+ self.report_error(f'unable to rename file: {err}')
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
@@ -228,39 +239,46 @@ class FileDownloader(object):
"""Report destination filename."""
self.to_screen('[download] Destination: ' + filename)
- def _report_progress_status(self, msg, is_last_line=False):
- fullmsg = '[download] ' + msg
- if self.params.get('progress_with_newline', False):
- self.to_screen(fullmsg)
+ def _prepare_multiline_status(self, lines=1):
+ if self.params.get('noprogress'):
+ self._multiline = QuietMultilinePrinter()
+ elif self.ydl.params.get('logger'):
+ self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
+ elif self.params.get('progress_with_newline'):
+ self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
else:
- if compat_os_name == 'nt':
- prev_len = getattr(self, '_report_progress_prev_line_length',
- 0)
- if prev_len > len(fullmsg):
- fullmsg += ' ' * (prev_len - len(fullmsg))
- self._report_progress_prev_line_length = len(fullmsg)
- clear_line = '\r'
- else:
- clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
- self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
- self.to_console_title('hypervideo ' + msg)
+ self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
+
+ def _finish_multiline_status(self):
+ self._multiline.end()
+
+ def _report_progress_status(self, s):
+ progress_dict = s.copy()
+ progress_dict.pop('info_dict')
+ progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
+
+ progress_template = self.params.get('progress_template', {})
+ self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
+ progress_template.get('download') or '[download] %(progress._default_template)s',
+ progress_dict), s.get('progress_idx') or 0)
+ self.to_console_title(self.ydl.evaluate_outtmpl(
+ progress_template.get('download-title') or 'hypervideo %(progress._default_template)s',
+ progress_dict))
def report_progress(self, s):
if s['status'] == 'finished':
- if self.params.get('noprogress', False):
+ if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
- else:
- msg_template = '100%%'
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template += ' of %(_total_bytes_str)s'
- if s.get('elapsed') is not None:
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template += ' in %(_elapsed_str)s'
- self._report_progress_status(
- msg_template % s, is_last_line=True)
-
- if self.params.get('noprogress'):
+ msg_template = '100%%'
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template += ' of %(_total_bytes_str)s'
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template += ' in %(_elapsed_str)s'
+ s['_percent_str'] = self.format_percent(100)
+ s['_default_template'] = msg_template % s
+ self._report_progress_status(s)
return
if s['status'] != 'downloading':
@@ -302,8 +320,8 @@ class FileDownloader(object):
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
else:
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
-
- self._report_progress_status(msg_template % s)
+ s['_default_template'] = msg_template % s
+ self._report_progress_status(s)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@@ -312,27 +330,30 @@ class FileDownloader(object):
def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(
- '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+ '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
% (error_to_compat_str(err), count, self.format_retries(retries)))
- def report_file_already_downloaded(self, file_name):
+ def report_file_already_downloaded(self, *args, **kwargs):
"""Report file has already been fully downloaded."""
- try:
- self.to_screen('[download] %s has already been downloaded' % file_name)
- except UnicodeEncodeError:
- self.to_screen('[download] The file has already been downloaded')
+ return self.ydl.report_file_already_downloaded(*args, **kwargs)
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')
- def download(self, filename, info_dict):
+ @staticmethod
+ def supports_manifest(manifest):
+ """ Whether the downloader can download the fragments from the manifest.
+ Redefine in subclasses if needed. """
+ pass
+
+ def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False)
+ not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename))
)
@@ -350,26 +371,43 @@ class FileDownloader(object):
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
- })
- return True
-
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
- self.to_screen(
- '[download] Sleeping %s seconds...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
-
- return self.real_download(filename, info_dict)
+ }, info_dict)
+ return True, False
+
+ if subtitle is False:
+ min_sleep_interval = self.params.get('sleep_interval')
+ if min_sleep_interval:
+ max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+ sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds ...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
+ time.sleep(sleep_interval)
+ else:
+ sleep_interval_sub = 0
+ if type(self.params.get('sleep_interval_subtitles')) is int:
+ sleep_interval_sub = self.params.get('sleep_interval_subtitles')
+ if sleep_interval_sub > 0:
+ self.to_screen(
+ '[download] Sleeping %s seconds ...' % (
+ sleep_interval_sub))
+ time.sleep(sleep_interval_sub)
+ ret = self.real_download(filename, info_dict)
+ self._finish_multiline_status()
+ return ret, True
def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses."""
raise NotImplementedError('This method must be implemented by subclasses')
- def _hook_progress(self, status):
+ def _hook_progress(self, status, info_dict):
+ if not self._progress_hooks:
+ return
+ status['info_dict'] = info_dict
+ # youtube-dl passes the same status object to all the hooks.
+ # Some third-party scripts seem to be relying on this.
+ # So keep this behavior if possible
for ph in self._progress_hooks:
ph(status)
@@ -387,5 +425,4 @@ class FileDownloader(object):
if exe is None:
exe = os.path.basename(str_args[0])
- self.to_screen('[debug] %s command line: %s' % (
- exe, shell_quote(str_args)))
+ self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))
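The progress line is now rendered through the output-template engine from a dict of the shape {'info': info_dict, 'progress': status}. A sketch of a custom template and of the extended hook payload ('%(info.title)s' is an assumption about available info keys; the wiring of progress_template itself lives in YoutubeDL.py):

    params = {
        'progress_template': {
            # Rendered on every status update via print_at_line()
            'download': '[download] %(progress._percent_str)s of %(info.title)s',
            # Rendered into the console window title
            'download-title': 'hypervideo %(progress._default_template)s',
        },
    }

    # Progress hooks now receive the full info dict under status['info_dict']
    def progress_hook(status):
        if status['status'] == 'finished':
            print('finished:', status['info_dict'].get('id'))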
diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py
index c6d674b..6444ad6 100644
--- a/hypervideo_dl/downloader/dash.py
+++ b/hypervideo_dl/downloader/dash.py
@@ -1,80 +1,62 @@
from __future__ import unicode_literals
+from ..downloader import get_suitable_downloader
from .fragment import FragmentFD
-from ..compat import compat_urllib_error
-from ..utils import (
- DownloadError,
- urljoin,
-)
+
+from ..utils import urljoin
class DashSegmentsFD(FragmentFD):
"""
- Download segments in a DASH manifest
+ Download segments in a DASH manifest. External downloaders can take over
+ the fragment downloads by supporting the 'dash_frag_urls' protocol
"""
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
+ if info_dict.get('is_live'):
+ self.report_error('Live DASH videos are not supported')
+
fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
+ real_downloader = get_suitable_downloader(
+ info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
+
ctx = {
'filename': filename,
'total_frags': len(fragments),
}
- self._prepare_and_start_frag_download(ctx)
-
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+ if real_downloader:
+ self._prepare_external_frag_download(ctx)
+ else:
+ self._prepare_and_start_frag_download(ctx, info_dict)
+ fragments_to_download = []
frag_index = 0
for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
- # In DASH, the first segment contains necessary headers to
- # generate a valid MP4 file, so always abort for the first segment
- fatal = i == 0 or not skip_unavailable_fragments
- count = 0
- while count <= fragment_retries:
- try:
- fragment_url = fragment.get('url')
- if not fragment_url:
- assert fragment_base_url
- fragment_url = urljoin(fragment_base_url, fragment['path'])
- success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
- if not success:
- return False
- self._append_fragment(ctx, frag_content)
- break
- except compat_urllib_error.HTTPError as err:
- # YouTube may often return 404 HTTP error for a fragment causing the
- # whole download to fail. However if the same fragment is immediately
- # retried with the same request data this usually succeeds (1-2 attempts
- # is usually enough) thus allowing to download the whole file successfully.
- # To be future-proof we will retry all fragments that fail with any
- # HTTP error.
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- except DownloadError:
- # Don't retry fragment if error occurred during HTTP downloading
- # itself since it has own retry settings
- if not fatal:
- self.report_skip_fragment(frag_index)
- break
- raise
-
- if count > fragment_retries:
- if not fatal:
- self.report_skip_fragment(frag_index)
- continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False
-
- self._finish_frag_download(ctx)
-
- return True
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+
+ fragments_to_download.append({
+ 'frag_index': frag_index,
+ 'index': i,
+ 'url': fragment_url,
+ })
+
+ if real_downloader:
+ self.to_screen(
+ '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+ info_dict['fragments'] = fragments_to_download
+ fd = real_downloader(self.ydl, self.params)
+ return fd.real_download(filename, info_dict)
+
+ return self.download_and_append_fragments(ctx, fragments_to_download, info_dict)
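For reference, the list assembled above has this shape (URLs hypothetical); it is either assigned back to info_dict['fragments'] for the delegated downloader or passed to download_and_append_fragments():

    fragments_to_download = [
        {'frag_index': 1, 'index': 0, 'url': 'https://cdn.example.com/init.m4s'},
        {'frag_index': 2, 'index': 1, 'url': 'https://cdn.example.com/seg-1.m4s'},
        {'frag_index': 3, 'index': 2, 'url': 'https://cdn.example.com/seg-2.m4s'},
    ]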
diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py
index c31f891..74adb05 100644
--- a/hypervideo_dl/downloader/external.py
+++ b/hypervideo_dl/downloader/external.py
@@ -6,7 +6,7 @@ import subprocess
import sys
import time
-from .common import FileDownloader
+from .fragment import FragmentFD
from ..compat import (
compat_setenv,
compat_str,
@@ -16,16 +16,21 @@ from ..utils import (
cli_option,
cli_valueless_option,
cli_bool_option,
- cli_configuration_args,
+ _configuration_args,
encodeFilename,
encodeArgument,
handle_youtubedl_headers,
check_executable,
is_outdated_version,
+ process_communicate_or_kill,
+ sanitize_open,
)
-class ExternalFD(FileDownloader):
+class ExternalFD(FragmentFD):
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
+ can_download_to_stdout = False
+
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
@@ -56,7 +61,7 @@ class ExternalFD(FileDownloader):
'downloaded_bytes': fsize,
'total_bytes': fsize,
})
- self._hook_progress(status)
+ self._hook_progress(status, info_dict)
return True
else:
self.to_stderr('\n')
@@ -70,19 +75,25 @@ class ExternalFD(FileDownloader):
@property
def exe(self):
- return self.params.get('external_downloader')
+ return self.get_basename()
@classmethod
- def available(cls):
- return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+ def available(cls, path=None):
+ path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
+ if path:
+ cls.exe = path
+ return path
+ return False
@classmethod
def supports(cls, info_dict):
- return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
+ return (
+ (cls.can_download_to_stdout or not info_dict.get('to_stdout'))
+ and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
@classmethod
- def can_download(cls, info_dict):
- return cls.available() and cls.supports(info_dict)
+ def can_download(cls, info_dict, path=None):
+ return cls.available(path) and cls.supports(info_dict)
def _option(self, command_option, param):
return cli_option(self.params, command_option, param)
@@ -93,8 +104,10 @@ class ExternalFD(FileDownloader):
def _valueless_option(self, command_option, param, expected_value=True):
return cli_valueless_option(self.params, command_option, param, expected_value)
- def _configuration_args(self, default=[]):
- return cli_configuration_args(self.params, 'external_downloader_args', default)
+ def _configuration_args(self, keys=None, *args, **kwargs):
+ return _configuration_args(
+ self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
+ keys, *args, **kwargs)
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
@@ -102,12 +115,56 @@ class ExternalFD(FileDownloader):
self._debug_cmd(cmd)
- p = subprocess.Popen(
- cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate()
- if p.returncode != 0:
+ if 'fragments' not in info_dict:
+ p = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE)
+ _, stderr = process_communicate_or_kill(p)
+ if p.returncode != 0:
+ self.to_stderr(stderr.decode('utf-8', 'replace'))
+ return p.returncode
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ count = 0
+ while count <= fragment_retries:
+ p = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE)
+ _, stderr = process_communicate_or_kill(p)
+ if p.returncode == 0:
+ break
+ # TODO: Decide whether to retry based on error code
+ # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
self.to_stderr(stderr.decode('utf-8', 'replace'))
- return p.returncode
+ count += 1
+ if count <= fragment_retries:
+ self.to_screen(
+ '[%s] Got error. Retrying fragments (attempt %d of %s)...'
+ % (self.get_basename(), count, self.format_retries(fragment_retries)))
+ if count > fragment_retries:
+ if not skip_unavailable_fragments:
+ self.report_error('Giving up after %s fragment retries' % fragment_retries)
+ return -1
+
+ decrypt_fragment = self.decrypter(info_dict)
+ dest, _ = sanitize_open(tmpfilename, 'wb')
+ for frag_index, fragment in enumerate(info_dict['fragments']):
+ fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
+ try:
+ src, _ = sanitize_open(fragment_filename, 'rb')
+ except IOError:
+ if skip_unavailable_fragments and frag_index > 1:
+ self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index))
+ continue
+ self.report_error('Unable to open fragment %d' % frag_index)
+ return -1
+ dest.write(decrypt_fragment(fragment, src.read()))
+ src.close()
+ if not self.params.get('keep_fragments', False):
+ os.remove(encodeFilename(fragment_filename))
+ dest.close()
+ os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
+ return 0
class CurlFD(ExternalFD):
@@ -115,8 +172,10 @@ class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
@@ -141,7 +200,7 @@ class CurlFD(ExternalFD):
# curl writes the progress to stderr so don't capture it.
p = subprocess.Popen(cmd)
- p.communicate()
+ process_communicate_or_kill(p)
return p.returncode
@@ -150,8 +209,9 @@ class AxelFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
- cmd += ['-H', '%s: %s' % (key, val)]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['-H', '%s: %s' % (key, val)]
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -162,8 +222,9 @@ class WgetFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
@@ -180,51 +241,115 @@ class WgetFD(ExternalFD):
class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v'
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
+
+ @staticmethod
+ def supports_manifest(manifest):
+ UNSUPPORTED_FEATURES = [
+ r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ ]
+ check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+ return all(check_results)
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-c']
- cmd += self._configuration_args([
- '--min-split-size', '1M', '--max-connection-per-server', '4'])
- dn = os.path.dirname(tmpfilename)
- if dn:
- cmd += ['--dir', dn]
- cmd += ['--out', os.path.basename(tmpfilename)]
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd = [self.exe, '-c',
+ '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+ '--file-allocation=none', '-x16', '-j16', '-s16']
+ if 'fragments' in info_dict:
+ cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+ else:
+ cmd += ['--min-split-size', '1M']
+
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
- cmd += ['--', info_dict['url']]
+ cmd += self._configuration_args()
+
+ # aria2c strips out spaces from the beginning/end of filenames and paths.
+ # We work around this issue by adding a "./" to the beginning of the
+ # filename and relative path, and adding a "/" at the end of the path.
+ # See: https://github.com/hypervideo/hypervideo/issues/276
+ # https://github.com/ytdl-org/youtube-dl/issues/20312
+ # https://github.com/aria2/aria2/issues/1373
+ dn = os.path.dirname(tmpfilename)
+ if dn:
+ if not os.path.isabs(dn):
+ dn = '.%s%s' % (os.path.sep, dn)
+ cmd += ['--dir', dn + os.path.sep]
+ if 'fragments' not in info_dict:
+ cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))]
+ cmd += ['--auto-file-renaming=false']
+
+ if 'fragments' in info_dict:
+ cmd += ['--file-allocation=none', '--uri-selector=inorder']
+ url_list_file = '%s.frag.urls' % tmpfilename
+ url_list = []
+ for frag_index, fragment in enumerate(info_dict['fragments']):
+ fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+ url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
+ stream, _ = sanitize_open(url_list_file, 'wb')
+ stream.write('\n'.join(url_list).encode('utf-8'))
+ stream.close()
+ cmd += ['-i', url_list_file]
+ else:
+ cmd += ['--', info_dict['url']]
return cmd
class HttpieFD(ExternalFD):
+ AVAILABLE_OPT = '--version'
+
@classmethod
- def available(cls):
- return check_executable('http', ['--version'])
+ def available(cls, path=None):
+ return ExternalFD.available(cls, path or 'http')
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
- for key, val in info_dict['http_headers'].items():
- cmd += ['%s:%s' % (key, val)]
+
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['%s:%s' % (key, val)]
return cmd
class FFmpegFD(ExternalFD):
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
+ can_download_to_stdout = True
+
+ @classmethod
+ def available(cls, path=None):
+ # TODO: Fix path for ffmpeg
+ # Fixme: This may be wrong when --ffmpeg-location is used
+ return FFmpegPostProcessor().available
+
@classmethod
def supports(cls, info_dict):
- return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+ return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
+
+ def on_process_started(self, proc, stdin):
+ """ Override this in subclasses """
+ pass
@classmethod
- def available(cls):
- return FFmpegPostProcessor().available
+ def can_merge_formats(cls, info_dict, params):
+ return (
+ info_dict.get('requested_formats')
+ and info_dict.get('protocol')
+ and not params.get('allow_unplayable_formats')
+ and 'no-direct-merge' not in params.get('compat_opts', [])
+ and cls.can_download(info_dict))
def _call_downloader(self, tmpfilename, info_dict):
- url = info_dict['url']
+ urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
ffpp = FFmpegPostProcessor(downloader=self)
if not ffpp.available:
- self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+ self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
return False
ffpp.check_version()
@@ -234,7 +359,12 @@ class FFmpegFD(ExternalFD):
if self.params.get(log_level, False):
args += ['-loglevel', log_level]
break
+ if not self.params.get('verbose'):
+ args += ['-hide_banner']
+ args += info_dict.get('_ffmpeg_args', [])
+
+ # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
@@ -244,8 +374,6 @@ class FFmpegFD(ExternalFD):
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
- args += self._configuration_args()
-
# start_time = info_dict.get('start_time') or 0
# if start_time:
# args += ['-ss', compat_str(start_time)]
@@ -253,7 +381,7 @@ class FFmpegFD(ExternalFD):
# if end_time:
# args += ['-t', compat_str(end_time - start_time)]
- if info_dict['http_headers'] and re.match(r'^https?://', url):
+ if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers'])
@@ -311,13 +439,25 @@ class FFmpegFD(ExternalFD):
elif isinstance(conn, compat_str):
args += ['-rtmp_conn', conn]
- args += ['-i', url, '-c', 'copy']
+ for i, url in enumerate(urls):
+ args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
+
+ args += ['-c', 'copy']
+ if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
+ for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
+ stream_number = fmt.get('manifest_stream_number', 0)
+ a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v'
+ args.extend(['-map', f'{i}:{a_or_v}:{stream_number}'])
if self.params.get('test', False):
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+ ext = info_dict['ext']
if protocol in ('m3u8', 'm3u8_native'):
- if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
+ use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
+ if use_mpegts is None:
+ use_mpegts = info_dict.get('is_live')
+ if use_mpegts:
args += ['-f', 'mpegts']
else:
args += ['-f', 'mp4']
@@ -325,25 +465,33 @@ class FFmpegFD(ExternalFD):
args += ['-bsf:a', 'aac_adtstoasc']
elif protocol == 'rtmp':
args += ['-f', 'flv']
+ elif ext == 'mp4' and tmpfilename == '-':
+ args += ['-f', 'mpegts']
else:
- args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+ args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+
+ args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
-
self._debug_cmd(args)
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
+ if url in ('-', 'pipe:'):
+ self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
- except KeyboardInterrupt:
+ except BaseException as e:
# subprocess.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
- if sys.platform != 'win32':
- proc.communicate(b'q')
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
+ process_communicate_or_kill(proc, b'q')
+ else:
+ proc.kill()
+ proc.wait()
raise
return retval
@@ -355,7 +503,7 @@ class AVconvFD(FFmpegFD):
_BY_NAME = dict(
(klass.get_basename(), klass)
for name, klass in globals().items()
- if name.endswith('FD') and name != 'ExternalFD'
+ if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
)
@@ -368,4 +516,4 @@ def get_external_downloader(external_downloader):
downloader . """
# Drop .exe extension on Windows
bn = os.path.splitext(os.path.basename(external_downloader))[0]
- return _BY_NAME[bn]
+ return _BY_NAME.get(bn)
diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py
index 8dd3c2e..9da2776 100644
--- a/hypervideo_dl/downloader/f4m.py
+++ b/hypervideo_dl/downloader/f4m.py
@@ -267,13 +267,14 @@ class F4mFD(FragmentFD):
media = doc.findall(_add_ns('media'))
if not media:
self.report_error('No media found')
- for e in (doc.findall(_add_ns('drmAdditionalHeader'))
- + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
- # If id attribute is missing it's valid for all media nodes
- # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
- if 'id' not in e.attrib:
- self.report_error('Missing ID in f4m DRM')
- media = remove_encrypted_media(media)
+ if not self.params.get('allow_unplayable_formats'):
+ for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+ # If id attribute is missing it's valid for all media nodes
+ # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
+ if 'id' not in e.attrib:
+ self.report_error('Missing ID in f4m DRM')
+ media = remove_encrypted_media(media)
if not media:
self.report_error('Unsupported DRM')
return media
@@ -379,7 +380,7 @@ class F4mFD(FragmentFD):
base_url_parsed = compat_urllib_parse_urlparse(base_url)
- self._start_frag_download(ctx)
+ self._start_frag_download(ctx, info_dict)
frag_index = 0
while fragments_list:
@@ -433,6 +434,6 @@ class F4mFD(FragmentFD):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
- self._finish_frag_download(ctx)
+ self._finish_frag_download(ctx, info_dict)
return True
diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py
index b82e3cf..57068db 100644
--- a/hypervideo_dl/downloader/fragment.py
+++ b/hypervideo_dl/downloader/fragment.py
@@ -3,10 +3,23 @@ from __future__ import division, unicode_literals
import os
import time
import json
+from math import ceil
+
+try:
+ import concurrent.futures
+ can_threaded_download = True
+except ImportError:
+ can_threaded_download = False
from .common import FileDownloader
from .http import HttpFD
+from ..aes import aes_cbc_decrypt_bytes
+from ..compat import (
+ compat_urllib_error,
+ compat_struct_pack,
+)
from ..utils import (
+ DownloadError,
error_to_compat_str,
encodeFilename,
sanitize_open,
@@ -31,6 +44,7 @@ class FragmentFD(FileDownloader):
Skip unavailable fragments (DASH and hlsnative only)
keep_fragments: Keep downloaded fragments on disk after downloading is
finished
+ _no_ytdl_file: Don't use .ytdl file
For each incomplete fragment download hypervideo keeps on disk a special
bookkeeping file with download state and metadata (in future such files will
@@ -55,29 +69,31 @@ class FragmentFD(FileDownloader):
def report_retry_fragment(self, err, frag_index, count, retries):
self.to_screen(
- '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
+ '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
def report_skip_fragment(self, frag_index):
- self.to_screen('[download] Skipping fragment %d...' % frag_index)
+ self.to_screen('[download] Skipping fragment %d ...' % frag_index)
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
- def _prepare_and_start_frag_download(self, ctx):
+ def _prepare_and_start_frag_download(self, ctx, info_dict):
self._prepare_frag_download(ctx)
- self._start_frag_download(ctx)
+ self._start_frag_download(ctx, info_dict)
- @staticmethod
- def __do_ytdl_file(ctx):
- return not ctx['live'] and not ctx['tmpfilename'] == '-'
+ def __do_ytdl_file(self, ctx):
+ return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file')
def _read_ytdl_file(self, ctx):
assert 'ytdl_corrupt' not in ctx
stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
try:
- ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+ ytdl_data = json.loads(stream.read())
+ ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
+ if 'extra_state' in ytdl_data['downloader']:
+ ctx['extra_state'] = ytdl_data['downloader']['extra_state']
except Exception:
ctx['ytdl_corrupt'] = True
finally:
@@ -85,32 +101,42 @@ class FragmentFD(FileDownloader):
def _write_ytdl_file(self, ctx):
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
- downloader = {
- 'current_fragment': {
- 'index': ctx['fragment_index'],
- },
- }
- if ctx.get('fragment_count') is not None:
- downloader['fragment_count'] = ctx['fragment_count']
- frag_index_stream.write(json.dumps({'downloader': downloader}))
- frag_index_stream.close()
+ try:
+ downloader = {
+ 'current_fragment': {
+ 'index': ctx['fragment_index'],
+ },
+ }
+ if 'extra_state' in ctx:
+ downloader['extra_state'] = ctx['extra_state']
+ if ctx.get('fragment_count') is not None:
+ downloader['fragment_count'] = ctx['fragment_count']
+ frag_index_stream.write(json.dumps({'downloader': downloader}))
+ finally:
+ frag_index_stream.close()
- def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
+ 'request_data': request_data,
+ 'ctx_id': ctx.get('ctx_id'),
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False, None
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
- down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+ ctx['fragment_filename_sanitized'] = fragment_filename
+ return True, self._read_fragment(ctx)
+
+ def _read_fragment(self, ctx):
+ down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
- return True, frag_content
+ return frag_content
def _append_fragment(self, ctx, frag_content):
try:
@@ -173,7 +199,7 @@ class FragmentFD(FileDownloader):
'.ytdl file is corrupt' if is_corrupt else
'Inconsistent state of incomplete fragment download')
self.report_warning(
- '%s. Restarting from the beginning...' % message)
+ '%s. Restarting from the beginning ...' % message)
ctx['fragment_index'] = resume_len = 0
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
@@ -192,9 +218,10 @@ class FragmentFD(FileDownloader):
'complete_frags_downloaded_bytes': resume_len,
})
- def _start_frag_download(self, ctx):
+ def _start_frag_download(self, ctx, info_dict):
resume_len = ctx['complete_frags_downloaded_bytes']
total_frags = ctx['total_frags']
+ ctx_id = ctx.get('ctx_id')
# This dict stores the download progress, it's updated by the progress
# hook
state = {
@@ -218,9 +245,16 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'):
return
+ if ctx_id is not None and s.get('ctx_id') != ctx_id:
+ return
+
+ state['max_progress'] = ctx.get('max_progress')
+ state['progress_idx'] = ctx.get('progress_idx')
+
time_now = time.time()
state['elapsed'] = time_now - start
frag_total_bytes = s.get('total_bytes') or 0
+ s['fragment_info_dict'] = s.pop('info_dict', {})
if not ctx['live']:
estimated_size = (
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
@@ -243,13 +277,13 @@ class FragmentFD(FileDownloader):
state['speed'] = s.get('speed') or ctx.get('speed')
ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
- self._hook_progress(state)
+ self._hook_progress(state, info_dict)
ctx['dl'].add_progress_hook(frag_progress_hook)
return start
- def _finish_frag_download(self, ctx):
+ def _finish_frag_download(self, ctx, info_dict):
ctx['dest_stream'].close()
if self.__do_ytdl_file(ctx):
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
@@ -276,4 +310,177 @@ class FragmentFD(FileDownloader):
'filename': ctx['filename'],
'status': 'finished',
'elapsed': elapsed,
+ 'ctx_id': ctx.get('ctx_id'),
+ 'max_progress': ctx.get('max_progress'),
+ 'progress_idx': ctx.get('progress_idx'),
+ }, info_dict)
+
+ def _prepare_external_frag_download(self, ctx):
+ if 'live' not in ctx:
+ ctx['live'] = False
+ if not ctx['live']:
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
+ self.to_screen(
+ '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+
+ tmpfilename = self.temp_name(ctx['filename'])
+
+ # Should be initialized before ytdl file check
+ ctx.update({
+ 'tmpfilename': tmpfilename,
+ 'fragment_index': 0,
})
+
+ def decrypter(self, info_dict):
+ _key_cache = {}
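+ # Fetch each key URL only once and reuse the key across fragments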
+
+ def _get_key(url):
+ if url not in _key_cache:
+ _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
+ return _key_cache[url]
+
+ def decrypt_fragment(fragment, frag_content):
+ decrypt_info = fragment.get('decrypt_info')
+ if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
+ return frag_content
+ iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
+ decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
+ # Don't decrypt the content in tests since the data is explicitly truncated and is not padded to a valid block
+ # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data was
+ # downloaded, not what it decrypts to.
+ if self.params.get('test', False):
+ return frag_content
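+ # Pad the ciphertext to a whole number of 16-byte AES blocks, then strip
+ # the PKCS#7-style padding length given by the last decrypted byte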
+ padding_len = 16 - (len(frag_content) % 16)
+ decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv)
+ return decrypted_data[:-decrypted_data[-1]]
+
+ return decrypt_fragment
+
+ def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None):
+ '''
+ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
+ each arg must be a tuple or a list
+ '''
+ max_progress = len(args)
+ if max_progress == 1:
+ return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
+ max_workers = self.params.get('concurrent_fragment_downloads', max_progress)
+ self._prepare_multiline_status(max_progress)
+
+ def thread_func(idx, ctx, fragments, info_dict, tpe):
+ ctx['max_progress'] = max_progress
+ ctx['progress_idx'] = idx
+ return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe)
+
+ class FTPE(concurrent.futures.ThreadPoolExecutor):
+ # Suppress shutdown(wait=True) in __exit__; a job running inside the
+ # pool would otherwise wait on its own worker thread and deadlock
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
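+ # Give each parallel download its own pool, splitting the worker budget
+ # evenly between them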
+ spins = []
+ for idx, (ctx, fragments, info_dict) in enumerate(args):
+ tpe = FTPE(ceil(max_workers / max_progress))
+ job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe)
+ spins.append((tpe, job))
+
+ result = True
+ for tpe, job in spins:
+ try:
+ result = result and job.result()
+ finally:
+ tpe.shutdown(wait=True)
+ return result
+
+ def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None):
+ fragment_retries = self.params.get('fragment_retries', 0)
+ is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
+ if not pack_func:
+ pack_func = lambda frag_content, _: frag_content
+
+ def download_fragment(fragment, ctx):
+ frag_index = ctx['fragment_index'] = fragment['frag_index']
+ headers = info_dict.get('http_headers', {}).copy()
+ byte_range = fragment.get('byte_range')
+ if byte_range:
+ headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
+
+ # Never skip the first fragment
+ fatal = is_fatal(fragment.get('index') or (frag_index - 1))
+ count, frag_content = 0, None
+ while count <= fragment_retries:
+ try:
+ success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
+ if not success:
+ return False, frag_index
+ break
+ except compat_urllib_error.HTTPError as err:
+ # Unavailable (possibly temporary) fragments may be served.
+ # First we try to retry then either skip or abort.
+ # See https://github.com/ytdl-org/youtube-dl/issues/10165,
+ # https://github.com/ytdl-org/youtube-dl/issues/10448).
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ except DownloadError:
+ # Don't retry fragment if error occurred during HTTP downloading
+ # itself since it has own retry settings
+ if not fatal:
+ break
+ raise
+
+ if count > fragment_retries:
+ if not fatal:
+ return False, frag_index
+ ctx['dest_stream'].close()
+ self.report_error('Giving up after %s fragment retries' % fragment_retries)
+ return False, frag_index
+ return frag_content, frag_index
+
+ def append_fragment(frag_content, frag_index, ctx):
+ if not frag_content:
+ if not is_fatal(frag_index - 1):
+ self.report_skip_fragment(frag_index)
+ return True
+ else:
+ ctx['dest_stream'].close()
+ self.report_error(
+ 'fragment %s not found, unable to continue' % frag_index)
+ return False
+ self._append_fragment(ctx, pack_func(frag_content, frag_index))
+ return True
+
+ decrypt_fragment = self.decrypter(info_dict)
+
+ max_workers = self.params.get('concurrent_fragment_downloads', 1)
+ if can_threaded_download and max_workers > 1:
+
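+ # Each worker downloads into a shallow copy of ctx so shared bookkeeping
+ # fields are not clobbered; pool.map yields results in submission order,
+ # so fragments are still appended sequentially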
+ def _download_fragment(fragment):
+ ctx_copy = ctx.copy()
+ frag_content, frag_index = download_fragment(fragment, ctx_copy)
+ return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
+
+ self.report_warning('The download speed shown is only for one thread. This is a known issue and patches are welcome')
+ with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
+ for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ ctx['fragment_filename_sanitized'] = frag_filename
+ ctx['fragment_index'] = frag_index
+ result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ if not result:
+ return False
+ else:
+ for fragment in fragments:
+ frag_content, frag_index = download_fragment(fragment, ctx)
+ result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ if not result:
+ return False
+
+ if finish_func is not None:
+ ctx['dest_stream'].write(finish_func())
+ ctx['dest_stream'].flush()
+ self._finish_frag_download(ctx, info_dict)
+ return True
diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py
index 7aaebc9..61312c5 100644
--- a/hypervideo_dl/downloader/hls.py
+++ b/hypervideo_dl/downloader/hls.py
@@ -1,36 +1,37 @@
from __future__ import unicode_literals
import re
+import io
import binascii
-try:
- from Crypto.Cipher import AES
- can_decrypt_frag = True
-except ImportError:
- can_decrypt_frag = False
+from ..downloader import get_suitable_downloader
from .fragment import FragmentFD
from .external import FFmpegFD
from ..compat import (
- compat_urllib_error,
+ compat_pycrypto_AES,
compat_urlparse,
- compat_struct_pack,
)
from ..utils import (
parse_m3u8_attributes,
update_url_query,
+ bug_reports_message,
)
+from .. import webvtt
class HlsFD(FragmentFD):
- """ A limited implementation that does not require ffmpeg """
+ """
+ Download segments in an m3u8 manifest. External downloaders can take over
+ the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
+ re-defining the 'supports_manifest' function
+ """
FD_NAME = 'hlsnative'
@staticmethod
- def can_download(manifest, info_dict):
- UNSUPPORTED_FEATURES = (
- r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ def can_download(manifest, info_dict, allow_unplayable_formats=False):
+ UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
# Live streams heuristic does not always work (e.g. geo restricted to Germany
@@ -42,20 +43,23 @@ class HlsFD(FragmentFD):
# no segments will definitely be appended to the end of the playlist.
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
# # event media playlists [4]
- r'#EXT-X-MAP:', # media initialization [5]
-
+ # r'#EXT-X-MAP:', # media initialization [5]
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
- )
- check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
- is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
- check_results.append(can_decrypt_frag or not is_aes128_enc)
- check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
- check_results.append(not info_dict.get('is_live'))
- return all(check_results)
+ ]
+ if not allow_unplayable_formats:
+ UNSUPPORTED_FEATURES += [
+ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ ]
+
+ def check_results():
+ yield not info_dict.get('is_live')
+ for feature in UNSUPPORTED_FEATURES:
+ yield not re.search(feature, manifest)
+ return all(check_results())
def real_download(self, filename, info_dict):
man_url = info_dict['url']
@@ -65,17 +69,32 @@ class HlsFD(FragmentFD):
man_url = urlh.geturl()
s = urlh.read().decode('utf-8', 'ignore')
- if not self.can_download(s, info_dict):
- if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
- self.report_error('pycrypto not found. Please install it.')
- return False
- self.report_warning(
- 'hlsnative has detected features it does not support, '
- 'extraction will be delegated to ffmpeg')
+ can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
+ if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s:
+ if FFmpegFD.available():
+ can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
+ else:
+ message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex is available; '
+ 'decryption will be performed natively, but will be extremely slow')
+ if not can_download:
+ message = message or 'Unsupported features have been detected'
fd = FFmpegFD(self.ydl, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
+ self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
return fd.real_download(filename, info_dict)
+ elif message:
+ self.report_warning(message)
+
+ is_webvtt = info_dict['ext'] == 'vtt'
+ if is_webvtt:
+ real_downloader = None # Packing the fragments is not currently supported for external downloaders
+ else:
+ real_downloader = get_suitable_downloader(
+ info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
+ if real_downloader and not real_downloader.supports_manifest(s):
+ real_downloader = None
+ if real_downloader:
+ self.to_screen(
+ '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
@@ -85,6 +104,8 @@ class HlsFD(FragmentFD):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+ fragments = []
+
media_frags = 0
ad_frags = 0
ad_frag_next = False
@@ -109,12 +130,14 @@ class HlsFD(FragmentFD):
'ad_frags': ad_frags,
}
- self._prepare_and_start_frag_download(ctx)
+ if real_downloader:
+ self._prepare_external_frag_download(ctx)
+ else:
+ self._prepare_and_start_frag_download(ctx, info_dict)
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- test = self.params.get('test', False)
+ extra_state = ctx.setdefault('extra_state', {})
+ format_index = info_dict.get('format_index')
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
@@ -123,12 +146,15 @@ class HlsFD(FragmentFD):
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
byte_range = {}
+ discontinuity_count = 0
frag_index = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line:
if not line.startswith('#'):
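+ # A single playlist may pack several renditions separated by
+ # EXT-X-DISCONTINUITY tags; keep only fragments belonging to the
+ # requested format_index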
+ if format_index and discontinuity_count != format_index:
+ continue
if ad_frag_next:
continue
frag_index += 1
@@ -140,50 +166,49 @@ class HlsFD(FragmentFD):
else compat_urlparse.urljoin(man_url, line))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
- count = 0
- headers = info_dict.get('http_headers', {})
- if byte_range:
- headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
- while count <= fragment_retries:
- try:
- success, frag_content = self._download_fragment(
- ctx, frag_url, info_dict, headers)
- if not success:
- return False
- break
- except compat_urllib_error.HTTPError as err:
- # Unavailable (possibly temporary) fragments may be served.
- # First we try to retry then either skip or abort.
- # See https://github.com/ytdl-org/youtube-dl/issues/10165,
- # https://github.com/ytdl-org/youtube-dl/issues/10448).
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- if skip_unavailable_fragments:
- i += 1
- media_sequence += 1
- self.report_skip_fragment(frag_index)
- continue
+
+ fragments.append({
+ 'frag_index': frag_index,
+ 'url': frag_url,
+ 'decrypt_info': decrypt_info,
+ 'byte_range': byte_range,
+ 'media_sequence': media_sequence,
+ })
+ media_sequence += 1
+
+ elif line.startswith('#EXT-X-MAP'):
+ if format_index and discontinuity_count != format_index:
+ continue
+ if frag_index > 0:
self.report_error(
- 'giving up after %s fragment retries' % fragment_retries)
+ 'Initialization fragment found after media fragments, unable to download')
return False
- if decrypt_info['METHOD'] == 'AES-128':
- iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
- decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
- self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
- # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
- # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
- # not what it decrypts to.
- if not test:
- frag_content = AES.new(
- decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
- self._append_fragment(ctx, frag_content)
- # We only download the first fragment during the test
- if test:
- break
- i += 1
+ frag_index += 1
+ map_info = parse_m3u8_attributes(line[11:])
+ frag_url = (
+ map_info.get('URI')
+ if re.match(r'^https?://', map_info.get('URI'))
+ else compat_urlparse.urljoin(man_url, map_info.get('URI')))
+ if extra_query:
+ frag_url = update_url_query(frag_url, extra_query)
+
+ fragments.append({
+ 'frag_index': frag_index,
+ 'url': frag_url,
+ 'decrypt_info': decrypt_info,
+ 'byte_range': byte_range,
+ 'media_sequence': media_sequence
+ })
media_sequence += 1
+
+ if map_info.get('BYTERANGE'):
+ splitted_byte_range = map_info.get('BYTERANGE').split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+
elif line.startswith('#EXT-X-KEY'):
decrypt_url = decrypt_info.get('URI')
decrypt_info = parse_m3u8_attributes(line[11:])
@@ -197,6 +222,7 @@ class HlsFD(FragmentFD):
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None
+
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:])
elif line.startswith('#EXT-X-BYTERANGE'):
@@ -210,7 +236,114 @@ class HlsFD(FragmentFD):
ad_frag_next = True
elif is_ad_fragment_end(line):
ad_frag_next = False
+ elif line.startswith('#EXT-X-DISCONTINUITY'):
+ discontinuity_count += 1
+ i += 1
+
+ # We only download the first fragment during the test
+ if self.params.get('test', False):
+ fragments = [fragments[0] if fragments else None]
+
+ if real_downloader:
+ info_dict['fragments'] = fragments
+ fd = real_downloader(self.ydl, self.params)
+ # TODO: Make progress updates work without hooking twice
+ # for ph in self._progress_hooks:
+ # fd.add_progress_hook(ph)
+ return fd.real_download(filename, info_dict)
+
+ if is_webvtt:
+ def pack_fragment(frag_content, frag_index):
+ output = io.StringIO()
+ adjust = 0
+ overflow = False
+ mpegts_last = None
+ for block in webvtt.parse_fragment(frag_content):
+ if isinstance(block, webvtt.CueBlock):
+ extra_state['webvtt_mpegts_last'] = mpegts_last
+ if overflow:
+ extra_state['webvtt_mpegts_adjust'] += 1
+ overflow = False
+ block.start += adjust
+ block.end += adjust
+
+ dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
+
+ ready = []
+
+ i = 0
+ is_new = True
+ while i < len(dedup_window):
+ wcue = dedup_window[i]
+ wblock = webvtt.CueBlock.from_json(wcue)
+ i += 1
+ if wblock.hinges(block):
+ wcue['end'] = block.end
+ is_new = False
+ continue
+ if wblock == block:
+ is_new = False
+ continue
+ if wblock.end > block.start:
+ continue
+ ready.append(wblock)
+ i -= 1
+ del dedup_window[i]
+
+ if is_new:
+ dedup_window.append(block.as_json)
+ for block in ready:
+ block.write_into(output)
+
+ # we only emit cues once they fall out of the duplicate window
+ continue
+ elif isinstance(block, webvtt.Magic):
+ # take care of MPEG PES timestamp overflow
+ if block.mpegts is None:
+ block.mpegts = 0
+ extra_state.setdefault('webvtt_mpegts_adjust', 0)
+ block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
+ if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
+ overflow = True
+ block.mpegts += 1 << 33
+ mpegts_last = block.mpegts
+
+ if frag_index == 1:
+ extra_state['webvtt_mpegts'] = block.mpegts or 0
+ extra_state['webvtt_local'] = block.local or 0
+ # XXX: block.local = block.mpegts = None ?
+ else:
+ if block.mpegts is not None and block.local is not None:
+ adjust = (
+ (block.mpegts - extra_state.get('webvtt_mpegts', 0))
+ - (block.local - extra_state.get('webvtt_local', 0))
+ )
+ continue
+ elif isinstance(block, webvtt.HeaderBlock):
+ if frag_index != 1:
+ # XXX: this should probably be silent as well
+ # or verify that all segments contain the same data
+ self.report_warning(bug_reports_message(
+ 'Discarding a %s block found in the middle of the stream; '
+ 'if the subtitles display incorrectly,'
+ % (type(block).__name__)))
+ continue
+ block.write_into(output)
+
+ return output.getvalue().encode('utf-8')
+
+ def fin_fragments():
+ dedup_window = extra_state.get('webvtt_dedup_window')
+ if not dedup_window:
+ return b''
+
+ output = io.StringIO()
+ for cue in dedup_window:
+ webvtt.CueBlock.from_json(cue).write_into(output)
- self._finish_frag_download(ctx)
+ return output.getvalue().encode('utf-8')
- return True
+ self.download_and_append_fragments(
+ ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
+ else:
+ return self.download_and_append_fragments(ctx, fragments, info_dict)
diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py
index d8ac41d..2e95bb9 100644
--- a/hypervideo_dl/downloader/http.py
+++ b/hypervideo_dl/downloader/http.py
@@ -18,6 +18,7 @@ from ..utils import (
int_or_none,
sanitize_open,
sanitized_Request,
+ ThrottledDownload,
write_xattr,
XAttrMetadataError,
XAttrUnavailableError,
@@ -27,6 +28,7 @@ from ..utils import (
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
+ request_data = info_dict.get('request_data', None)
class DownloadContext(dict):
__getattr__ = dict.get
@@ -46,8 +48,9 @@ class HttpFD(FileDownloader):
is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else (
- info_dict.get('downloader_options', {}).get('http_chunk_size')
- or self.params.get('http_chunk_size') or 0)
+ self.params.get('http_chunk_size')
+ or info_dict.get('downloader_options', {}).get('http_chunk_size')
+ or 0)
ctx.open_mode = 'wb'
ctx.resume_len = 0
@@ -55,6 +58,7 @@ class HttpFD(FileDownloader):
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
ctx.chunk_size = None
+ throttle_start = None
if self.params.get('continuedl', True):
# Establish possible resume length
@@ -101,7 +105,7 @@ class HttpFD(FileDownloader):
range_end = ctx.data_len - 1
has_range = range_start is not None
ctx.has_range = has_range
- request = sanitized_Request(url, None, headers)
+ request = sanitized_Request(url, request_data, headers)
if has_range:
set_range(request, range_start, range_end)
# Establish connection
@@ -152,7 +156,7 @@ class HttpFD(FileDownloader):
try:
# Open the connection again without the range header
ctx.data = self.ydl.urlopen(
- sanitized_Request(url, None, headers))
+ sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
@@ -175,7 +179,7 @@ class HttpFD(FileDownloader):
'status': 'finished',
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
- })
+ }, info_dict)
raise SucceedDownload()
else:
# The length does not match, we start the download over
@@ -194,6 +198,7 @@ class HttpFD(FileDownloader):
raise RetryDownload(err)
def download():
+ nonlocal throttle_start
data_len = ctx.data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
@@ -235,7 +240,7 @@ class HttpFD(FileDownloader):
while True:
try:
# Download and write
- data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
+ data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket.timeout as e:
@@ -307,11 +312,24 @@ class HttpFD(FileDownloader):
'eta': eta,
'speed': speed,
'elapsed': now - ctx.start_time,
- })
+ 'ctx_id': info_dict.get('ctx_id'),
+ }, info_dict)
if data_len is not None and byte_counter == data_len:
break
+ if speed and speed < (self.params.get('throttledratelimit') or 0):
+ # The speed must stay below the limit for 3 seconds
+ # This prevents raising an error when the speed temporarily goes down
+ if throttle_start is None:
+ throttle_start = now
+ elif now - throttle_start > 3:
+ if ctx.stream is not None and ctx.tmpfilename != '-':
+ ctx.stream.close()
+ raise ThrottledDownload()
+ elif speed:
+ throttle_start = None
+
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
@@ -342,7 +360,8 @@ class HttpFD(FileDownloader):
'filename': ctx.filename,
'status': 'finished',
'elapsed': time.time() - ctx.start_time,
- })
+ 'ctx_id': info_dict.get('ctx_id'),
+ }, info_dict)
return True
@@ -354,6 +373,8 @@ class HttpFD(FileDownloader):
count += 1
if count <= retries:
self.report_retry(e.source_error, count, retries)
+ else:
+ self.to_screen(f'[download] Got server HTTP error: {e.source_error}')
continue
except NextFragment:
continue
diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py
index 1ca666b..09516ab 100644
--- a/hypervideo_dl/downloader/ism.py
+++ b/hypervideo_dl/downloader/ism.py
@@ -48,7 +48,7 @@ def write_piff_header(stream, params):
language = params.get('language', 'und')
height = params.get('height', 0)
width = params.get('width', 0)
- is_audio = width == 0 and height == 0
+ stream_type = params['stream_type']
creation_time = modification_time = int(time.time())
ftyp_payload = b'isml' # major brand
@@ -77,7 +77,7 @@ def write_piff_header(stream, params):
tkhd_payload += u32.pack(0) * 2 # reserved
tkhd_payload += s16.pack(0) # layer
tkhd_payload += s16.pack(0) # alternate group
- tkhd_payload += s88.pack(1 if is_audio else 0) # volume
+ tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume
tkhd_payload += u16.pack(0) # reserved
tkhd_payload += unity_matrix
tkhd_payload += u1616.pack(width)
@@ -93,19 +93,34 @@ def write_piff_header(stream, params):
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
hdlr_payload = u32.pack(0) # pre defined
- hdlr_payload += b'soun' if is_audio else b'vide' # handler type
- hdlr_payload += u32.pack(0) * 3 # reserved
- hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
+ if stream_type == 'audio': # handler type
+ hdlr_payload += b'soun'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'SoundHandler\0' # name
+ elif stream_type == 'video':
+ hdlr_payload += b'vide'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'VideoHandler\0' # name
+ elif stream_type == 'text':
+ hdlr_payload += b'subt'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'SubtitleHandler\0' # name
+ else:
+ assert False
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
- if is_audio:
+ if stream_type == 'audio':
smhd_payload = s88.pack(0) # balance
smhd_payload += u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
- else:
+ elif stream_type == 'video':
vmhd_payload = u16.pack(0) # graphics mode
vmhd_payload += u16.pack(0) * 3 # opcolor
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
+ elif stream_type == 'text':
+ media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header
+ else:
+ assert False
minf_payload = media_header_box
dref_payload = u32.pack(1) # entry count
@@ -117,7 +132,7 @@ def write_piff_header(stream, params):
sample_entry_payload = u8.pack(0) * 6 # reserved
sample_entry_payload += u16.pack(1) # data reference index
- if is_audio:
+ if stream_type == 'audio':
sample_entry_payload += u32.pack(0) * 2 # reserved
sample_entry_payload += u16.pack(params.get('channels', 2))
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
@@ -127,7 +142,7 @@ def write_piff_header(stream, params):
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
- else:
+ elif stream_type == 'video':
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined
@@ -155,6 +170,18 @@ def write_piff_header(stream, params):
avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
+ else:
+ assert False
+ elif stream_type == 'text':
+ if fourcc == 'TTML':
+ sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace
+ sample_entry_payload += b'\0' # schema location
+ sample_entry_payload += b'\0' # auxiliary MIME types (??)
+ sample_entry_box = box(b'stpp', sample_entry_payload)
+ else:
+ assert False
+ else:
+ assert False
stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
@@ -219,12 +246,15 @@ class IsmFD(FragmentFD):
'total_frags': len(segments),
}
- self._prepare_and_start_frag_download(ctx)
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ extra_state = ctx.setdefault('extra_state', {
+ 'ism_track_written': False,
+ })
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- track_written = False
frag_index = 0
for i, segment in enumerate(segments):
frag_index += 1
@@ -236,11 +266,11 @@ class IsmFD(FragmentFD):
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
if not success:
return False
- if not track_written:
+ if not extra_state['ism_track_written']:
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
- track_written = True
+ extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content)
break
except compat_urllib_error.HTTPError as err:
@@ -254,6 +284,6 @@ class IsmFD(FragmentFD):
self.report_error('giving up after %s fragment retries' % fragment_retries)
return False
- self._finish_frag_download(ctx)
+ self._finish_frag_download(ctx, info_dict)
return True
diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py
new file mode 100644
index 0000000..f0f4dc6
--- /dev/null
+++ b/hypervideo_dl/downloader/mhtml.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import io
+import quopri
+import re
+import uuid
+
+from .fragment import FragmentFD
+from ..utils import (
+ escapeHTML,
+ formatSeconds,
+ srt_subtitles_timecode,
+ urljoin,
+)
+from ..version import __version__ as YT_DLP_VERSION
+
+
+class MhtmlFD(FragmentFD):
+ FD_NAME = 'mhtml'
+
+ _STYLESHEET = """\
+html, body {
+ margin: 0;
+ padding: 0;
+ height: 100vh;
+}
+
+html {
+ overflow-y: scroll;
+ scroll-snap-type: y mandatory;
+}
+
+body {
+ scroll-snap-type: y mandatory;
+ display: flex;
+ flex-flow: column;
+}
+
+body > figure {
+ max-width: 100vw;
+ max-height: 100vh;
+ scroll-snap-align: center;
+}
+
+body > figure > figcaption {
+ text-align: center;
+ height: 2.5em;
+}
+
+body > figure > img {
+ display: block;
+ margin: auto;
+ max-width: 100%;
+ max-height: calc(100vh - 5em);
+}
+"""
+ _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
+ _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
+
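+    # Encode a header value as an RFC 2047 "encoded-word" (=?utf-8?Q?...?=)
+    # using quoted-printable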
+ @staticmethod
+ def _escape_mime(s):
+ return '=?utf-8?Q?' + (b''.join(
+ bytes((b,)) if b >= 0x20 else b'=%02X' % b
+ for b in quopri.encodestring(s.encode('utf-8'), header=True)
+ )).decode('us-ascii') + '?='
+
+ def _gen_cid(self, i, fragment, frag_boundary):
+ return '%u.%s@hypervideo.github.io.invalid' % (i, frag_boundary)
+
+ def _gen_stub(self, *, fragments, frag_boundary, title):
+ output = io.StringIO()
+
+ output.write((
+ '<!DOCTYPE html>'
+ '<html>'
+ '<head>'
+ '' '<meta name="generator" content="hypervideo {version}">'
+ '' '<title>{title}</title>'
+ '' '<style>{styles}</style>'
+ '<body>'
+ ).format(
+ version=escapeHTML(YT_DLP_VERSION),
+ styles=self._STYLESHEET,
+ title=escapeHTML(title)
+ ))
+
+ t0 = 0
+ for i, frag in enumerate(fragments):
+ output.write('<figure>')
+ try:
+ t1 = t0 + frag['duration']
+ output.write((
+ '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
+ ).format(
+ num=i + 1,
+ t0=srt_subtitles_timecode(t0),
+ t1=srt_subtitles_timecode(t1),
+ duration=formatSeconds(frag['duration'], msec=True)
+ ))
+ except (KeyError, ValueError, TypeError):
+ t1 = None
+ output.write((
+ '<figcaption>Slide #{num}</figcaption>'
+ ).format(num=i + 1))
+ output.write('<img src="cid:{cid}">'.format(
+ cid=self._gen_cid(i, frag, frag_boundary)))
+ output.write('</figure>')
+ t0 = t1
+
+ return output.getvalue()
+
+ def real_download(self, filename, info_dict):
+ fragment_base_url = info_dict.get('fragment_base_url')
+ fragments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+ title = info_dict['title']
+ origin = info_dict['webpage_url']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(fragments),
+ }
+
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ extra_state = ctx.setdefault('extra_state', {
+ 'header_written': False,
+ 'mime_boundary': str(uuid.uuid4()).replace('-', ''),
+ })
+
+ frag_boundary = extra_state['mime_boundary']
+
+ if not extra_state['header_written']:
+ stub = self._gen_stub(
+ fragments=fragments,
+ frag_boundary=frag_boundary,
+ title=title
+ )
+
+ ctx['dest_stream'].write((
+ 'MIME-Version: 1.0\r\n'
+ 'From: <nowhere@hypervideo.github.io.invalid>\r\n'
+ 'To: <nowhere@hypervideo.github.io.invalid>\r\n'
+ 'Subject: {title}\r\n'
+ 'Content-type: multipart/related; '
+ '' 'boundary="{boundary}"; '
+ '' 'type="text/html"\r\n'
+ 'X.hypervideo.Origin: {origin}\r\n'
+ '\r\n'
+ '--{boundary}\r\n'
+ 'Content-Type: text/html; charset=utf-8\r\n'
+ 'Content-Length: {length}\r\n'
+ '\r\n'
+ '{stub}\r\n'
+ ).format(
+ origin=origin,
+ boundary=frag_boundary,
+ length=len(stub),
+ title=self._escape_mime(title),
+ stub=stub
+ ).encode('utf-8'))
+ extra_state['header_written'] = True
+
+ for i, fragment in enumerate(fragments):
+ if (i + 1) <= ctx['fragment_index']:
+ continue
+
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+ if not success:
+ continue
+
+ mime_type = b'image/jpeg'
+ if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
+ mime_type = b'image/png'
+ if frag_content.startswith((b'GIF87a', b'GIF89a')):
+ mime_type = b'image/gif'
+ if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP':
+ mime_type = b'image/webp'
+
+ frag_header = io.BytesIO()
+ frag_header.write(
+ b'--%b\r\n' % frag_boundary.encode('us-ascii'))
+ frag_header.write(
+ b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
+ frag_header.write(
+ b'Content-type: %b\r\n' % mime_type)
+ frag_header.write(
+ b'Content-length: %u\r\n' % len(frag_content))
+ frag_header.write(
+ b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
+ frag_header.write(
+ b'X.hypervideo.Duration: %f\r\n' % fragment['duration'])
+ frag_header.write(b'\r\n')
+ self._append_fragment(
+ ctx, frag_header.getvalue() + frag_content + b'\r\n')
+
+ ctx['dest_stream'].write(
+ b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
+ self._finish_frag_download(ctx, info_dict)
+ return True
diff --git a/hypervideo_dl/downloader/niconico.py b/hypervideo_dl/downloader/niconico.py
new file mode 100644
index 0000000..521dfec
--- /dev/null
+++ b/hypervideo_dl/downloader/niconico.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import threading
+
+from .common import FileDownloader
+from ..downloader import get_suitable_downloader
+from ..extractor.niconico import NiconicoIE
+from ..utils import sanitized_Request
+
+
+class NiconicoDmcFD(FileDownloader):
+ """ Downloading niconico douga from DMC with heartbeat """
+
+ FD_NAME = 'niconico_dmc'
+
+ def real_download(self, filename, info_dict):
+ self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+
+ ie = NiconicoIE(self.ydl)
+ info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
+
+ fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
+
+ success = download_complete = False
+ timer = [None]
+ heartbeat_lock = threading.Lock()
+ heartbeat_url = heartbeat_info_dict['url']
+ heartbeat_data = heartbeat_info_dict['data'].encode()
+ heartbeat_interval = heartbeat_info_dict.get('interval', 30)
+
+ request = sanitized_Request(heartbeat_url, heartbeat_data)
+
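+        # POST the heartbeat and re-arm a timer so the DMC session is kept
+        # alive until the actual download completes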
+ def heartbeat():
+ try:
+ self.ydl.urlopen(request).read()
+ except Exception:
+ self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
+
+ with heartbeat_lock:
+ if not download_complete:
+ timer[0] = threading.Timer(heartbeat_interval, heartbeat)
+ timer[0].start()
+
+ heartbeat_info_dict['ping']()
+ self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
+ try:
+ heartbeat()
+ if type(fd).__name__ == 'HlsFD':
+ info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
+ success = fd.real_download(filename, info_dict)
+ finally:
+ if heartbeat_lock:
+ with heartbeat_lock:
+ timer[0].cancel()
+ download_complete = True
+ return success
diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py
index fbb7f51..6dca647 100644
--- a/hypervideo_dl/downloader/rtmp.py
+++ b/hypervideo_dl/downloader/rtmp.py
@@ -66,7 +66,7 @@ class RtmpFD(FileDownloader):
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
- })
+ }, info_dict)
cursor_in_new_line = False
else:
# no percent for live streams
@@ -82,18 +82,20 @@ class RtmpFD(FileDownloader):
'status': 'downloading',
'elapsed': time_now - start,
'speed': speed,
- })
+ }, info_dict)
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
- finally:
+ if not cursor_in_new_line:
+ self.to_screen('')
+ return proc.wait()
+ except BaseException: # Including KeyboardInterrupt
+ proc.kill()
proc.wait()
- if not cursor_in_new_line:
- self.to_screen('')
- return proc.returncode
+ raise
url = info_dict['url']
player_url = info_dict.get('player_url')
@@ -115,7 +117,7 @@ class RtmpFD(FileDownloader):
# Check for rtmpdump first
if not check_executable('rtmpdump', ['-h']):
- self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
+ self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
@@ -206,7 +208,7 @@ class RtmpFD(FileDownloader):
'filename': filename,
'status': 'finished',
'elapsed': time.time() - started,
- })
+ }, info_dict)
return True
else:
self.to_stderr('\n')
diff --git a/hypervideo_dl/downloader/rtsp.py b/hypervideo_dl/downloader/rtsp.py
index 939358b..7815d59 100644
--- a/hypervideo_dl/downloader/rtsp.py
+++ b/hypervideo_dl/downloader/rtsp.py
@@ -24,7 +24,7 @@ class RtspFD(FileDownloader):
args = [
'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
else:
- self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one')
return False
self._debug_cmd(args)
@@ -39,7 +39,7 @@ class RtspFD(FileDownloader):
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
- })
+ }, info_dict)
return True
else:
self.to_stderr('\n')
diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py
new file mode 100644
index 0000000..0882220
--- /dev/null
+++ b/hypervideo_dl/downloader/websocket.py
@@ -0,0 +1,59 @@
+import os
+import signal
+import asyncio
+import threading
+
+try:
+ import websockets
+ has_websockets = True
+except ImportError:
+ has_websockets = False
+
+from .common import FileDownloader
+from .external import FFmpegFD
+
+
+class FFmpegSinkFD(FileDownloader):
+ """ A sink to ffmpeg for downloading fragments in any form """
+
+ def real_download(self, filename, info_dict):
+ info_copy = info_dict.copy()
+ info_copy['url'] = '-'
+
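+        # Pump the async source into ffmpeg's stdin; when the stream ends (or
+        # the pipe breaks), send SIGINT to our own process to unblock
+        # FFmpegFD's wait on the ffmpeg subprocess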
+ async def call_conn(proc, stdin):
+ try:
+ await self.real_connection(stdin, info_dict)
+ except (BrokenPipeError, OSError):
+ pass
+ finally:
+ try:
+ stdin.flush()
+ stdin.close()
+ except OSError:
+ pass
+ os.kill(os.getpid(), signal.SIGINT)
+
+ class FFmpegStdinFD(FFmpegFD):
+ @classmethod
+ def get_basename(cls):
+ return FFmpegFD.get_basename()
+
+ def on_process_started(self, proc, stdin):
+ thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), ))
+ thread.start()
+
+ return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy)
+
+ async def real_connection(self, sink, info_dict):
+ """ Override this in subclasses """
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+
+class WebSocketFragmentFD(FFmpegSinkFD):
+ async def real_connection(self, sink, info_dict):
+ async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws:
+ while True:
+ recv = await ws.recv()
+ if isinstance(recv, str):
+ recv = recv.encode('utf8')
+ sink.write(recv)
diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py
new file mode 100644
index 0000000..ef4205e
--- /dev/null
+++ b/hypervideo_dl/downloader/youtube_live_chat.py
@@ -0,0 +1,236 @@
+from __future__ import division, unicode_literals
+
+import json
+import time
+
+from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import (
+ try_get,
+ dict_get,
+ int_or_none,
+ RegexNotFoundError,
+)
+from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
+
+
+class YoutubeLiveChatFD(FragmentFD):
+ """ Downloads YouTube live chats fragment by fragment """
+
+ FD_NAME = 'youtube_live_chat'
+
+ def real_download(self, filename, info_dict):
+ video_id = info_dict['video_id']
+ self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ test = self.params.get('test', False)
+
+ ctx = {
+ 'filename': filename,
+ 'live': True,
+ 'total_frags': None,
+ }
+
+ ie = YT_BaseIE(self.ydl)
+
+ start_time = int(time.time() * 1000)
+
+ def dl_fragment(url, data=None, headers=None):
+ http_headers = info_dict.get('http_headers', {})
+ if headers:
+ http_headers = http_headers.copy()
+ http_headers.update(headers)
+ return self._download_fragment(ctx, url, info_dict, http_headers, data)
+
+ def parse_actions_replay(live_chat_continuation):
+ offset = continuation_id = click_tracking_params = None
+ processed_fragment = bytearray()
+ for action in live_chat_continuation.get('actions', []):
+ if 'replayChatItemAction' in action:
+ replay_chat_item_action = action['replayChatItemAction']
+ offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+ processed_fragment.extend(
+ json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+ if offset is not None:
+ continuation = try_get(
+ live_chat_continuation,
+ lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
+ if continuation:
+ continuation_id = continuation.get('continuation')
+ click_tracking_params = continuation.get('clickTrackingParams')
+ self._append_fragment(ctx, processed_fragment)
+ return continuation_id, offset, click_tracking_params
+
+ def try_refresh_replay_beginning(live_chat_continuation):
+ # choose the second option, which contains the unfiltered live chat replay
+ refresh_continuation = try_get(
+ live_chat_continuation,
+ lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
+ if refresh_continuation:
+ # no data yet but required to call _append_fragment
+ self._append_fragment(ctx, b'')
+ refresh_continuation_id = refresh_continuation.get('continuation')
+ offset = 0
+ click_tracking_params = refresh_continuation.get('trackingParams')
+ return refresh_continuation_id, offset, click_tracking_params
+ return parse_actions_replay(live_chat_continuation)
+
+ live_offset = 0
+
+ def parse_actions_live(live_chat_continuation):
+ nonlocal live_offset
+ continuation_id = click_tracking_params = None
+ processed_fragment = bytearray()
+ for action in live_chat_continuation.get('actions', []):
+ timestamp = self.parse_live_timestamp(action)
+ if timestamp is not None:
+ live_offset = timestamp - start_time
+ # compatibility with replay format
+ pseudo_action = {
+ 'replayChatItemAction': {'actions': [action]},
+ 'videoOffsetTimeMsec': str(live_offset),
+ 'isLive': True,
+ }
+ processed_fragment.extend(
+ json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+ continuation_data_getters = [
+ lambda x: x['continuations'][0]['invalidationContinuationData'],
+ lambda x: x['continuations'][0]['timedContinuationData'],
+ ]
+ continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
+ if continuation_data:
+ continuation_id = continuation_data.get('continuation')
+ click_tracking_params = continuation_data.get('clickTrackingParams')
+ timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
+ if timeout_ms is not None:
+ time.sleep(timeout_ms / 1000)
+ self._append_fragment(ctx, processed_fragment)
+ return continuation_id, live_offset, click_tracking_params
+
+ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
+ count = 0
+ while count <= fragment_retries:
+ try:
+ success, raw_fragment = dl_fragment(url, request_data, headers)
+ if not success:
+ return False, None, None, None
+ try:
+ data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ data = None
+ if not data:
+ data = json.loads(raw_fragment)
+ live_chat_continuation = try_get(
+ data,
+ lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
+ if info_dict['protocol'] == 'youtube_live_chat_replay':
+ if frag_index == 1:
+ continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
+ else:
+ continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
+ elif info_dict['protocol'] == 'youtube_live_chat':
+ continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
+ return True, continuation_id, offset, click_tracking_params
+ except compat_urllib_error.HTTPError as err:
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False, None, None, None
+
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ success, raw_fragment = dl_fragment(info_dict['url'])
+ if not success:
+ return False
+ try:
+ data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ return False
+ continuation_id = try_get(
+ data,
+ lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
+ # no data yet but required to call _append_fragment
+ self._append_fragment(ctx, b'')
+
+ ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+
+ if not ytcfg:
+ return False
+ api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
+ innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
+ if not api_key or not innertube_context:
+ return False
+ visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
+ if info_dict['protocol'] == 'youtube_live_chat_replay':
+ url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+ chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
+ elif info_dict['protocol'] == 'youtube_live_chat':
+ url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
+ chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
+
+ frag_index = offset = 0
+ click_tracking_params = None
+ while continuation_id is not None:
+ frag_index += 1
+ request_data = {
+ 'context': innertube_context,
+ 'continuation': continuation_id,
+ }
+ if frag_index > 1:
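+ # resume 5 seconds before the last parsed offset (floored at 0), presumably so messages near the fragment boundary are not missed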
+ request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+ if click_tracking_params:
+ request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
+ headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
+ headers.update({'content-type': 'application/json'})
+ fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+ success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+ url, frag_index, fragment_request_data, headers)
+ else:
+ success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+ chat_page_url, frag_index)
+ if not success:
+ return False
+ if test:
+ break
+
+ self._finish_frag_download(ctx, info_dict)
+ return True
+
+ @staticmethod
+ def parse_live_timestamp(action):
+ action_content = dict_get(
+ action,
+ ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
+ if not isinstance(action_content, dict):
+ return None
+ item = dict_get(action_content, ['item', 'bannerRenderer'])
+ if not isinstance(item, dict):
+ return None
+ renderer = dict_get(item, [
+ # text
+ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+ 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+ # ticker
+ 'liveChatTickerPaidMessageItemRenderer',
+ 'liveChatTickerSponsorItemRenderer',
+ # banner
+ 'liveChatBannerRenderer',
+ ])
+ if not isinstance(renderer, dict):
+ return None
+ parent_item_getters = [
+ lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
+ lambda x: x['contents'],
+ ]
+ parent_item = try_get(renderer, parent_item_getters, dict)
+ if parent_item:
+ renderer = dict_get(parent_item, [
+ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+ 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+ ])
+ if not isinstance(renderer, dict):
+ return None
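+ # timestampUsec is in microseconds; int_or_none with scale 1000 converts it to milliseconds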
+ return int_or_none(renderer.get('timestampUsec'), 1000)
diff --git a/hypervideo_dl/extractor/__init__.py b/hypervideo_dl/extractor/__init__.py
index 18d8dbc..198c4ae 100644
--- a/hypervideo_dl/extractor/__init__.py
+++ b/hypervideo_dl/extractor/__init__.py
@@ -1,13 +1,17 @@
from __future__ import unicode_literals
+from ..utils import load_plugins
+
try:
from .lazy_extractors import *
from .lazy_extractors import _ALL_CLASSES
_LAZY_LOADER = True
+ _PLUGIN_CLASSES = {}
except ImportError:
_LAZY_LOADER = False
- from .extractors import *
+if not _LAZY_LOADER:
+ from .extractors import *
_ALL_CLASSES = [
klass
for name, klass in globals().items()
@@ -15,6 +19,9 @@ except ImportError:
]
_ALL_CLASSES.append(GenericIE)
+ _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+ _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+
def gen_extractor_classes():
""" Return a list of supported extractors.
diff --git a/hypervideo_dl/extractor/abc.py b/hypervideo_dl/extractor/abc.py
index 6637f4f..3e20216 100644
--- a/hypervideo_dl/extractor/abc.py
+++ b/hypervideo_dl/extractor/abc.py
@@ -12,6 +12,7 @@ from ..utils import (
js_to_json,
int_or_none,
parse_iso8601,
+ str_or_none,
try_get,
unescapeHTML,
update_url_query,
@@ -20,7 +21,7 @@ from ..utils import (
class ABCIE(InfoExtractor):
IE_NAME = 'abc.net.au'
- _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P<id>\d{5,})'
_TESTS = [{
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
@@ -34,7 +35,7 @@ class ABCIE(InfoExtractor):
'skip': 'this video has expired',
}, {
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
- 'md5': 'db2a5369238b51f9811ad815b69dc086',
+ 'md5': '4ebd61bdc82d9a8b722f64f1f4b4d121',
'info_dict': {
'id': 'NvqvPeNZsHU',
'ext': 'mp4',
@@ -58,39 +59,102 @@ class ABCIE(InfoExtractor):
}, {
'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
'only_matching': True,
+ }, {
+ 'url': 'https://www.abc.net.au/btn/classroom/wwi-centenary/10527914',
+ 'info_dict': {
+ 'id': '10527914',
+ 'ext': 'mp4',
+ 'title': 'WWI Centenary',
+ 'description': 'md5:c2379ec0ca84072e86b446e536954546',
+ }
+ }, {
+ 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
+ 'info_dict': {
+ 'id': '12342074',
+ 'ext': 'mp4',
+ 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
+ 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
+ }
+ }, {
+ 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
+ 'info_dict': {
+ 'id': 'tDL8Ld4dK_8',
+ 'ext': 'mp4',
+ 'title': 'Fortnite Banned From Apple and Google App Stores',
+ 'description': 'md5:a6df3f36ce8f816b74af4bd6462f5651',
+ 'upload_date': '20200813',
+ 'uploader': 'Behind the News',
+ 'uploader_id': 'behindthenews',
+ }
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- mobj = re.search(
- r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
- webpage)
+ mobj = re.search(r'<a\s+href="(?P<url>[^"]+)"\s+data-duration="\d+"\s+title="Download audio directly">', webpage)
+ if mobj:
+ urls_info = mobj.groupdict()
+ youtube = False
+ video = False
+ else:
+ mobj = re.search(r'<a href="(?P<url>http://www\.youtube\.com/watch\?v=[^"]+)"><span><strong>External Link:</strong>',
+ webpage)
+ if mobj is None:
+ mobj = re.search(r'<iframe width="100%" src="(?P<url>//www\.youtube-nocookie\.com/embed/[^?"]+)', webpage)
+ if mobj:
+ urls_info = mobj.groupdict()
+ youtube = True
+ video = True
+
if mobj is None:
- expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
- if expired:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
- raise ExtractorError('Unable to extract video urls')
+ mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
+ if mobj is None:
+ mobj = re.search(
+ r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
+ webpage)
+ if mobj is None:
+ expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
+ if expired:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
+ raise ExtractorError('Unable to extract video urls')
- urls_info = self._parse_json(
- mobj.group('json_data'), video_id, transform_source=js_to_json)
+ urls_info = self._parse_json(
+ mobj.group('json_data'), video_id, transform_source=js_to_json)
+ youtube = mobj.group('type') == 'YouTube'
+ video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
if not isinstance(urls_info, list):
urls_info = [urls_info]
- if mobj.group('type') == 'YouTube':
+ if youtube:
return self.playlist_result([
self.url_result(url_info['url']) for url_info in urls_info])
- formats = [{
- 'url': url_info['url'],
- 'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
- 'width': int_or_none(url_info.get('width')),
- 'height': int_or_none(url_info.get('height')),
- 'tbr': int_or_none(url_info.get('bitrate')),
- 'filesize': int_or_none(url_info.get('filesize')),
- } for url_info in urls_info]
+ formats = []
+ for url_info in urls_info:
+ height = int_or_none(url_info.get('height'))
+ bitrate = int_or_none(url_info.get('bitrate'))
+ width = int_or_none(url_info.get('width'))
+ format_id = None
+ mobj = re.search(r'_(?:(?P<height>\d+)|(?P<bitrate>\d+)k)\.mp4$', url_info['url'])
+ if mobj:
+ height_from_url = mobj.group('height')
+ if height_from_url:
+ height = height or int_or_none(height_from_url)
+ width = width or int_or_none(url_info.get('label'))
+ else:
+ bitrate = bitrate or int_or_none(mobj.group('bitrate'))
+ format_id = str_or_none(url_info.get('label'))
+ formats.append({
+ 'url': url_info['url'],
+ 'vcodec': url_info.get('codec') if video else 'none',
+ 'width': width,
+ 'height': height,
+ 'tbr': bitrate,
+ 'filesize': int_or_none(url_info.get('filesize')),
+ 'format_id': format_id
+ })
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/abcnews.py b/hypervideo_dl/extractor/abcnews.py
index 908c833..296b8ce 100644
--- a/hypervideo_dl/extractor/abcnews.py
+++ b/hypervideo_dl/extractor/abcnews.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .amp import AMPIE
from .common import InfoExtractor
@@ -59,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('display_id')
video_id = mobj.group('id')
info_dict = self._extract_feed_info(
diff --git a/hypervideo_dl/extractor/abcotvs.py b/hypervideo_dl/extractor/abcotvs.py
index 0bc69a6..5bff466 100644
--- a/hypervideo_dl/extractor/abcotvs.py
+++ b/hypervideo_dl/extractor/abcotvs.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -55,7 +54,7 @@ class ABCOTVSIE(InfoExtractor):
}
def _real_extract(self, url):
- site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+ site, display_id, video_id = self._match_valid_url(url).groups()
display_id = display_id or video_id
station = self._SITE_MAP[site]
diff --git a/hypervideo_dl/extractor/acast.py b/hypervideo_dl/extractor/acast.py
index b9355a2..63587c5 100644
--- a/hypervideo_dl/extractor/acast.py
+++ b/hypervideo_dl/extractor/acast.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -80,7 +79,7 @@ class ACastIE(ACastBaseIE):
}]
def _real_extract(self, url):
- channel, display_id = re.match(self._VALID_URL, url).groups()
+ channel, display_id = self._match_valid_url(url).groups()
episode = self._call_api(
'%s/episodes/%s' % (channel, display_id),
display_id, {'showInfo': 'true'})
diff --git a/hypervideo_dl/extractor/adobepass.py b/hypervideo_dl/extractor/adobepass.py
index 38dca1b..9378c33 100644
--- a/hypervideo_dl/extractor/adobepass.py
+++ b/hypervideo_dl/extractor/adobepass.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
import time
import xml.etree.ElementTree as etree
@@ -9,6 +10,7 @@ from .common import InfoExtractor
from ..compat import (
compat_kwargs,
compat_urlparse,
+ compat_getpass
)
from ..utils import (
unescapeHTML,
@@ -35,6 +37,11 @@ MSO_INFO = {
'username_field': 'email',
'password_field': 'loginpassword',
},
+ 'RCN': {
+ 'name': 'RCN',
+ 'username_field': 'UserName',
+ 'password_field': 'UserPassword',
+ },
'Rogers': {
'name': 'Rogers',
'username_field': 'UserName',
@@ -60,11 +67,25 @@ MSO_INFO = {
'username_field': 'IDToken1',
'password_field': 'IDToken2',
},
+ 'Spectrum': {
+ 'name': 'Spectrum',
+ 'username_field': 'IDToken1',
+ 'password_field': 'IDToken2',
+ },
+ 'Philo': {
+ 'name': 'Philo',
+ 'username_field': 'ident'
+ },
'Verizon': {
'name': 'Verizon FiOS',
'username_field': 'IDToken1',
'password_field': 'IDToken2',
},
+ 'Cablevision': {
+ 'name': 'Optimum/Cablevision',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
'thr030': {
'name': '3 Rivers Communications'
},
@@ -1319,6 +1340,11 @@ MSO_INFO = {
'cou060': {
'name': 'Zito Media'
},
+ 'slingtv': {
+ 'name': 'Sling TV',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
}
@@ -1409,7 +1435,7 @@ class AdobePassIE(InfoExtractor):
authn_token = None
if not authn_token:
# TODO add support for other TV Providers
- mso_id = self._downloader.params.get('ap_mso')
+ mso_id = self.get_param('ap_mso')
if not mso_id:
raise_mvpd_required()
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
@@ -1438,6 +1464,13 @@ class AdobePassIE(InfoExtractor):
provider_redirect_page, 'oauth redirect')
self._download_webpage(
oauth_redirect_url, video_id, 'Confirming auto login')
+ elif 'automatically signed in with' in provider_redirect_page:
+ # Comcast appears to be rolling out a new way of automatically signing in customers
+ oauth_redirect_url = self._html_search_regex(
+ r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
+ 'oauth redirect (signed)')
+ # Just need to process the request. No useful data comes back
+ self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
else:
if '<form name="signin"' in provider_redirect_page:
provider_login_page_res = provider_redirect_page_res
@@ -1460,11 +1493,28 @@ class AdobePassIE(InfoExtractor):
mvpd_confirm_page, urlh = mvpd_confirm_page_res
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Philo':
+ # Philo uses a unique authentication method
+ self._download_webpage(
+ 'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({
+ 'ident': username,
+ 'device': 'web',
+ 'send_confirm_link': False,
+ 'send_token': True
+ }))
+ philo_code = compat_getpass('Type auth code you have received [Return]: ')
+ self._download_webpage(
+ 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
+ 'token': philo_code
+ }))
+ mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login')
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
elif mso_id == 'Verizon':
# In general, if you're connecting from a Verizon-assigned IP,
# you will not actually pass your credentials.
provider_redirect_page, urlh = provider_redirect_page_res
- if 'Please wait ...' in provider_redirect_page:
+ # From a non-Verizon IP the page still shows 'Please wait', but contains 'N'== "Y"; this still needs to be verified from a Verizon IP
+ if 'Please wait ...' in provider_redirect_page and '\'N\'== "Y"' not in provider_redirect_page:
saml_redirect_url = self._html_search_regex(
r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
provider_redirect_page,
@@ -1472,7 +1522,8 @@ class AdobePassIE(InfoExtractor):
saml_login_page = self._download_webpage(
saml_redirect_url, video_id,
'Downloading SAML Login Page')
- else:
+ elif 'Verizon FiOS - sign in' in provider_redirect_page:
+ # FXNetworks from non-Verizon IP
saml_login_page_res = post_form(
provider_redirect_page_res, 'Logging in', {
mso_info['username_field']: username,
@@ -1482,6 +1533,26 @@ class AdobePassIE(InfoExtractor):
if 'Please try again.' in saml_login_page:
raise ExtractorError(
'We\'re sorry, but either the User ID or Password entered is not correct.')
+ else:
+ # ABC from non-Verizon IP
+ saml_redirect_url = self._html_search_regex(
+ r'var\surl\s*=\s*(["\'])(?P<url>.+?)\1',
+ provider_redirect_page,
+ 'SAML Redirect URL', group='url')
+ saml_redirect_url = saml_redirect_url.replace(r'\/', '/')
+ saml_redirect_url = saml_redirect_url.replace(r'\-', '-')
+ saml_redirect_url = saml_redirect_url.replace(r'\x26', '&')
+ saml_login_page = self._download_webpage(
+ saml_redirect_url, video_id,
+ 'Downloading SAML Login Page')
+ saml_login_page, urlh = post_form(
+ [saml_login_page, saml_redirect_url], 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ })
+ if 'Please try again.' in saml_login_page:
+ raise ExtractorError(
+ 'Failed to log in: incorrect User ID or Password.')
saml_login_url = self._search_regex(
r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
saml_login_page, 'SAML Login URL', group='url')
@@ -1496,6 +1567,75 @@ class AdobePassIE(InfoExtractor):
}), headers={
'Content-Type': 'application/x-www-form-urlencoded'
})
+ elif mso_id == 'Spectrum':
+ # Spectrum's login form is dynamically loaded via JS, so we need to hardcode the flow
+ # as a one-off implementation.
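+ # The RelayState/SAMLRequest scraped below are posted to Spectrum's manualAuth API, and the returned SAMLResponse completes the login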
+ provider_redirect_page, urlh = provider_redirect_page_res
+ provider_login_page_res = post_form(
+ provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
+ saml_login_page, urlh = provider_login_page_res
+ relay_state = self._search_regex(
+ r'RelayState\s*=\s*"(?P<relay>.+?)";',
+ saml_login_page, 'RelayState', group='relay')
+ saml_request = self._search_regex(
+ r'SAMLRequest\s*=\s*"(?P<saml_request>.+?)";',
+ saml_login_page, 'SAMLRequest', group='saml_request')
+ login_json = {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password,
+ 'RelayState': relay_state,
+ 'SAMLRequest': saml_request,
+ }
+ saml_response_json = self._download_json(
+ 'https://tveauthn.spectrum.net/tveauthentication/api/v1/manualAuth', video_id,
+ 'Downloading SAML Response',
+ data=json.dumps(login_json).encode(),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ })
+ self._download_webpage(
+ saml_response_json['SAMLRedirectUri'], video_id,
+ 'Confirming Login', data=urlencode_postdata({
+ 'SAMLResponse': saml_response_json['SAMLResponse'],
+ 'RelayState': relay_state,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+ elif mso_id == 'slingtv':
+ # SlingTV has a meta-refresh based authentication, but also
+ # looks at the tab history to count the number of times the
+ # browser has been on a page
+
+ first_bookend_page, urlh = provider_redirect_page_res
+
+ hidden_data = self._hidden_inputs(first_bookend_page)
+ hidden_data['history'] = 1
+
+ provider_login_page_res = self._download_webpage_handle(
+ urlh.geturl(), video_id, 'Sending first bookend',
+ query=hidden_data)
+
+ provider_association_redirect, urlh = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password
+ })
+
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_association_redirect, url=urlh.geturl())
+
+ last_bookend_page, urlh = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Auth Association Redirect Page')
+ hidden_data = self._hidden_inputs(last_bookend_page)
+ hidden_data['history'] = 3
+
+ mvpd_confirm_page_res = self._download_webpage_handle(
+ urlh.geturl(), video_id, 'Sending final bookend',
+ query=hidden_data)
+
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
else:
# Some providers (e.g. DIRECTV NOW) have another meta refresh
# based redirect that should be followed.
@@ -1508,10 +1648,13 @@ class AdobePassIE(InfoExtractor):
'Downloading Provider Redirect Page (meta refresh)')
provider_login_page_res = post_form(
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
- mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
+ form_data = {
mso_info.get('username_field', 'username'): username,
- mso_info.get('password_field', 'password'): password,
- })
+ mso_info.get('password_field', 'password'): password
+ }
+ if mso_id == 'Cablevision':
+ form_data['_eventId_proceed'] = ''
+ mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
if mso_id != 'Rogers':
post_form(mvpd_confirm_page_res, 'Confirming Login')
diff --git a/hypervideo_dl/extractor/adobetv.py b/hypervideo_dl/extractor/adobetv.py
index 80060f0..12b8192 100644
--- a/hypervideo_dl/extractor/adobetv.py
+++ b/hypervideo_dl/extractor/adobetv.py
@@ -66,7 +66,7 @@ class AdobeTVBaseIE(InfoExtractor):
if original_filename.startswith('s3://') and not s3_extracted:
formats.append({
'format_id': 'original',
- 'preference': 1,
+ 'quality': 1,
'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
})
s3_extracted = True
@@ -132,7 +132,7 @@ class AdobeTVIE(AdobeTVBaseIE):
}
def _real_extract(self, url):
- language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
+ language, show_urlname, urlname = self._match_valid_url(url).groups()
if not language:
language = 'en'
@@ -178,7 +178,7 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
_process_data = AdobeTVBaseIE._parse_video_data
def _real_extract(self, url):
- language, show_urlname = re.match(self._VALID_URL, url).groups()
+ language, show_urlname = self._match_valid_url(url).groups()
if not language:
language = 'en'
query = {
@@ -215,7 +215,7 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
def _real_extract(self, url):
- language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
+ language, channel_urlname, category_urlname = self._match_valid_url(url).groups()
if not language:
language = 'en'
query = {
diff --git a/hypervideo_dl/extractor/adultswim.py b/hypervideo_dl/extractor/adultswim.py
index 8d1d9ac..c97cfc1 100644
--- a/hypervideo_dl/extractor/adultswim.py
+++ b/hypervideo_dl/extractor/adultswim.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .turner import TurnerBaseIE
from ..utils import (
@@ -89,7 +88,7 @@ class AdultSwimIE(TurnerBaseIE):
}]
def _real_extract(self, url):
- show_path, episode_path = re.match(self._VALID_URL, url).groups()
+ show_path, episode_path = self._match_valid_url(url).groups()
display_id = episode_path or show_path
query = '''query {
getShowBySlug(slug:"%s") {
diff --git a/hypervideo_dl/extractor/aenetworks.py b/hypervideo_dl/extractor/aenetworks.py
index e55c03f..8025de5 100644
--- a/hypervideo_dl/extractor/aenetworks.py
+++ b/hypervideo_dl/extractor/aenetworks.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .theplatform import ThePlatformIE
from ..utils import (
@@ -20,8 +19,8 @@ class AENetworksBaseIE(ThePlatformIE):
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
fyi\.tv
)/'''
- _THEPLATFORM_KEY = 'crazyjava'
- _THEPLATFORM_SECRET = 's3cr3t'
+ _THEPLATFORM_KEY = '43jXaGRQud'
+ _THEPLATFORM_SECRET = 'S10BPXHMlb'
_DOMAIN_MAP = {
'history.com': ('HISTORY', 'history'),
'aetv.com': ('AETV', 'aetv'),
@@ -170,7 +169,7 @@ class AENetworksIE(AENetworksBaseIE):
}]
def _real_extract(self, url):
- domain, canonical = re.match(self._VALID_URL, url).groups()
+ domain, canonical = self._match_valid_url(url).groups()
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
@@ -187,7 +186,7 @@ class AENetworksListBaseIE(AENetworksBaseIE):
}))['data'][resource]
def _real_extract(self, url):
- domain, slug = re.match(self._VALID_URL, url).groups()
+ domain, slug = self._match_valid_url(url).groups()
_, brand = self._DOMAIN_MAP[domain]
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
base_url = 'http://watch.%s' % domain
@@ -309,7 +308,7 @@ class HistoryPlayerIE(AENetworksBaseIE):
_TESTS = []
def _real_extract(self, url):
- domain, video_id = re.match(self._VALID_URL, url).groups()
+ domain, video_id = self._match_valid_url(url).groups()
return self._extract_aetn_info(domain, 'id', video_id, url)
diff --git a/hypervideo_dl/extractor/afreecatv.py b/hypervideo_dl/extractor/afreecatv.py
index b56abb1..063872b 100644
--- a/hypervideo_dl/extractor/afreecatv.py
+++ b/hypervideo_dl/extractor/afreecatv.py
@@ -6,9 +6,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_xpath
from ..utils import (
+ date_from_str,
determine_ext,
ExtractorError,
int_or_none,
+ unified_strdate,
url_or_none,
urlencode_postdata,
xpath_text,
@@ -237,6 +239,7 @@ class AfreecaTVIE(InfoExtractor):
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
partial_view = False
+ adult_view = False
for _ in range(2):
query = {
'nTitleNo': video_id,
@@ -245,6 +248,8 @@ class AfreecaTVIE(InfoExtractor):
}
if partial_view:
query['partialView'] = 'SKIP_ADULT'
+ if adult_view:
+ query['adultView'] = 'ADULT_VIEW'
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, 'Downloading video info XML%s'
@@ -257,13 +262,16 @@ class AfreecaTVIE(InfoExtractor):
if flag and flag == 'SUCCEED':
break
if flag == 'PARTIAL_ADULT':
- self._downloader.report_warning(
+ self.report_warning(
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
'Only content suitable for all ages will be downloaded. '
'Provide account credentials if you wish to download restricted content.')
partial_view = True
continue
elif flag == 'ADULT':
+ if not adult_view:
+ adult_view = True
+ continue
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
else:
error = flag
@@ -309,8 +317,15 @@ class AfreecaTVIE(InfoExtractor):
if not file_url:
continue
key = file_element.get('key', '')
- upload_date = self._search_regex(
- r'^(\d{8})_', key, 'upload date', default=None)
+ upload_date = unified_strdate(self._search_regex(
+ r'^(\d{8})_', key, 'upload date', default=None))
+ if upload_date is not None:
+ # sometimes the upload date isn't included in the file name
+ # instead, another random ID is, which may parse as a valid
+ # date but be wildly out of a reasonable range
+ parsed_date = date_from_str(upload_date)
+ if parsed_date.year < 2000 or parsed_date.year >= 2100:
+ upload_date = None
file_duration = int_or_none(file_element.get('duration'))
format_id = key if key else '%s_%s' % (video_id, file_num)
if determine_ext(file_url) == 'm3u8':
@@ -323,7 +338,7 @@ class AfreecaTVIE(InfoExtractor):
'url': file_url,
'format_id': 'http',
}]
- if not formats:
+ if not formats and not self.get_param('ignore_no_formats'):
continue
self._sort_formats(formats)
file_info = common_entry.copy()
diff --git a/hypervideo_dl/extractor/aljazeera.py b/hypervideo_dl/extractor/aljazeera.py
index c4f915a..e829b45 100644
--- a/hypervideo_dl/extractor/aljazeera.py
+++ b/hypervideo_dl/extractor/aljazeera.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
@@ -32,7 +31,7 @@ class AlJazeeraIE(InfoExtractor):
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
- post_type, name = re.match(self._VALID_URL, url).groups()
+ post_type, name = self._match_valid_url(url).groups()
post_type = {
'features': 'post',
'program': 'episode',
@@ -40,7 +39,7 @@ class AlJazeeraIE(InfoExtractor):
}[post_type.split('/')[0]]
video = self._download_json(
'https://www.aljazeera.com/graphql', name, query={
- 'operationName': 'SingleArticleQuery',
+ 'operationName': 'ArchipelagoSingleArticleQuery',
'variables': json.dumps({
'name': name,
'postType': post_type,
diff --git a/hypervideo_dl/extractor/alura.py b/hypervideo_dl/extractor/alura.py
new file mode 100644
index 0000000..f5325de
--- /dev/null
+++ b/hypervideo_dl/extractor/alura.py
@@ -0,0 +1,179 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from ..compat import (
+ compat_urlparse,
+)
+
+from ..utils import (
+ urlencode_postdata,
+ urljoin,
+ int_or_none,
+ clean_html,
+ ExtractorError
+)
+
+
+class AluraIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<course_name>[^/]+)/task/(?P<id>\d+)'
+ _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
+ _VIDEO_URL = 'https://cursos.alura.com.br/course/%s/task/%s/video'
+ _NETRC_MACHINE = 'alura'
+ _TESTS = [{
+ 'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs/task/60095',
+ 'info_dict': {
+ 'id': '60095',
+ 'ext': 'mp4',
+ 'title': 'Referências, ref-set e alter'
+ },
+ 'skip': 'Requires alura account credentials'},
+ {
+ # URL without video
+ 'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs/task/60098',
+ 'only_matching': True},
+ {
+ 'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
+ 'only_matching': True}
+ ]
+
+ def _real_extract(self, url):
+
+ course, video_id = self._match_valid_url(url).group('course_name', 'id')
+ video_url = self._VIDEO_URL % (course, video_id)
+
+ video_dict = self._download_json(video_url, video_id, 'Searching for videos')
+
+ if video_dict:
+ webpage = self._download_webpage(url, video_id)
+ video_title = clean_html(self._search_regex(
+ r'<span[^>]+class=(["\'])task-body-header-title-text\1[^>]*>(?P<title>[^<]+)',
+ webpage, 'title', group='title'))
+
+ formats = []
+ for video_obj in video_dict:
+ video_url_m3u8 = video_obj.get('link')
+ video_format = self._extract_m3u8_formats(
+ video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ for f in video_format:
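+ # the m3u8 may lack resolution info; guess the height from the URL suffix: 'hd' means 720, anything else 480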
+ m = re.search(r'^[\w \W]*-(?P<res>\w*).mp4[\W \w]*', f['url'])
+ if m:
+ if not f.get('height'):
+ f['height'] = int('720' if m.group('res') == 'hd' else '480')
+ formats.extend(video_format)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ "formats": formats
+ }
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'href=[\"|\']?/signout[\"|\']',
+ r'>Logout<'))
+
+ # already logged in
+ if is_logged(login_page):
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+class=["|\']signin-form["|\'] action=["|\'](?P<url>.+?)["|\']', login_page,
+ 'post url', default=self._LOGIN_URL, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ if not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class AluraCourseIE(AluraIE):
+
+ _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<id>[^/]+)'
+ _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
+ _NETRC_MACHINE = 'aluracourse'
+ _TESTS = [{
+ 'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+
+ course_path = self._match_id(url)
+ webpage = self._download_webpage(url, course_path)
+
+ course_title = self._search_regex(
+ r'<h1.*?>(.*?)<strong>(?P<course_title>.*?)</strong></h[0-9]>', webpage,
+ 'course title', default=course_path, group='course_title')
+
+ entries = []
+ if webpage:
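+ # the lookaheads let the class and href attributes appear in any order within the tag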
+ for path in re.findall(r'<a\b(?=[^>]* class="[^"]*(?<=[" ])courseSectionList-section[" ])(?=[^>]* href="([^"]*))', webpage):
+ page_url = urljoin(url, path)
+ section_path = self._download_webpage(page_url, course_path)
+ for path_video in re.findall(r'<a\b(?=[^>]* class="[^"]*(?<=[" ])task-menu-nav-item-link-VIDEO[" ])(?=[^>]* href="([^"]*))', section_path):
+ chapter = clean_html(
+ self._search_regex(
+ r'<h3[^>]+class=(["\'])task-menu-section-title-text\1[^>]*>(?P<chapter>[^<]+)',
+ section_path,
+ 'chapter',
+ group='chapter'))
+
+ chapter_number = int_or_none(
+ self._search_regex(
+ r'<span[^>]+class=(["\'])task-menu-section-title-number[^>]*>(.*?)<strong>(?P<chapter_number>[^<]+)</strong>',
+ section_path,
+ 'chapter number',
+ group='chapter_number'))
+ video_url = urljoin(url, path_video)
+
+ entry = {
+ '_type': 'url_transparent',
+ 'id': self._match_id(video_url),
+ 'url': video_url,
+ 'ie_key': self.ie_key(),
+ 'chapter': chapter,
+ 'chapter_number': chapter_number
+ }
+ entries.append(entry)
+ return self.playlist_result(entries, course_path, course_title)
diff --git a/hypervideo_dl/extractor/amcnetworks.py b/hypervideo_dl/extractor/amcnetworks.py
index b8027bb..e38e215 100644
--- a/hypervideo_dl/extractor/amcnetworks.py
+++ b/hypervideo_dl/extractor/amcnetworks.py
@@ -63,17 +63,37 @@ class AMCNetworksIE(ThePlatformIE):
}
def _real_extract(self, url):
- site, display_id = re.match(self._VALID_URL, url).groups()
+ site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site]
- properties = self._download_json(
- 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
- display_id)['data']['properties']
+ page_data = self._download_json(
+ 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
+ % (requestor_id.lower(), display_id), display_id)['data']
+ properties = page_data.get('properties') or {}
query = {
'mbr': 'true',
'manifest': 'm3u',
}
- tp_path = 'M_UwQC/media/' + properties['videoPid']
- media_url = 'https://link.theplatform.com/s/' + tp_path
+
+ video_player_count = 0
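+ # Prefer the releasePid of the page's video-player child, counting players so we can warn when more than one is present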
+ try:
+ for v in page_data['children']:
+ if v.get('type') == 'video-player':
+ releasePid = v['properties']['currentVideo']['meta']['releasePid']
+ tp_path = 'M_UwQC/' + releasePid
+ media_url = 'https://link.theplatform.com/s/' + tp_path
+ video_player_count += 1
+ except KeyError:
+ pass
+ if video_player_count > 1:
+ self.report_warning(
+ 'The JSON data has %d video players. Only one will be extracted' % video_player_count)
+
+ # Fall back to videoPid if releasePid not found.
+ # TODO: Fall back to videoPid if releasePid manifest uses DRM.
+ if not video_player_count:
+ tp_path = 'M_UwQC/media/' + properties['videoPid']
+ media_url = 'https://link.theplatform.com/s/' + tp_path
+
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid']
@@ -90,30 +110,41 @@ class AMCNetworksIE(ThePlatformIE):
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)
self._sort_formats(formats)
+
+ thumbnails = []
+ thumbnail_urls = [properties.get('imageDesktop')]
+ if 'thumbnail' in info:
+ thumbnail_urls.append(info.pop('thumbnail'))
+ for thumbnail_url in thumbnail_urls:
+ if not thumbnail_url:
+ continue
+ mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int(mobj.group(1)) if mobj else None,
+ 'height': int(mobj.group(2)) if mobj else None,
+ })
+
info.update({
+ 'age_limit': parse_age_limit(rating),
+ 'formats': formats,
'id': video_id,
'subtitles': subtitles,
- 'formats': formats,
- 'age_limit': parse_age_limit(parse_age_limit(rating)),
+ 'thumbnails': thumbnails,
})
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
if ns_keys:
ns = list(ns_keys)[0]
- series = theplatform_metadata.get(ns + '$show')
- season_number = int_or_none(
- theplatform_metadata.get(ns + '$season'))
- episode = theplatform_metadata.get(ns + '$episodeTitle')
+ episode = theplatform_metadata.get(ns + '$episodeTitle') or None
episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode'))
- if season_number:
- title = 'Season %d - %s' % (season_number, title)
- if series:
- title = '%s - %s' % (series, title)
+ season_number = int_or_none(
+ theplatform_metadata.get(ns + '$season'))
+ series = theplatform_metadata.get(ns + '$show') or None
info.update({
- 'title': title,
- 'series': series,
- 'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
+ 'season_number': season_number,
+ 'series': series,
})
return info
diff --git a/hypervideo_dl/extractor/americastestkitchen.py b/hypervideo_dl/extractor/americastestkitchen.py
index be960c0..6e6099a 100644
--- a/hypervideo_dl/extractor/americastestkitchen.py
+++ b/hypervideo_dl/extractor/americastestkitchen.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -69,7 +68,7 @@ class AmericasTestKitchenIE(InfoExtractor):
}]
def _real_extract(self, url):
- resource_type, video_id = re.match(self._VALID_URL, url).groups()
+ resource_type, video_id = self._match_valid_url(url).groups()
is_episode = resource_type == 'episode'
if is_episode:
resource_type = 'episodes'
@@ -114,7 +113,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
}]
def _real_extract(self, url):
- show_name, season_number = re.match(self._VALID_URL, url).groups()
+ show_name, season_number = self._match_valid_url(url).groups()
season_number = int(season_number)
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
diff --git a/hypervideo_dl/extractor/animelab.py b/hypervideo_dl/extractor/animelab.py
new file mode 100644
index 0000000..4fb7ee4
--- /dev/null
+++ b/hypervideo_dl/extractor/animelab.py
@@ -0,0 +1,285 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ ExtractorError,
+ urlencode_postdata,
+ int_or_none,
+ str_or_none,
+ determine_ext,
+)
+
+from ..compat import compat_HTTPError
+
+
+class AnimeLabBaseIE(InfoExtractor):
+ _LOGIN_REQUIRED = True
+ _LOGIN_URL = 'https://www.animelab.com/login'
+ _NETRC_MACHINE = 'animelab'
+
+ def _login(self):
+ def is_logged_in(login_webpage):
+ return 'Sign In' not in login_webpage
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ # Check if already logged in
+ if is_logged_in(login_page):
+ return
+
+ (username, password) = self._get_login_info()
+ if username is None and self._LOGIN_REQUIRED:
+ self.raise_login_required('Login is required to access any AnimeLab content')
+
+ login_form = {
+ 'email': username,
+ 'password': password,
+ }
+
+ try:
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
+ data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
+ else:
+ raise
+
+ # if login was successful
+ if is_logged_in(response):
+ return
+
+ raise ExtractorError('Unable to log in (cannot verify if logged in)')
+
+ def _real_initialize(self):
+ self._login()
+
+
+class AnimeLabIE(AnimeLabBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
+
+ # the following tests require authentication, but a free account will suffice
+ # just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file
+ # or you can set 'username' and 'password' there
+ # the tests also select a specific format so that the same video is downloaded
+ # regardless of whether the user is premium or not (needs testing on a premium account)
+ _TEST = {
+ 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
+ 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
+ 'info_dict': {
+ 'id': '383',
+ 'ext': 'mp4',
+ 'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
+ 'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
+ 'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
+ 'series': 'Fullmetal Alchemist: Brotherhood',
+ 'episode': 'Signs of a Counteroffensive',
+ 'episode_number': 42,
+ 'duration': 1469,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'season_id': '38',
+ },
+ 'params': {
+ 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
+ },
+ 'skip': 'All AnimeLab content requires authentication',
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ # Unfortunately we can get different URLs for the same formats,
+ # e.g. when using a "free" account with no dubs available
+ # (so _remove_duplicate_formats is not effective),
+ # so we use a dictionary keyed by format_id as a workaround.
+ formats = {}
+ for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
+ 'https://www.animelab.com/player/%s/dubbed'):
+ actual_url = language_option_url % display_id
+ webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
+
+ video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
+ position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
+
+ raw_data = video_collection[position]['videoEntry']
+
+ video_id = str_or_none(raw_data['id'])
+
+ # create a title from many sources (while grabbing other info)
+ # TODO use more fallback sources to get some of these
+ series = raw_data.get('showTitle')
+ video_type = raw_data.get('videoEntryType', {}).get('name')
+ episode_number = raw_data.get('episodeNumber')
+ episode_name = raw_data.get('name')
+
+ title_parts = (series, video_type, episode_number, episode_name)
+ if None not in title_parts:
+ title = '%s - %s %s - %s' % title_parts
+ else:
+ title = episode_name
+
+ description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
+
+ duration = int_or_none(raw_data.get('duration'))
+
+ thumbnail_data = raw_data.get('images', [])
+ thumbnails = []
+ for thumbnail in thumbnail_data:
+ for instance in thumbnail['imageInstances']:
+ image_data = instance.get('imageInfo', {})
+ thumbnails.append({
+ 'id': str_or_none(image_data.get('id')),
+ 'url': image_data.get('fullPath'),
+ 'width': image_data.get('width'),
+ 'height': image_data.get('height'),
+ })
+
+ season_data = raw_data.get('season', {}) or {}
+ season = str_or_none(season_data.get('name'))
+ season_number = int_or_none(season_data.get('seasonNumber'))
+ season_id = str_or_none(season_data.get('id'))
+
+ for video_data in raw_data['videoList']:
+ current_video_list = {}
+ current_video_list['language'] = video_data.get('language', {}).get('languageCode')
+
+ is_hardsubbed = video_data.get('hardSubbed')
+
+ for video_instance in video_data['videoInstances']:
+ httpurl = video_instance.get('httpUrl')
+ url = httpurl or video_instance.get('rtmpUrl')
+ if url is None:
+ # this video format is unavailable to the user (not premium etc.)
+ continue
+
+ current_format = current_video_list.copy()
+
+ format_id_parts = []
+
+ format_id_parts.append(str_or_none(video_instance.get('id')))
+
+ if is_hardsubbed is not None:
+ if is_hardsubbed:
+ format_id_parts.append('yeshardsubbed')
+ else:
+ format_id_parts.append('nothardsubbed')
+
+ format_id_parts.append(current_format['language'])
+
+ format_id = '_'.join([x for x in format_id_parts if x is not None])
+
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ for format_ in self._extract_m3u8_formats(
+ url, video_id, m3u8_id=format_id, fatal=False):
+ formats[format_['format_id']] = format_
+ continue
+ elif ext == 'mpd':
+ for format_ in self._extract_mpd_formats(
+ url, video_id, mpd_id=format_id, fatal=False):
+ formats[format_['format_id']] = format_
+ continue
+
+ current_format['url'] = url
+ quality_data = video_instance.get('videoQuality')
+ if quality_data:
+ quality = quality_data.get('name') or quality_data.get('description')
+ else:
+ quality = None
+
+ height = None
+ if quality:
+ height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
+
+ if height is None:
+ self.report_warning('Could not get height of video')
+ else:
+ current_format['height'] = height
+ current_format['format_id'] = format_id
+
+ formats[current_format['format_id']] = current_format
+
+ formats = list(formats.values())
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'series': series,
+ 'episode': episode_name,
+ 'episode_number': int_or_none(episode_number),
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'formats': formats,
+ 'season': season,
+ 'season_number': season_number,
+ 'season_id': season_id,
+ }
+
+
+class AnimeLabShowsIE(AnimeLabBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'https://www.animelab.com/shows/attack-on-titan',
+ 'info_dict': {
+ 'id': '45',
+ 'title': 'Attack on Titan',
+ 'description': 'md5:989d95a2677e9309368d5cf39ba91469',
+ },
+ 'playlist_count': 59,
+ 'skip': 'All AnimeLab content requires authentication',
+ }
+
+ def _real_extract(self, url):
+ _BASE_URL = 'http://www.animelab.com'
+ _SHOWS_API_URL = '/api/videoentries/show/videos/'
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
+
+ show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
+ show_data = self._parse_json(show_data_str, display_id)
+
+ show_id = str_or_none(show_data.get('id'))
+ title = show_data.get('name')
+ description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')
+
+ entries = []
+ for season in show_data['seasons']:
+ season_id = season['id']
+ get_data = urlencode_postdata({
+ 'seasonId': season_id,
+ 'limit': 1000,
+ })
+ # despite using urlencode_postdata, we are sending a GET request
+ target_url = _BASE_URL + _SHOWS_API_URL + show_id + "?" + get_data.decode('utf-8')
+ response = self._download_webpage(
+ target_url,
+ None, 'Season id %s' % season_id)
+
+ season_data = self._parse_json(response, display_id)
+
+ for video_data in season_data['list']:
+ entries.append(self.url_result(
+ _BASE_URL + '/player/' + video_data['slug'], 'AnimeLab',
+ str_or_none(video_data.get('id')), video_data.get('name')
+ ))
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'title': title,
+ 'description': description,
+ 'entries': entries,
+ }
+
+# TODO implement myqueue
diff --git a/hypervideo_dl/extractor/anvato.py b/hypervideo_dl/extractor/anvato.py
index b739856..b82f0b5 100644
--- a/hypervideo_dl/extractor/anvato.py
+++ b/hypervideo_dl/extractor/anvato.py
@@ -21,6 +21,16 @@ from ..utils import (
unsmuggle_url,
)
+# This import causes a ModuleNotFoundError on some systems for unknown reasons.
+# See issues:
+# https://github.com/hypervideo/hypervideo/issues/35
+# https://github.com/ytdl-org/youtube-dl/issues/27449
+# https://github.com/animelover1984/youtube-dl/issues/17
+try:
+ from .anvato_token_generator import NFLTokenGenerator
+except ImportError:
+ NFLTokenGenerator = None
+
def md5_text(s):
if not isinstance(s, compat_str):
@@ -203,6 +213,10 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
}
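+ # maps Anvato access keys to token generators; NFLTokenGenerator is None if the optional import above failed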
+ _TOKEN_GENERATORS = {
+ 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
+ }
+
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
@@ -262,9 +276,12 @@ class AnvatoIE(InfoExtractor):
'anvrid': anvrid,
'anvts': server_time,
}
- api['anvstk'] = md5_text('%s|%s|%d|%s' % (
- access_key, anvrid, server_time,
- self._ANVACK_TABLE.get(access_key, self._API_KEY)))
+ if self._TOKEN_GENERATORS.get(access_key) is not None:
+ api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
+ else:
+ api['anvstk'] = md5_text('%s|%s|%d|%s' % (
+ access_key, anvrid, server_time,
+ self._ANVACK_TABLE.get(access_key, self._API_KEY)))
return self._download_json(
video_data_url, video_id, transform_source=strip_jsonp,
@@ -373,7 +390,7 @@ class AnvatoIE(InfoExtractor):
'countries': smuggled_data.get('geo_countries'),
})
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
diff --git a/hypervideo_dl/extractor/anvato_token_generator/__init__.py b/hypervideo_dl/extractor/anvato_token_generator/__init__.py
new file mode 100644
index 0000000..6e223db
--- /dev/null
+++ b/hypervideo_dl/extractor/anvato_token_generator/__init__.py
@@ -0,0 +1,7 @@
+from __future__ import unicode_literals
+
+from .nfl import NFLTokenGenerator
+
+__all__ = [
+ 'NFLTokenGenerator',
+]
diff --git a/hypervideo_dl/extractor/anvato_token_generator/common.py b/hypervideo_dl/extractor/anvato_token_generator/common.py
new file mode 100644
index 0000000..b959a90
--- /dev/null
+++ b/hypervideo_dl/extractor/anvato_token_generator/common.py
@@ -0,0 +1,6 @@
+from __future__ import unicode_literals
+
+
+class TokenGenerator:
+ def generate(self, anvack, mcp_id):
+ raise NotImplementedError('This method must be implemented by subclasses')
diff --git a/hypervideo_dl/extractor/anvato_token_generator/nfl.py b/hypervideo_dl/extractor/anvato_token_generator/nfl.py
new file mode 100644
index 0000000..97a2b24
--- /dev/null
+++ b/hypervideo_dl/extractor/anvato_token_generator/nfl.py
@@ -0,0 +1,30 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import TokenGenerator
+
+
+class NFLTokenGenerator(TokenGenerator):
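+ # Fetches an NFL bearer token once (cached on the class), then asks the Shield GraphQL API for a per-video media token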
+ _AUTHORIZATION = None
+
+ def generate(ie, anvack, mcp_id):
+ if not NFLTokenGenerator._AUTHORIZATION:
+ reroute = ie._download_json(
+ 'https://api.nfl.com/v1/reroute', mcp_id,
+ data=b'grant_type=client_credentials',
+ headers={'X-Domain-Id': 100})
+ NFLTokenGenerator._AUTHORIZATION = '%s %s' % (reroute.get('token_type') or 'Bearer', reroute['access_token'])
+ return ie._download_json(
+ 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
+ 'query': '''{
+ viewer {
+ mediaToken(anvack: "%s", id: %s) {
+ token
+ }
+ }
+}''' % (anvack, mcp_id),
+ }).encode(), headers={
+ 'Authorization': NFLTokenGenerator._AUTHORIZATION,
+ 'Content-Type': 'application/json',
+ })['data']['viewer']['mediaToken']['token']
diff --git a/hypervideo_dl/extractor/aol.py b/hypervideo_dl/extractor/aol.py
index f6ecb84..4766a2c 100644
--- a/hypervideo_dl/extractor/aol.py
+++ b/hypervideo_dl/extractor/aol.py
@@ -4,13 +4,10 @@ from __future__ import unicode_literals
import re
from .yahoo import YahooIE
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
ExtractorError,
int_or_none,
+ parse_qs,
url_or_none,
)
@@ -119,13 +116,13 @@ class AolIE(YahooIE):
'height': int(mobj.group(2)),
})
else:
- qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query)
+ qs = parse_qs(video_url)
f.update({
'width': int_or_none(qs.get('w', [None])[0]),
'height': int_or_none(qs.get('h', [None])[0]),
})
formats.append(f)
- self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/apa.py b/hypervideo_dl/extractor/apa.py
index cbc1c0e..1736cdf 100644
--- a/hypervideo_dl/extractor/apa.py
+++ b/hypervideo_dl/extractor/apa.py
@@ -42,7 +42,7 @@ class APAIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id, base_url = mobj.group('id', 'base_url')
webpage = self._download_webpage(
diff --git a/hypervideo_dl/extractor/aparat.py b/hypervideo_dl/extractor/aparat.py
index a9527e7..da06a3c 100644
--- a/hypervideo_dl/extractor/aparat.py
+++ b/hypervideo_dl/extractor/aparat.py
@@ -72,8 +72,7 @@ class AparatIE(InfoExtractor):
r'(\d+)[pP]', label or '', 'height',
default=None)),
})
- self._sort_formats(
- formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+ self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, default={})
diff --git a/hypervideo_dl/extractor/appleconnect.py b/hypervideo_dl/extractor/appleconnect.py
index a84b8b1..494f833 100644
--- a/hypervideo_dl/extractor/appleconnect.py
+++ b/hypervideo_dl/extractor/appleconnect.py
@@ -9,10 +9,10 @@ from ..utils import (
class AppleConnectIE(InfoExtractor):
- _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
- _TEST = {
+ _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
+ _TESTS = [{
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
- 'md5': 'e7c38568a01ea45402570e6029206723',
+ 'md5': 'c1d41f72c8bcaf222e089434619316e4',
'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'ext': 'm4v',
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
'upload_date': '20150710',
'timestamp': 1436545535,
},
- }
+ }, {
+ 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
video_data = self._parse_json(video_json, video_id)
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
- like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+ like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/appletrailers.py b/hypervideo_dl/extractor/appletrailers.py
index 10442a5..0abfb43 100644
--- a/hypervideo_dl/extractor/appletrailers.py
+++ b/hypervideo_dl/extractor/appletrailers.py
@@ -94,7 +94,7 @@ class AppleTrailersIE(InfoExtractor):
_JSON_RE = r'iTunes.playURL\((.*?)\);'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
movie = mobj.group('movie')
uploader_id = mobj.group('company')
diff --git a/hypervideo_dl/extractor/archiveorg.py b/hypervideo_dl/extractor/archiveorg.py
index e42ed5e..d90fcb1 100644
--- a/hypervideo_dl/extractor/archiveorg.py
+++ b/hypervideo_dl/extractor/archiveorg.py
@@ -1,9 +1,33 @@
+# coding: utf-8
from __future__ import unicode_literals
+import re
+import json
+
from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
+ compat_HTTPError
+)
from ..utils import (
clean_html,
+ determine_ext,
+ dict_get,
extract_attributes,
+ ExtractorError,
+ HEADRequest,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ merge_dicts,
+ mimetype2ext,
+ parse_duration,
+ parse_qs,
+ RegexNotFoundError,
+ str_to_int,
+ str_or_none,
+ try_get,
unified_strdate,
unified_timestamp,
)
@@ -11,22 +35,22 @@ from ..utils import (
class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org'
- IE_DESC = 'archive.org videos'
- _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
+ IE_DESC = 'archive.org video and audio'
+ _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^?#]+)(?:[?].*)?$'
_TESTS = [{
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
'md5': '8af1d4cf447933ed3c7f4871162602db',
'info_dict': {
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
- 'ext': 'ogg',
+ 'ext': 'ogv',
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
- 'creator': 'SRI International',
'release_date': '19681210',
- 'uploader': 'SRI International',
'timestamp': 1268695290,
'upload_date': '20100315',
- }
+ 'creator': 'SRI International',
+ 'uploader': 'laura@archive.org',
+ },
}, {
'url': 'https://archive.org/details/Cops1922',
'md5': '0869000b4ce265e8ca62738b336b268a',
@@ -35,61 +59,360 @@ class ArchiveOrgIE(InfoExtractor):
'ext': 'mp4',
'title': 'Buster Keaton\'s "Cops" (1922)',
'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
+ 'uploader': 'yorkmba99@hotmail.com',
'timestamp': 1387699629,
- 'upload_date': '20131222',
- }
+ 'upload_date': "20131222",
+ },
}, {
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
'only_matching': True,
}, {
- 'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
- 'only_matching': True,
+ 'url': 'https://archive.org/details/Election_Ads',
+ 'md5': '284180e857160cf866358700bab668a3',
+ 'info_dict': {
+ 'id': 'Election_Ads/Commercial-JFK1960ElectionAdCampaignJingle.mpg',
+ 'title': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'md5': '7915213ef02559b5501fe630e1a53f59',
+ 'info_dict': {
+ 'id': 'Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'title': 'Commercial-Nixon1960ElectionAdToughonDefense.mpg',
+ 'ext': 'mp4',
+ 'timestamp': 1205588045,
+ 'uploader': 'mikedavisstripmaster@yahoo.com',
+ 'description': '1960 Presidential Campaign Election Commercials John F Kennedy, Richard M Nixon',
+ 'upload_date': '20080315',
+ },
+ }, {
+ 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16',
+ 'md5': '7d07ffb42aba6537c28e053efa4b54c9',
+ 'info_dict': {
+ 'id': 'gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t01.flac',
+ 'title': 'Turning',
+ 'ext': 'flac',
+ },
+ }, {
+ 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t07.flac',
+ 'md5': 'a07cd8c6ab4ee1560f8a0021717130f3',
+ 'info_dict': {
+ 'id': 'gd1977-05-08.shure57.stevenson.29303.flac16/gd1977-05-08d01t07.flac',
+ 'title': 'Deal',
+ 'ext': 'flac',
+ 'timestamp': 1205895624,
+ 'uploader': 'mvernon54@yahoo.com',
+ 'description': 'md5:6a31f1996db0aa0fc9da6d6e708a1bb0',
+ 'upload_date': '20080319',
+ 'location': 'Barton Hall - Cornell University',
+ },
+ }, {
+ 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik',
+ 'md5': '7cb019baa9b332e82ea7c10403acd180',
+ 'info_dict': {
+ 'id': 'lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/01.01. Bells Of Rostov.mp3',
+ 'title': 'Bells Of Rostov',
+ 'ext': 'mp3',
+ },
+ }, {
+ 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
+ 'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
+ 'info_dict': {
+ 'id': 'lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02. Song And Chorus In The Polovetsian Camp From "Prince Igor" (Act 2, Scene 1).mp3',
+ 'title': 'Song And Chorus In The Polovetsian Camp From "Prince Igor" (Act 2, Scene 1)',
+ 'ext': 'mp3',
+ 'timestamp': 1569662587,
+ 'uploader': 'associate-joygen-odiongan@archive.org',
+ 'description': 'md5:012b2d668ae753be36896f343d12a236',
+ 'upload_date': '20190928',
+ },
}]
+ @staticmethod
+ def _playlist_data(webpage):
+ element = re.findall(r'''(?xs)
+ <input
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s+class=['"]?js-play8-playlist['"]?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s*/>
+ ''', webpage)[0]
+
+ return json.loads(extract_attributes(element)['value'])
+
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'http://archive.org/embed/' + video_id, video_id)
-
- playlist = None
- play8 = self._search_regex(
- r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
- 'playlist', default=None)
- if play8:
- attrs = extract_attributes(play8)
- playlist = attrs.get('value')
- if not playlist:
- # Old jwplayer fallback
- playlist = self._search_regex(
- r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
- webpage, 'jwplayer playlist', default='[]')
- jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
- if jwplayer_playlist:
- info = self._parse_jwplayer_data(
- {'playlist': jwplayer_playlist}, video_id, base_url=url)
- else:
- # HTML5 media fallback
- info = self._parse_html5_media_entries(url, webpage, video_id)[0]
- info['id'] = video_id
+ video_id = compat_urllib_parse_unquote_plus(self._match_id(url))
+ identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
+
+ # Archive.org metadata API doesn't clearly demarcate playlist entries
+ # or subtitle tracks, so we get them from the embeddable player.
+ embed_page = self._download_webpage(
+ 'https://archive.org/embed/' + identifier, identifier)
+ playlist = self._playlist_data(embed_page)
+
+ entries = {}
+ for p in playlist:
+ # If the user specified a playlist entry in the URL, ignore the
+ # rest of the playlist.
+ if entry_id and p['orig'] != entry_id:
+ continue
- def get_optional(metadata, field):
- return metadata.get(field, [None])[0]
+ entries[p['orig']] = {
+ 'formats': [],
+ 'thumbnails': [],
+ 'artist': p.get('artist'),
+ 'track': p.get('title'),
+ 'subtitles': {}}
+
+ for track in p.get('tracks', []):
+ if track['kind'] != 'subtitles':
+ continue
+
+ entries[p['orig']]['subtitles'][track['label']] = [{
+ 'url': 'https://archive.org/' + track['file'].lstrip('/')}]
metadata = self._download_json(
- 'http://archive.org/details/' + video_id, video_id, query={
- 'output': 'json',
- })['metadata']
- info.update({
- 'title': get_optional(metadata, 'title') or info.get('title'),
- 'description': clean_html(get_optional(metadata, 'description')),
- })
- if info.get('_type') != 'playlist':
- creator = get_optional(metadata, 'creator')
- info.update({
- 'creator': creator,
- 'release_date': unified_strdate(get_optional(metadata, 'date')),
- 'uploader': get_optional(metadata, 'publisher') or creator,
- 'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
- 'language': get_optional(metadata, 'language'),
- })
+ 'http://archive.org/metadata/' + identifier, identifier)
+ m = metadata['metadata']
+ identifier = m['identifier']
+
+ info = {
+ 'id': identifier,
+ 'title': m['title'],
+ 'description': clean_html(m.get('description')),
+ 'uploader': dict_get(m, ['uploader', 'adder']),
+ 'creator': m.get('creator'),
+ 'license': m.get('licenseurl'),
+ 'release_date': unified_strdate(m.get('date')),
+ 'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
+ 'webpage_url': 'https://archive.org/details/' + identifier,
+ 'location': m.get('venue'),
+ 'release_year': int_or_none(m.get('year'))}
+
+ for f in metadata['files']:
+ if f['name'] in entries:
+ entries[f['name']] = merge_dicts(entries[f['name']], {
+ 'id': identifier + '/' + f['name'],
+ 'title': f.get('title') or f['name'],
+ 'display_id': f['name'],
+ 'description': clean_html(f.get('description')),
+ 'creator': f.get('creator'),
+ 'duration': parse_duration(f.get('length')),
+ 'track_number': int_or_none(f.get('track')),
+ 'album': f.get('album'),
+ 'discnumber': int_or_none(f.get('disc')),
+ 'release_year': int_or_none(f.get('year'))})
+ entry = entries[f['name']]
+ elif f.get('original') in entries:
+ entry = entries[f['original']]
+ else:
+ continue
+
+ if f.get('format') == 'Thumbnail':
+ entry['thumbnails'].append({
+ 'id': f['name'],
+ 'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'filesize': int_or_none(f.get('size'))})
+
+ extension = (f['name'].rsplit('.', 1) + [None])[1]
+ if extension in KNOWN_EXTENSIONS:
+ entry['formats'].append({
+ 'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+ 'format': f.get('format'),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'filesize': int_or_none(f.get('size')),
+ 'protocol': 'https'})
+
+ # Sort available formats by filesize
+ for entry in entries.values():
+ entry['formats'] = sorted(entry['formats'], key=lambda x: x.get('filesize', -1))
+
+ if len(entries) == 1:
+ # If there's only one item, use it as the main info dict
+ only_video = entries[list(entries.keys())[0]]
+ if entry_id:
+ info = merge_dicts(only_video, info)
+ else:
+ info = merge_dicts(info, only_video)
+ else:
+ # Otherwise, we have a playlist.
+ info['_type'] = 'playlist'
+ info['entries'] = list(entries.values())
+
+ if metadata.get('reviews'):
+ info['comments'] = []
+ for review in metadata['reviews']:
+ info['comments'].append({
+ 'id': review.get('review_id'),
+ 'author': review.get('reviewer'),
+ 'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + (review.get('reviewbody') or ''),
+ 'timestamp': unified_timestamp(review.get('createdate')),
+ 'parent': 'root'})
+
return info
+
+
+class YoutubeWebArchiveIE(InfoExtractor):
+ IE_NAME = 'web.archive:youtube'
+ IE_DESC = 'web.archive.org saved youtube videos'
+ _VALID_URL = r"""(?x)^
+ (?:https?://)?web\.archive\.org/
+ (?:web/)?
+ (?:[0-9A-Za-z_*]+/)? # /web and the version index is optional
+
+ (?:https?(?::|%3[Aa])//)?
+ (?:
+ (?:\w+\.)?youtube\.com/watch(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD]) # Youtube URL
+ |(wayback-fakeurl\.archive\.org/yt/) # Or the internal fake url
+ )
+ (?P<id>[0-9A-Za-z_-]{11})(?:%26|\#|&|$)
+ """
+
+ _TESTS = [
+ {
+ 'url': 'https://web.archive.org/web/20150415002341/https://www.youtube.com/watch?v=aYAGB11YrSs',
+ 'info_dict': {
+ 'id': 'aYAGB11YrSs',
+ 'ext': 'webm',
+ 'title': 'Team Fortress 2 - Sandviches!'
+ }
+ },
+ {
+ # Internal link
+ 'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
+ 'info_dict': {
+ 'id': '97t7Xj_iBv0',
+ 'ext': 'mp4',
+ 'title': 'How Flexible Machines Could Save The World'
+ }
+ },
+ {
+ # Video from 2012, webm format itag 45.
+ 'url': 'https://web.archive.org/web/20120712231619/http://www.youtube.com/watch?v=AkhihxRKcrs&gl=US&hl=en',
+ 'info_dict': {
+ 'id': 'AkhihxRKcrs',
+ 'ext': 'webm',
+ 'title': 'Limited Run: Mondo\'s Modern Classic 1 of 3 (SDCC 2012)'
+ }
+ },
+ {
+ # Old flash-only video. Webpage title starts with "YouTube - ".
+ 'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
+ 'info_dict': {
+ 'id': 'jNQXAC9IVRw',
+ 'ext': 'unknown_video',
+ 'title': 'Me at the zoo'
+ }
+ },
+ {
+ # Flash video with .flv extension (itag 34). Title has prefix "YouTube -"
+ # Title has some weird unicode characters too.
+ 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
+ 'info_dict': {
+ 'id': 'lTx3G6h2xyA',
+ 'ext': 'flv',
+ 'title': '‪Madeon - Pop Culture (live mashup)‬‏'
+ }
+ },
+ { # Some versions of YouTube use "YouTube" as the page title in the HTML (later rewritten by JS).
+ 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
+ 'info_dict': {
+ 'id': 'kH-G_aIBlFw',
+ 'ext': 'mp4',
+ 'title': 'kH-G_aIBlFw'
+ },
+ 'expected_warnings': [
+ 'unable to extract title',
+ ]
+ },
+ {
+ # First capture is a 302 redirect intermediary page.
+ 'url': 'https://web.archive.org/web/20050214000000/http://www.youtube.com/watch?v=0altSZ96U4M',
+ 'info_dict': {
+ 'id': '0altSZ96U4M',
+ 'ext': 'mp4',
+ 'title': '0altSZ96U4M'
+ },
+ 'expected_warnings': [
+ 'unable to extract title',
+ ]
+ },
+ {
+ # Video not archived, only capture is unavailable video page
+ 'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
+ 'only_matching': True,
+ },
+ { # Encoded url
+ 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ title = video_id # fallback in case we cannot extract a title
+
+ def _extract_title(webpage):
+ page_title = self._html_search_regex(
+ r'<title>([^<]*)</title>', webpage, 'title', fatal=False) or ''
+ # YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
+ try:
+ page_title = self._html_search_regex(
+ r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
+ page_title, 'title', default='')
+ except RegexNotFoundError:
+ page_title = None
+
+ if not page_title:
+ self.report_warning('unable to extract title', video_id=video_id)
+ return
+ return page_title
+
+ # If the video is no longer available, the oldest capture may be the one made before it was removed.
+ # Setting the capture date in the URL to an early date seems to redirect to the earliest capture.
+ webpage = self._download_webpage(
+ 'https://web.archive.org/web/20050214000000/http://www.youtube.com/watch?v=%s' % video_id,
+ video_id=video_id, fatal=False, errnote='unable to download video webpage (probably not archived).')
+ if webpage:
+ title = _extract_title(webpage) or title
+
+ # Use link translator mentioned in https://github.com/ytdl-org/youtube-dl/issues/13655
+ internal_fake_url = 'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id
+ try:
+ video_file_webpage = self._request_webpage(
+ HEADRequest(internal_fake_url), video_id,
+ note='Fetching video file url', expected_status=True)
+ except ExtractorError as e:
+ # HTTP Error 404 is expected if the video is not saved.
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ raise ExtractorError(
+ 'HTTP Error %s. Most likely the video is not archived, or there is an issue with web.archive.org.' % e.cause.code,
+ expected=True)
+ raise
+ video_file_url = compat_urllib_parse_unquote(video_file_webpage.url)
+ video_file_url_qs = parse_qs(video_file_url)
+
+ # Attempt to recover any ext & format info from playback url
+ format = {'url': video_file_url}
+ itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
+ if itag and itag in YoutubeIE._formats: # Naughty access but it works
+ format.update(YoutubeIE._formats[itag])
+ format.update({'format_id': itag})
+ else:
+ mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
+ ext = mimetype2ext(mime) or determine_ext(video_file_url)
+ format.update({'ext': ext})
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': [format],
+ 'duration': str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
+ }
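
The flow above can be exercised outside the extractor: a HEAD request to the wayback fake URL follows web.archive.org's redirect to the archived playback URL, whose query string still carries itag/mime/dur. A minimal standalone sketch (plain urllib, no hypervideo helpers; only the URL pattern and the parameter names come from the code above, everything else is an illustrative assumption):

    import urllib.parse
    import urllib.request

    def resolve_wayback_playback(video_id):
        # HEAD is enough: we only need the final redirected URL, not the body
        req = urllib.request.Request(
            'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id,
            method='HEAD')
        with urllib.request.urlopen(req) as resp:
            final_url = resp.url
        qs = urllib.parse.parse_qs(urllib.parse.urlparse(final_url).query)
        first = lambda k: (qs.get(k) or [None])[0]
        # itag pins the exact YouTube format; mime and dur are fallback hints
        return {'url': final_url, 'itag': first('itag'),
                'mime': first('mime'), 'duration': first('dur')}
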
diff --git a/hypervideo_dl/extractor/arcpublishing.py b/hypervideo_dl/extractor/arcpublishing.py
index ca6a6c4..5a9b818 100644
--- a/hypervideo_dl/extractor/arcpublishing.py
+++ b/hypervideo_dl/extractor/arcpublishing.py
@@ -86,7 +86,7 @@ class ArcPublishingIE(InfoExtractor):
return entries
def _real_extract(self, url):
- org, uuid = re.match(self._VALID_URL, url).groups()
+ org, uuid = self._match_valid_url(url).groups()
for orgs, tmpl in self._POWA_DEFAULTS:
if org in orgs:
base_api_tmpl = tmpl
@@ -129,10 +129,6 @@ class ArcPublishingIE(InfoExtractor):
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
continue
for f in m3u8_formats:
- if f.get('acodec') == 'none':
- f['preference'] = -40
- elif f.get('vcodec') == 'none':
- f['preference'] = -50
height = f.get('height')
if not height:
continue
@@ -150,10 +146,9 @@ class ArcPublishingIE(InfoExtractor):
'height': int_or_none(s.get('height')),
'filesize': int_or_none(s.get('filesize')),
'url': s_url,
- 'preference': -1,
+ 'quality': -10,
})
- self._sort_formats(
- formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id'))
+ self._sort_formats(formats)
subtitles = {}
for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
diff --git a/hypervideo_dl/extractor/ard.py b/hypervideo_dl/extractor/ard.py
index d45a9fe..048d30f 100644
--- a/hypervideo_dl/extractor/ard.py
+++ b/hypervideo_dl/extractor/ard.py
@@ -36,12 +36,12 @@ class ARDMediathekBaseIE(InfoExtractor):
if not formats:
if fsk:
- raise ExtractorError(
+ self.raise_no_formats(
'This video is only available after 20:00', expected=True)
elif media_info.get('_geoblocked'):
self.raise_geo_restricted(
'This video is not available due to geoblocking',
- countries=self._GEO_COUNTRIES)
+ countries=self._GEO_COUNTRIES, metadata_available=True)
self._sort_formats(formats)
@@ -62,6 +62,45 @@ class ARDMediathekBaseIE(InfoExtractor):
'subtitles': subtitles,
}
+ def _ARD_extract_episode_info(self, title):
+ """Try to extract season/episode data from the title."""
+ res = {}
+ if not title:
+ return res
+
+ for pattern in [
+ # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
+ # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
+ r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
+ # E.g.: title="Fritjof aus Norwegen (2) (AD)"
+ # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
+ r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
+ r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
+ # E.g.: title="Folge 25/42: Symmetrie"
+ # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
+ # E.g.: title="Folge 1063 - Vertrauen"
+ # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
+ r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
+ ]:
+ m = re.match(pattern, title)
+ if m:
+ groupdict = m.groupdict()
+ res['season_number'] = int_or_none(groupdict.get('season_number'))
+ res['episode_number'] = int_or_none(groupdict.get('episode_number'))
+ res['episode'] = str_or_none(groupdict.get('episode'))
+ # Build the episode title by removing numeric episode information:
+ if groupdict.get('ep_info') and not res['episode']:
+ res['episode'] = str_or_none(
+ title.replace(groupdict.get('ep_info'), ''))
+ if res['episode']:
+ res['episode'] = res['episode'].strip()
+ break
+
+ # As a fallback use the whole title as the episode name:
+ if not res.get('episode'):
+ res['episode'] = title.strip()
+ return res
+
def _extract_formats(self, media_info, video_id):
type_ = media_info.get('_type')
media_array = media_info.get('_mediaArray', [])
@@ -160,7 +199,7 @@ class ARDMediathekIE(ARDMediathekBaseIE):
def _real_extract(self, url):
# determine video id from url
- m = re.match(self._VALID_URL, url)
+ m = self._match_valid_url(url)
document_id = None
@@ -233,7 +272,8 @@ class ARDMediathekIE(ARDMediathekBaseIE):
else: # request JSON file
if not document_id:
video_id = self._search_regex(
- r'/play/(?:config|media)/(\d+)', webpage, 'media id')
+ (r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
+ webpage, 'media id', default=None)
info = self._extract_media_info(
'http://www.ardmediathek.de/play/media/%s' % video_id,
webpage, video_id)
@@ -244,6 +284,7 @@ class ARDMediathekIE(ARDMediathekBaseIE):
'description': description,
'thumbnail': thumbnail,
})
+ info.update(self._ARD_extract_episode_info(info['title']))
return info
@@ -270,6 +311,9 @@ class ARDIE(InfoExtractor):
'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
'only_matching': True,
}, {
+ 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/videos/diversity-tag-sanam-afrashteh100.html',
+ 'only_matching': True,
+ }, {
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
'only_matching': True,
}, {
@@ -281,7 +325,7 @@ class ARDIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('id')
player_url = mobj.group('mainurl') + '~playerXml.xml'
@@ -344,7 +388,7 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(ARDMediathekBaseIE):
- _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
+ _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
@@ -375,21 +419,132 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
'only_matching': True,
}, {
- 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
+ # playlist of type 'sendung'
+ 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True,
}, {
- 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
+ # playlist of type 'sammlung'
+ 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
'only_matching': True,
}]
+ def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, pageNumber):
+ """ Query the ARD server for playlist information
+ and return the data in "raw" format """
+ if mode == 'sendung':
+ graphQL = json.dumps({
+ 'query': '''{
+ showPage(
+ client: "%s"
+ showId: "%s"
+ pageNumber: %d
+ ) {
+ pagination {
+ pageSize
+ totalElements
+ }
+ teasers { # Array
+ mediumTitle
+ links { target { id href title } }
+ type
+ }
+ }}''' % (client, playlist_id, pageNumber),
+ }).encode()
+ else: # mode == 'sammlung'
+ graphQL = json.dumps({
+ 'query': '''{
+ morePage(
+ client: "%s"
+ compilationId: "%s"
+ pageNumber: %d
+ ) {
+ widget {
+ pagination {
+ pageSize
+ totalElements
+ }
+ teasers { # Array
+ mediumTitle
+ links { target { id href title } }
+ type
+ }
+ }
+ }}''' % (client, playlist_id, pageNumber),
+ }).encode()
+ # Resources for ARD GraphQL debugging:
+ # https://api-test.ardmediathek.de/public-gateway
+ show_page = self._download_json(
+ 'https://api.ardmediathek.de/public-gateway',
+ '[Playlist] %s' % display_id,
+ data=graphQL,
+ headers={'Content-Type': 'application/json'})['data']
+ # align the structure of the returned data:
+ if mode == 'sendung':
+ show_page = show_page['showPage']
+ else: # mode == 'sammlung'
+ show_page = show_page['morePage']['widget']
+ return show_page
+
+ def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
+ """ Collects all playlist entries and returns them as info dict.
+ Supports playlists of mode 'sendung' and 'sammlung', and also nested
+ playlists. """
+ entries = []
+ pageNumber = 0
+ while True: # iterate by pageNumber
+ show_page = self._ARD_load_playlist_snippet(
+ playlist_id, display_id, client, mode, pageNumber)
+ for teaser in show_page['teasers']: # process playlist items
+ if '/compilation/' in teaser['links']['target']['href']:
+ # alternative condition: teaser['type'] == "compilation"
+ # => this is a nested compilation, e.g.:
+ # https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
+ link_mode = 'sammlung'
+ else:
+ link_mode = 'video'
+
+ item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
+ client, link_mode, display_id,
+ # build a URL slug from the episode title, similar to how ARD does it:
+ re.sub('^-|-$', '', # strip leading/trailing '-'
+ re.sub('[^a-zA-Z0-9]+', '-', # replace runs of special chars with '-'
+ teaser['links']['target']['title'].lower()
+ .replace('ä', 'ae').replace('ö', 'oe')
+ .replace('ü', 'ue').replace('ß', 'ss'))),
+ teaser['links']['target']['id'])
+ entries.append(self.url_result(
+ item_url,
+ ie=ARDBetaMediathekIE.ie_key()))
+
+ if (show_page['pagination']['pageSize'] * (pageNumber + 1)
+ >= show_page['pagination']['totalElements']):
+ # we've processed enough pages to get all playlist entries
+ break
+ pageNumber = pageNumber + 1
+
+ return self.playlist_result(entries, playlist_title=display_id)
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('video_id')
+ display_id = mobj.group('display_id')
+ if display_id:
+ display_id = display_id.rstrip('/')
+ if not display_id:
+ display_id = video_id
+
+ if mobj.group('mode') in ('sendung', 'sammlung'):
+ # this is a playlist URL
+ return self._ARD_extract_playlist(
+ url, video_id, display_id,
+ mobj.group('client'),
+ mobj.group('mode'))
player_page = self._download_json(
'https://api.ardmediathek.de/public-gateway',
- video_id, data=json.dumps({
+ display_id, data=json.dumps({
'query': '''{
- playerPage(client: "ard", clipId: "%s") {
+ playerPage(client:"%s", clipId: "%s") {
blockedByFsk
broadcastedOn
maturityContentRating
@@ -419,7 +574,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
}
}
}
-}''' % video_id,
+}''' % (mobj.group('client'), video_id),
}).encode(), headers={
'Content-Type': 'application/json'
})['data']['playerPage']
@@ -444,9 +599,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
info.update({
'age_limit': age_limit,
+ 'display_id': display_id,
'title': title,
'description': description,
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
'series': try_get(player_page, lambda x: x['show']['title']),
})
+ info.update(self._ARD_extract_episode_info(info['title']))
return info
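
A quick sanity sketch of what _ARD_extract_episode_info is meant to recover from the sample titles quoted in its comments (assumes an instance ie of a class deriving from ARDMediathekBaseIE; the expected values are read off the patterns, not taken from a test run):

    samples = {
        'Homo sapiens (S06/E07) - Originalversion':
            {'season_number': 6, 'episode_number': 7, 'episode': 'Homo sapiens - Originalversion'},
        'Folge 25/42: Symmetrie':
            {'episode_number': 25, 'episode': 'Symmetrie'},
        'Fritjof aus Norwegen (2) (AD)':
            {'episode_number': 2, 'episode': 'Fritjof aus Norwegen (AD)'},
    }
    for title, expected in samples.items():
        res = ie._ARD_extract_episode_info(title)
        assert all(res.get(k) == v for k, v in expected.items()), (title, res)
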
diff --git a/hypervideo_dl/extractor/arkena.py b/hypervideo_dl/extractor/arkena.py
index fd46b1c..4f4f457 100644
--- a/hypervideo_dl/extractor/arkena.py
+++ b/hypervideo_dl/extractor/arkena.py
@@ -4,12 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
+ parse_qs,
try_get,
)
@@ -63,13 +63,13 @@ class ArkenaIE(InfoExtractor):
return mobj.group('url')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
account_id = mobj.group('account_id')
# Handle http://video.arkena.com/play2/embed/player URL
if not video_id:
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
video_id = qs.get('mediaId', [None])[0]
account_id = qs.get('accountId', [None])[0]
if not video_id or not account_id:
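
The parse_qs helper swapped in above is, in effect, the composition of the two compat calls it replaces. A rough stdlib sketch (the IDs in the sample URL are made up; the assumption is that hypervideo's parse_qs mirrors this behaviour):

    from urllib.parse import parse_qs as std_parse_qs, urlparse

    def parse_qs(url):
        return std_parse_qs(urlparse(url).query)

    qs = parse_qs('http://video.arkena.com/play2/embed/player?mediaId=1&accountId=472718')
    assert qs == {'mediaId': ['1'], 'accountId': ['472718']}
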
diff --git a/hypervideo_dl/extractor/arte.py b/hypervideo_dl/extractor/arte.py
index 03abdbf..296b169 100644
--- a/hypervideo_dl/extractor/arte.py
+++ b/hypervideo_dl/extractor/arte.py
@@ -6,11 +6,11 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
+ parse_qs,
qualities,
try_get,
unified_strdate,
@@ -49,7 +49,7 @@ class ArteTVIE(ArteTVBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
@@ -150,7 +150,6 @@ class ArteTVIE(ArteTVBaseIE):
format = {
'format_id': format_id,
- 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
'language_preference': lang_pref,
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
'width': int_or_none(f.get('width')),
@@ -168,12 +167,14 @@ class ArteTVIE(ArteTVBaseIE):
formats.append(format)
- self._sort_formats(formats)
+ # For this extractor, quality only represents the relative quality
+ # with respect to other formats with the same resolution
+ self._sort_formats(formats, ('res', 'quality'))
return {
'id': player_info.get('VID') or video_id,
'title': title,
- 'description': player_info.get('VDE'),
+ 'description': player_info.get('VDE') or player_info.get('V7T'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
'formats': formats,
@@ -203,7 +204,7 @@ class ArteTVEmbedIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
json_url = qs['json_url'][0]
video_id = ArteTVIE._match_id(json_url)
return self.url_result(
@@ -226,7 +227,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
}]
def _real_extract(self, url):
- lang, playlist_id = re.match(self._VALID_URL, url).groups()
+ lang, playlist_id = self._match_valid_url(url).groups()
collection = self._download_json(
'%s/collectionData/%s/%s?source=videos'
% (self._API_BASE, lang, playlist_id), playlist_id)
diff --git a/hypervideo_dl/extractor/asiancrush.py b/hypervideo_dl/extractor/asiancrush.py
index 66ce7c6..75a6329 100644
--- a/hypervideo_dl/extractor/asiancrush.py
+++ b/hypervideo_dl/extractor/asiancrush.py
@@ -111,7 +111,7 @@ class AsianCrushIE(AsianCrushBaseIE):
}]
def _real_extract(self, url):
- host, video_id = re.match(self._VALID_URL, url).groups()
+ host, video_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv':
webpage = self._download_webpage(url, video_id)
@@ -161,7 +161,7 @@ class AsianCrushPlaylistIE(AsianCrushBaseIE):
yield self._parse_video_data(video)
def _real_extract(self, url):
- host, playlist_id = re.match(self._VALID_URL, url).groups()
+ host, playlist_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv':
webpage = self._download_webpage(url, playlist_id)
diff --git a/hypervideo_dl/extractor/atresplayer.py b/hypervideo_dl/extractor/atresplayer.py
index c2cec98..8143eb4 100644
--- a/hypervideo_dl/extractor/atresplayer.py
+++ b/hypervideo_dl/extractor/atresplayer.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -75,7 +74,7 @@ class AtresPlayerIE(InfoExtractor):
self._request_webpage(target_url, None, 'Following Target URL')
def _real_extract(self, url):
- display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id, video_id = self._match_valid_url(url).groups()
try:
episode = self._download_json(
@@ -86,18 +85,19 @@ class AtresPlayerIE(InfoExtractor):
title = episode['titulo']
formats = []
+ subtitles = {}
for source in episode.get('sources', []):
src = source.get('src')
if not src:
continue
src_type = source.get('type')
if src_type == 'application/vnd.apple.mpegurl':
- formats.extend(self._extract_m3u8_formats(
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
src, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
+ m3u8_id='hls', fatal=False)
elif src_type == 'application/dash+xml':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id='dash', fatal=False))
+ formats, subtitles = self._extract_mpd_formats_and_subtitles(
+ src, video_id, mpd_id='dash', fatal=False)
self._sort_formats(formats)
heartbeat = episode.get('heartbeat') or {}
@@ -115,4 +115,5 @@ class AtresPlayerIE(InfoExtractor):
'channel': get_meta('channel'),
'season': get_meta('season'),
'episode_number': int_or_none(get_meta('episodeNumber')),
+ 'subtitles': subtitles,
}
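
One caveat with the subtitle handling added above: each matching source reassigns formats and subtitles, so if an episode ever exposed both an HLS and a DASH source, the later one would win. A defensive extend-and-merge pattern, sketched with made-up data rather than the extractor's real return values:

    def merge_subtitles(target, new):
        # accumulate subtitle tracks per language instead of overwriting
        for lang, tracks in new.items():
            target.setdefault(lang, []).extend(tracks)

    formats, subtitles = [], {}
    for fmts, subs in [
            ([{'format_id': 'hls-1'}], {'es': [{'url': 'https://example.invalid/a.vtt'}]}),
            ([{'format_id': 'dash-1'}], {'es': [{'url': 'https://example.invalid/b.vtt'}]})]:
        formats.extend(fmts)  # plain assignment here would drop the HLS formats
        merge_subtitles(subtitles, subs)
    assert len(formats) == 2 and len(subtitles['es']) == 2
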
diff --git a/hypervideo_dl/extractor/atvat.py b/hypervideo_dl/extractor/atvat.py
index 95e572d..7c30cfc 100644
--- a/hypervideo_dl/extractor/atvat.py
+++ b/hypervideo_dl/extractor/atvat.py
@@ -1,75 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
+import datetime
+
from .common import InfoExtractor
from ..utils import (
- determine_ext,
- int_or_none,
- unescapeHTML,
+ float_or_none,
+ jwt_encode_hs256,
+ try_get,
)
class ATVAtIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
+ _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'
+
_TESTS = [{
- 'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
- 'md5': 'c3b6b975fb3150fc628572939df205f2',
+ 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
+ 'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
'info_dict': {
- 'id': '1698447',
+ 'id': 'v-ce9cgn1e70n5-1',
'ext': 'mp4',
- 'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
+ 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
}
}, {
- 'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
+ 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
'only_matching': True,
}]
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_data = self._parse_json(unescapeHTML(self._search_regex(
- [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
- r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
- webpage, 'player data', group='json')),
- display_id)['config']['initial_video']
+ # extracted from the bootstrap.js function (search for e.encryption_key with your browser's debugger)
+ _ACCESS_ID = 'x_atv'
+ _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'
- video_id = video_data['id']
- video_title = video_data['title']
+ def _extract_video_info(self, url, content, video):
+ clip_id = content.get('splitId', content['id'])
+ formats = []
+ clip_urls = video['urls']
+ for protocol, variant in clip_urls.items():
+ source_url = try_get(variant, lambda x: x['clear']['url'])
+ if not source_url:
+ continue
+ if protocol == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id=protocol, fatal=False))
+ elif protocol == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id=protocol, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': protocol,
+ })
+ self._sort_formats(formats)
- parts = []
- for part in video_data.get('parts', []):
- part_id = part['id']
- part_title = part['title']
+ return {
+ 'id': clip_id,
+ 'title': content.get('title'),
+ 'duration': float_or_none(content.get('duration')),
+ 'series': content.get('tvShowTitle'),
+ 'formats': formats,
+ }
- formats = []
- for source in part.get('sources', []):
- source_url = source.get('src')
- if not source_url:
- continue
- ext = determine_ext(source_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, part_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'format_id': source.get('delivery'),
- 'url': source_url,
- })
- self._sort_formats(formats)
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._parse_json(
+ self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'),
+ video_id=video_id)
+
+ video_title = json_data['views']['default']['page']['title']
+ contentResource = json_data['views']['default']['page']['contentResource']
+ content_id = contentResource[0]['id']
+ content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
+ for id, content in enumerate(contentResource)]
- parts.append({
- 'id': part_id,
- 'title': part_title,
- 'thumbnail': part.get('preview_image_url'),
- 'duration': int_or_none(part.get('duration')),
- 'is_live': part.get('is_livestream'),
- 'formats': formats,
+ time_of_request = datetime.datetime.now()
+ not_before = time_of_request - datetime.timedelta(minutes=5)
+ expire = time_of_request + datetime.timedelta(minutes=5)
+ payload = {
+ 'content_ids': {
+ content_id: content_ids,
+ },
+ 'secure_delivery': True,
+ 'iat': int(time_of_request.timestamp()),
+ 'nbf': int(not_before.timestamp()),
+ 'exp': int(expire.timestamp()),
+ }
+ jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
+ videos = self._download_json(
+ 'https://vas-v4.p7s1video.net/4.0/getsources',
+ content_id, 'Downloading videos JSON', query={
+ 'token': jwt_token.decode('utf-8')
})
+ video_id, videos_data = list(videos['data'].items())[0]
+ entries = [
+ self._extract_video_info(url, contentResource[video['id']], video)
+ for video in videos_data]
+
return {
'_type': 'multi_video',
'id': video_id,
'title': video_title,
- 'entries': parts,
+ 'entries': entries,
}
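
The token built above is a standard HS256 JWT: base64url(header) and base64url(payload) joined with a dot and signed with HMAC-SHA256, with the access ID carried in the 'kid' header. A rough sketch of what the jwt_encode_hs256 helper has to produce (the real helper lives in hypervideo_dl/utils.py; this illustration may differ in detail, e.g. JSON key order):

    import base64
    import hashlib
    import hmac
    import json

    def b64url(data):
        return base64.urlsafe_b64encode(data).rstrip(b'=')

    def jwt_encode_hs256_sketch(payload, key, headers=None):
        header = {'alg': 'HS256', 'typ': 'JWT'}
        header.update(headers or {})  # e.g. {'kid': 'x_atv'}
        signing_input = (b64url(json.dumps(header).encode())
                         + b'.' + b64url(json.dumps(payload).encode()))
        signature = hmac.new(key.encode(), signing_input, hashlib.sha256).digest()
        return signing_input + b'.' + b64url(signature)  # bytes, hence .decode('utf-8') above
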
diff --git a/hypervideo_dl/extractor/audius.py b/hypervideo_dl/extractor/audius.py
new file mode 100644
index 0000000..fa64995
--- /dev/null
+++ b/hypervideo_dl/extractor/audius.py
@@ -0,0 +1,274 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, try_get, compat_str, str_or_none
+from ..compat import compat_urllib_parse_unquote
+
+
+class AudiusBaseIE(InfoExtractor):
+ _API_BASE = None
+ _API_V = '/v1'
+
+ def _get_response_data(self, response):
+ if isinstance(response, dict):
+ response_data = response.get('data')
+ if response_data is not None:
+ return response_data
+ if len(response) == 1 and 'message' in response:
+ raise ExtractorError('API error: %s' % response['message'],
+ expected=True)
+ raise ExtractorError('Unexpected API response')
+
+ def _select_api_base(self):
+ """Selecting one of the currently available API hosts"""
+ response = super(AudiusBaseIE, self)._download_json(
+ 'https://api.audius.co/', None,
+ note='Requesting available API hosts',
+ errnote='Unable to request available API hosts')
+ hosts = self._get_response_data(response)
+ if isinstance(hosts, list):
+ self._API_BASE = random.choice(hosts)
+ return
+ raise ExtractorError('Unable to get available API hosts')
+
+ @staticmethod
+ def _prepare_url(url, title):
+ """
+ Audius removes forward slashes from the URI but leaves backslashes.
+ The problem is that current versions of Chrome replace backslashes
+ in the address bar with forward slashes, so a link copied from there
+ and pasted into youtube-dl cannot be downloaded, since the Audius API
+ won't be able to resolve the URL. This helper converts such forward
+ slashes back to escaped backslashes before resolving.
+ """
+ url = compat_urllib_parse_unquote(url)
+ title = compat_urllib_parse_unquote(title)
+ if '/' in title or '%2F' in title:
+ fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
+ return url.replace(title, fixed_title)
+ return url
+
+ def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata',
+ expected_status=None):
+ if self._API_BASE is None:
+ self._select_api_base()
+ try:
+ response = super(AudiusBaseIE, self)._download_json(
+ '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
+ errnote=errnote, expected_status=expected_status)
+ except ExtractorError as exc:
+ # some Audius API hosts may not work as expected and return HTML
+ if 'Failed to parse JSON' in compat_str(exc):
+ raise ExtractorError('An error occurred while receiving data. Try again',
+ expected=True)
+ raise exc
+ return self._get_response_data(response)
+
+ def _resolve_url(self, url, item_id):
+ return self._api_request('/resolve?url=%s' % url, item_id,
+ expected_status=404)
+
+
+class AudiusIE(AudiusBaseIE):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
+ IE_DESC = 'Audius.co'
+ _TESTS = [
+ {
+ # URL from the Chrome address bar, which replaces backslashes with forward slashes
+ 'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
+ 'md5': '92c35d3e754d5a0f17eef396b0d33582',
+ 'info_dict': {
+ 'id': 'xd8gY',
+ 'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
+ 'ext': 'mp3',
+ 'description': 'Description',
+ 'duration': 30,
+ 'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
+ 'artist': 'test',
+ 'genre': 'Electronic',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ }
+ },
+ {
+ # Regular track
+ 'url': 'https://audius.co/voltra/radar-103692',
+ 'md5': '491898a0a8de39f20c5d6a8a80ab5132',
+ 'info_dict': {
+ 'id': 'KKdy2',
+ 'title': 'RADAR',
+ 'ext': 'mp3',
+ 'duration': 318,
+ 'track': 'RADAR',
+ 'artist': 'voltra',
+ 'genre': 'Trance',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ }
+ },
+ ]
+
+ _ARTWORK_MAP = {
+ "150x150": 150,
+ "480x480": 480,
+ "1000x1000": 1000
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ track_id = try_get(mobj, lambda x: x.group('track_id'))
+ if track_id is None:
+ title = mobj.group('title')
+ # uploader = mobj.group('uploader')
+ url = self._prepare_url(url, title)
+ track_data = self._resolve_url(url, title)
+ else: # API link
+ title = None
+ # uploader = None
+ track_data = self._api_request('/tracks/%s' % track_id, track_id)
+
+ if not isinstance(track_data, dict):
+ raise ExtractorError('Unexpected API response')
+
+ track_id = track_data.get('id')
+ if track_id is None:
+ raise ExtractorError('Unable to get ID of the track')
+
+ artworks_data = track_data.get('artwork')
+ thumbnails = []
+ if isinstance(artworks_data, dict):
+ for quality_key, thumbnail_url in artworks_data.items():
+ thumbnail = {
+ "url": thumbnail_url
+ }
+ quality_code = self._ARTWORK_MAP.get(quality_key)
+ if quality_code is not None:
+ thumbnail['preference'] = quality_code
+ thumbnails.append(thumbnail)
+
+ return {
+ 'id': track_id,
+ 'title': track_data.get('title', title),
+ 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
+ 'ext': 'mp3',
+ 'description': track_data.get('description'),
+ 'duration': track_data.get('duration'),
+ 'track': track_data.get('title'),
+ 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
+ 'genre': track_data.get('genre'),
+ 'thumbnails': thumbnails,
+ 'view_count': track_data.get('play_count'),
+ 'like_count': track_data.get('favorite_count'),
+ 'repost_count': track_data.get('repost_count'),
+ }
+
+
+class AudiusTrackIE(AudiusIE):
+ _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
+ IE_NAME = 'audius:track'
+ IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
+ _TESTS = [
+ {
+ 'url': 'audius:9RWlo',
+ 'only_matching': True
+ },
+ {
+ 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
+ 'only_matching': True
+ },
+ ]
+
+
+class AudiusPlaylistIE(AudiusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
+ IE_NAME = 'audius:playlist'
+ IE_DESC = 'Audius.co playlists'
+ _TEST = {
+ 'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
+ 'info_dict': {
+ 'id': 'DNvjN',
+ 'title': 'test playlist',
+ 'description': 'Test description\n\nlol',
+ },
+ 'playlist_count': 175,
+ }
+
+ def _build_playlist(self, tracks):
+ entries = []
+ for track in tracks:
+ if not isinstance(track, dict):
+ raise ExtractorError('Unexpected API response')
+ track_id = str_or_none(track.get('id'))
+ if not track_id:
+ raise ExtractorError('Unable to get track ID from playlist')
+ entries.append(self.url_result(
+ 'audius:%s' % track_id,
+ ie=AudiusTrackIE.ie_key(), video_id=track_id))
+ return entries
+
+ def _real_extract(self, url):
+ self._select_api_base()
+ mobj = self._match_valid_url(url)
+ title = mobj.group('title')
+ # uploader = mobj.group('uploader')
+ url = self._prepare_url(url, title)
+ playlist_response = self._resolve_url(url, title)
+
+ if not isinstance(playlist_response, list) or len(playlist_response) != 1:
+ raise ExtractorError('Unexpected API response')
+
+ playlist_data = playlist_response[0]
+ if not isinstance(playlist_data, dict):
+ raise ExtractorError('Unexpected API response')
+
+ playlist_id = playlist_data.get('id')
+ if playlist_id is None:
+ raise ExtractorError('Unable to get playlist ID')
+
+ playlist_tracks = self._api_request(
+ '/playlists/%s/tracks' % playlist_id,
+ title, note='Downloading playlist tracks metadata',
+ errnote='Unable to download playlist tracks metadata')
+ if not isinstance(playlist_tracks, list):
+ raise ExtractorError('Unexpected API response')
+
+ entries = self._build_playlist(playlist_tracks)
+ return self.playlist_result(entries, playlist_id,
+ playlist_data.get('playlist_name', title),
+ playlist_data.get('description'))
+
+
+class AudiusProfileIE(AudiusPlaylistIE):
+ IE_NAME = 'audius:artist'
+ IE_DESC = 'Audius.co profile/artist pages'
+ _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
+ _TEST = {
+ 'url': 'https://audius.co/pzl/',
+ 'info_dict': {
+ 'id': 'ezRo7',
+ 'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
+ 'title': 'pzl',
+ },
+ 'playlist_count': 24,
+ }
+
+ def _real_extract(self, url):
+ self._select_api_base()
+ profile_id = self._match_id(url)
+ try:
+ _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
+ except ExtractorError as e:
+ raise ExtractorError('Could not download profile info; ' + str(e))
+ profile_audius_id = _profile_data[0]['id']
+ profile_bio = _profile_data[0].get('bio')
+
+ api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
+ return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
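
For reference, the discovery-host handshake that _select_api_base performs boils down to the following (standalone sketch; the endpoint and response shape come from the code above, the urllib usage is an assumption):

    import json
    import random
    import urllib.request

    def pick_audius_host():
        # https://api.audius.co returns {"data": ["https://host1", ...]}
        with urllib.request.urlopen('https://api.audius.co/') as resp:
            hosts = json.load(resp)['data']
        return random.choice(hosts)

    # all later calls go through the chosen host, e.g.
    # '%s/v1/tracks/%s/stream' % (pick_audius_host(), track_id)
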
diff --git a/hypervideo_dl/extractor/awaan.py b/hypervideo_dl/extractor/awaan.py
index 3a7700c..22cc10d 100644
--- a/hypervideo_dl/extractor/awaan.py
+++ b/hypervideo_dl/extractor/awaan.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
import base64
from .common import InfoExtractor
@@ -19,10 +18,10 @@ from ..utils import (
class AWAANIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
+ _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?'
def _real_extract(self, url):
- show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
+ show_id, video_id, season_id = self._match_valid_url(url).groups()
if video_id and int(video_id) > 0:
return self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
@@ -154,7 +153,7 @@ class AWAANSeasonIE(InfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- show_id, season_id = re.match(self._VALID_URL, url).groups()
+ show_id, season_id = self._match_valid_url(url).groups()
data = {}
if season_id:
diff --git a/hypervideo_dl/extractor/azmedien.py b/hypervideo_dl/extractor/azmedien.py
index 9302669..fee640e 100644
--- a/hypervideo_dl/extractor/azmedien.py
+++ b/hypervideo_dl/extractor/azmedien.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from .kaltura import KalturaIE
@@ -51,7 +50,7 @@ class AZMedienIE(InfoExtractor):
_PARTNER_ID = '1719221'
def _real_extract(self, url):
- host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups()
+ host, display_id, article_id, entry_id = self._match_valid_url(url).groups()
if not entry_id:
entry_id = self._download_json(
diff --git a/hypervideo_dl/extractor/baidu.py b/hypervideo_dl/extractor/baidu.py
index 234a661..364fd94 100644
--- a/hypervideo_dl/extractor/baidu.py
+++ b/hypervideo_dl/extractor/baidu.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import unescapeHTML
@@ -33,7 +32,7 @@ class BaiduVideoIE(InfoExtractor):
path, category, playlist_id), playlist_id, note)
def _real_extract(self, url):
- category, playlist_id = re.match(self._VALID_URL, url).groups()
+ category, playlist_id = self._match_valid_url(url).groups()
if category == 'show':
category = 'tvshow'
if category == 'tv':
diff --git a/hypervideo_dl/extractor/bandcamp.py b/hypervideo_dl/extractor/bandcamp.py
index dbe57c7..b664145 100644
--- a/hypervideo_dl/extractor/bandcamp.py
+++ b/hypervideo_dl/extractor/bandcamp.py
@@ -31,9 +31,9 @@ class BandcampIE(InfoExtractor):
'info_dict': {
'id': '1812978515',
'ext': 'mp3',
- 'title': "hypervideo \"'/\\ä↭ - hypervideo \"'/\\ä↭ - hypervideo test song \"'/\\ä↭",
+ 'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
'duration': 9.8485,
- 'uploader': 'hypervideo "\'/\\ä↭',
+ 'uploader': 'youtube-dl "\'/\\ä↭',
'upload_date': '20121129',
'timestamp': 1354224127,
},
@@ -212,7 +212,7 @@ class BandcampIE(InfoExtractor):
class BandcampAlbumIE(BandcampIE):
IE_NAME = 'Bandcamp:album'
- _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -294,7 +294,7 @@ class BandcampAlbumIE(BandcampIE):
else super(BandcampAlbumIE, cls).suitable(url))
def _real_extract(self, url):
- uploader_id, album_id = re.match(self._VALID_URL, url).groups()
+ uploader_id, album_id = self._match_valid_url(url).groups()
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
tralbum = self._extract_data_attr(webpage, playlist_id)
@@ -389,3 +389,43 @@ class BandcampWeeklyIE(BandcampIE):
'episode_id': show_id,
'formats': formats
}
+
+
+class BandcampMusicIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music'
+ _TESTS = [{
+ 'url': 'https://steviasphere.bandcamp.com/music',
+ 'playlist_mincount': 47,
+ 'info_dict': {
+ 'id': 'steviasphere',
+ },
+ }, {
+ 'url': 'https://coldworldofficial.bandcamp.com/music',
+ 'playlist_mincount': 10,
+ 'info_dict': {
+ 'id': 'coldworldofficial',
+ },
+ }, {
+ 'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
+ 'playlist_mincount': 399,
+ 'info_dict': {
+ 'id': 'nuclearwarnowproductions',
+ },
+ }
+ ]
+
+ _TYPE_IE_DICT = {
+ 'album': BandcampAlbumIE.ie_key(),
+ 'track': BandcampIE.ie_key()
+ }
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
+ entries = [
+ self.url_result(
+ f'https://{id}.bandcamp.com/{item[0]}',
+ ie=self._TYPE_IE_DICT[item[1]])
+ for item in items]
+ return self.playlist_result(entries, id)
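
The href scrape in BandcampMusicIE can be illustrated in isolation (the sample HTML below is made up; the regex is the one from the extractor):

    import re

    webpage = '<a href="/album/jazz-format-mixtape-vol-1">a</a><a href="/track/radar">t</a>'
    items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
    assert items == [('album/jazz-format-mixtape-vol-1', 'album'), ('track/radar', 'track')]
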
diff --git a/hypervideo_dl/extractor/bannedvideo.py b/hypervideo_dl/extractor/bannedvideo.py
new file mode 100644
index 0000000..3db1151
--- /dev/null
+++ b/hypervideo_dl/extractor/bannedvideo.py
@@ -0,0 +1,158 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ int_or_none,
+ url_or_none,
+ float_or_none,
+ unified_timestamp,
+)
+
+
+class BannedVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?banned\.video/watch\?id=(?P<id>[0-9a-f]{24})'
+ _TESTS = [{
+ 'url': 'https://banned.video/watch?id=5e7a859644e02200c6ef5f11',
+ 'md5': '14b6e81d41beaaee2215cd75c6ed56e4',
+ 'info_dict': {
+ 'id': '5e7a859644e02200c6ef5f11',
+ 'ext': 'mp4',
+ 'title': 'China Discovers Origin of Corona Virus: Issues Emergency Statement',
+ 'thumbnail': r're:^https?://(?:www\.)?assets\.infowarsmedia.com/images/',
+ 'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
+ 'upload_date': '20200324',
+ 'timestamp': 1585087895,
+ }
+ }]
+
+ _GRAPHQL_GETMETADATA_QUERY = '''
+query GetVideoAndComments($id: String!) {
+ getVideo(id: $id) {
+ streamUrl
+ directUrl
+ unlisted
+ live
+ tags {
+ name
+ }
+ title
+ summary
+ playCount
+ largeImage
+ videoDuration
+ channel {
+ _id
+ title
+ }
+ createdAt
+ }
+ getVideoComments(id: $id, limit: 999999, offset: 0) {
+ _id
+ content
+ user {
+ _id
+ username
+ }
+ voteCount {
+ positive
+ }
+ createdAt
+ replyCount
+ }
+}'''
+
+ _GRAPHQL_GETCOMMENTSREPLIES_QUERY = '''
+query GetCommentReplies($id: String!) {
+ getCommentReplies(id: $id, limit: 999999, offset: 0) {
+ _id
+ content
+ user {
+ _id
+ username
+ }
+ voteCount {
+ positive
+ }
+ createdAt
+ replyCount
+ }
+}'''
+
+ _GRAPHQL_QUERIES = {
+ 'GetVideoAndComments': _GRAPHQL_GETMETADATA_QUERY,
+ 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
+ }
+
+ def _call_api(self, video_id, id, operation, note):
+ return self._download_json(
+ 'https://api.infowarsmedia.com/graphql', video_id, note=note,
+ headers={
+ 'Content-Type': 'application/json; charset=utf-8'
+ }, data=json.dumps({
+ 'variables': {'id': id},
+ 'operationName': operation,
+ 'query': self._GRAPHQL_QUERIES[operation]
+ }).encode('utf8')).get('data')
+
+ def _get_comments(self, video_id, comments, comment_data):
+ yield from comments
+ for comment in comment_data.copy():
+ comment_id = comment.get('_id')
+ if comment.get('replyCount') > 0:
+ reply_json = self._call_api(
+ video_id, comment_id, 'GetCommentReplies',
+ f'Downloading replies for comment {comment_id}')
+ for reply in reply_json.get('getCommentReplies'):
+ yield self._parse_comment(reply, comment_id)
+
+ @staticmethod
+ def _parse_comment(comment_data, parent):
+ return {
+ 'id': comment_data.get('_id'),
+ 'text': comment_data.get('content'),
+ 'author': try_get(comment_data, lambda x: x['user']['username']),
+ 'author_id': try_get(comment_data, lambda x: x['user']['_id']),
+ 'timestamp': unified_timestamp(comment_data.get('createdAt')),
+ 'parent': parent,
+ 'like_count': try_get(comment_data, lambda x: x['voteCount']['positive']),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_json = self._call_api(video_id, video_id, 'GetVideoAndComments', 'Downloading video metadata')
+ video_info = video_json['getVideo']
+ is_live = video_info.get('live')
+ comments = [self._parse_comment(comment, 'root') for comment in video_json.get('getVideoComments')]
+
+ formats = [{
+ 'format_id': 'direct',
+ 'quality': 1,
+ 'url': video_info.get('directUrl'),
+ 'ext': 'mp4',
+ }] if url_or_none(video_info.get('directUrl')) else []
+ if video_info.get('streamUrl'):
+ formats.extend(self._extract_m3u8_formats(
+ video_info.get('streamUrl'), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', live=True))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_info.get('title')[:-1],
+ 'formats': formats,
+ 'is_live': is_live,
+ 'description': video_info.get('summary'),
+ 'channel': try_get(video_info, lambda x: x['channel']['title']),
+ 'channel_id': try_get(video_info, lambda x: x['channel']['_id']),
+ 'view_count': int_or_none(video_info.get('playCount')),
+ 'thumbnail': url_or_none(video_info.get('largeImage')),
+ 'duration': float_or_none(video_info.get('videoDuration')),
+ 'timestamp': unified_timestamp(video_info.get('createdAt')),
+ 'tags': [tag.get('name') for tag in video_info.get('tags')],
+ 'availability': self._availability(is_unlisted=video_info.get('unlisted')),
+ 'comments': comments,
+ '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments'))
+ }
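
_call_api above is a thin wrapper over a single POST; stripped of the InfoExtractor plumbing it amounts to this (standalone sketch; the endpoint, headers and body shape are taken from the code, the urllib usage is an assumption):

    import json
    import urllib.request

    def banned_video_graphql(video_id, operation, query):
        req = urllib.request.Request(
            'https://api.infowarsmedia.com/graphql',
            data=json.dumps({
                'variables': {'id': video_id},
                'operationName': operation,  # e.g. 'GetVideoAndComments'
                'query': query,
            }).encode('utf8'),
            headers={'Content-Type': 'application/json; charset=utf-8'})
        with urllib.request.urlopen(req) as resp:
            return json.load(resp).get('data')
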
diff --git a/hypervideo_dl/extractor/bbc.py b/hypervideo_dl/extractor/bbc.py
index 247d982..4e2dcd7 100644
--- a/hypervideo_dl/extractor/bbc.py
+++ b/hypervideo_dl/extractor/bbc.py
@@ -10,9 +10,7 @@ from .common import InfoExtractor
from ..compat import (
compat_etree_Element,
compat_HTTPError,
- compat_parse_qs,
compat_str,
- compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..utils import (
@@ -26,6 +24,7 @@ from ..utils import (
js_to_json,
parse_duration,
parse_iso8601,
+ parse_qs,
strip_or_none,
try_get,
unescapeHTML,
@@ -589,8 +588,8 @@ class BBCIE(BBCCoUkIE):
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
_MEDIA_SETS = [
- 'mobile-tablet-main',
'pc',
+ 'mobile-tablet-main',
]
_TESTS = [{
@@ -1271,7 +1270,7 @@ class BBCIE(BBCCoUkIE):
entries = []
for num, media_meta in enumerate(medias, start=1):
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
- if not formats:
+ if not formats and not self.get_param('ignore_no_formats'):
continue
self._sort_formats(formats)
@@ -1410,7 +1409,7 @@ class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
def _real_extract(self, url):
pid = self._match_id(url)
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs = parse_qs(url)
series_id = qs.get('seriesId', [None])[0]
page = qs.get('page', [None])[0]
per_page = 36 if page else self._PAGE_SIZE
diff --git a/hypervideo_dl/extractor/beatport.py b/hypervideo_dl/extractor/beatport.py
index e607094..e1cf8b4 100644
--- a/hypervideo_dl/extractor/beatport.py
+++ b/hypervideo_dl/extractor/beatport.py
@@ -40,7 +40,7 @@ class BeatportIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
track_id = mobj.group('id')
display_id = mobj.group('display_id')
@@ -69,12 +69,10 @@ class BeatportIE(InfoExtractor):
'vcodec': 'none',
}
if ext == 'mp3':
- fmt['preference'] = 0
fmt['acodec'] = 'mp3'
fmt['abr'] = 96
fmt['asr'] = 44100
elif ext == 'mp4':
- fmt['preference'] = 1
fmt['acodec'] = 'aac'
fmt['abr'] = 96
fmt['asr'] = 44100
diff --git a/hypervideo_dl/extractor/beeg.py b/hypervideo_dl/extractor/beeg.py
index 5788d13..8fbabe7 100644
--- a/hypervideo_dl/extractor/beeg.py
+++ b/hypervideo_dl/extractor/beeg.py
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_urlparse,
)
from ..utils import (
int_or_none,
+ parse_qs,
unified_timestamp,
)
@@ -57,7 +57,7 @@ class BeegIE(InfoExtractor):
query = {
'v': 2,
}
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
t = qs.get('t', [''])[0].split('-')
if len(t) > 1:
query.update({
diff --git a/hypervideo_dl/extractor/behindkink.py b/hypervideo_dl/extractor/behindkink.py
index 9bca853..2c97f98 100644
--- a/hypervideo_dl/extractor/behindkink.py
+++ b/hypervideo_dl/extractor/behindkink.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import url_basename
@@ -24,7 +23,7 @@ class BehindKinkIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/bellmedia.py b/hypervideo_dl/extractor/bellmedia.py
index 9f9de96..904c17e 100644
--- a/hypervideo_dl/extractor/bellmedia.py
+++ b/hypervideo_dl/extractor/bellmedia.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -78,7 +77,7 @@ class BellMediaIE(InfoExtractor):
}
def _real_extract(self, url):
- domain, video_id = re.match(self._VALID_URL, url).groups()
+ domain, video_id = self._match_valid_url(url).groups()
domain = domain.split('.')[0]
return {
'_type': 'url_transparent',
diff --git a/hypervideo_dl/extractor/bet.py b/hypervideo_dl/extractor/bet.py
index d7ceaa8..2c71442 100644
--- a/hypervideo_dl/extractor/bet.py
+++ b/hypervideo_dl/extractor/bet.py
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate
+# TODO Remove - Reason: Outdated Site
+
class BetIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
diff --git a/hypervideo_dl/extractor/bilibili.py b/hypervideo_dl/extractor/bilibili.py
index 08e12cc..8d66b43 100644
--- a/hypervideo_dl/extractor/bilibili.py
+++ b/hypervideo_dl/extractor/bilibili.py
@@ -1,25 +1,33 @@
# coding: utf-8
-from __future__ import unicode_literals
import hashlib
+import itertools
+import functools
import re
+import math
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
+ compat_urllib_parse_urlparse
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
parse_iso8601,
+ traverse_obj,
+ try_get,
smuggle_url,
+ srt_subtitles_timecode,
str_or_none,
+ str_to_int,
strip_jsonp,
unified_timestamp,
unsmuggle_url,
urlencode_postdata,
+ OnDemandPagedList
)
@@ -32,13 +40,14 @@ class BiliBiliIE(InfoExtractor):
(?:
video/[aA][vV]|
anime/(?P<anime_id>\d+)/play\#
- )(?P<id_bv>\d+)|
- video/[bB][vV](?P<id>[^/?#&]+)
+ )(?P<id>\d+)|
+ (s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
)
+ (?:/?\?p=(?P<page>\d+))?
'''
_TESTS = [{
- 'url': 'http://www.bilibili.tv/video/av1074402/',
+ 'url': 'http://www.bilibili.com/video/av1074402/',
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
'info_dict': {
'id': '1074402',
@@ -57,6 +66,10 @@ class BiliBiliIE(InfoExtractor):
'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
'only_matching': True,
}, {
+ # bilibili.tv
+ 'url': 'http://www.bilibili.tv/video/av1074402/',
+ 'only_matching': True,
+ }, {
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
'md5': '3f721ad1e75030cc06faf73587cfec57',
'info_dict': {
@@ -86,7 +99,7 @@ class BiliBiliIE(InfoExtractor):
'upload_date': '20170301',
},
'params': {
- 'skip_download': True, # Test metadata only
+ 'skip_download': True,
},
}, {
'info_dict': {
@@ -100,13 +113,21 @@ class BiliBiliIE(InfoExtractor):
'upload_date': '20170301',
},
'params': {
- 'skip_download': True, # Test metadata only
+ 'skip_download': True,
},
}]
}, {
# new BV video id format
'url': 'https://www.bilibili.com/video/BV1JE411F741',
'only_matching': True,
+ }, {
+ # Anthology
+ 'url': 'https://www.bilibili.com/video/BV1bK411W797',
+ 'info_dict': {
+ 'id': 'BV1bK411W797',
+ 'title': '物语中的人物是如何吐槽自己的OP的'
+ },
+ 'playlist_count': 17,
}]
_APP_KEY = 'iVGUTjsxvpLeuDCf'
@@ -123,13 +144,32 @@ class BiliBiliIE(InfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id') or mobj.group('id_bv')
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id_bv') or mobj.group('id')
+
+ av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
+ video_id = av_id
+
anime_id = mobj.group('anime_id')
+ page_id = mobj.group('page')
webpage = self._download_webpage(url, video_id)
+ # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
+ # If the video has no page argument, check to see if it's an anthology
+ if page_id is None:
+ if not self.get_param('noplaylist'):
+ r = self._extract_anthology_entries(bv_id, video_id, webpage)
+ if r is not None:
+ self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
+ return r
+ else:
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
if 'anime/' not in url:
cid = self._search_regex(
+ r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
+ default=None
+ ) or self._search_regex(
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
default=None
) or compat_parse_qs(self._search_regex(
@@ -190,7 +230,7 @@ class BiliBiliIE(InfoExtractor):
formats.append({
'url': backup_url,
# backup URLs have lower priorities
- 'preference': -2 if 'hd.mp4' in backup_url else -3,
+ 'quality': -2 if 'hd.mp4' in backup_url else -3,
})
for a_format in formats:
@@ -208,9 +248,20 @@ class BiliBiliIE(InfoExtractor):
break
title = self._html_search_regex(
- ('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
- '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+ (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
group='title')
+
+ # Get part title for anthologies
+ if page_id is not None:
+ # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
+ part_title = try_get(
+ self._download_json(
+ f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
+ video_id, note='Extracting videos in anthology'),
+ lambda x: x['data'][int(page_id) - 1]['part'])
+ title = part_title or title
+
description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
@@ -220,7 +271,8 @@ class BiliBiliIE(InfoExtractor):
# TODO 'view_count' requires deobfuscating Javascript
info = {
- 'id': video_id,
+ 'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id),
+ 'cid': cid,
'title': title,
'description': description,
'timestamp': timestamp,
@@ -229,33 +281,117 @@ class BiliBiliIE(InfoExtractor):
}
uploader_mobj = re.search(
- r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
+ r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
webpage)
if uploader_mobj:
info.update({
- 'uploader': uploader_mobj.group('name'),
+ 'uploader': uploader_mobj.group('name').strip(),
'uploader_id': uploader_mobj.group('id'),
})
+
if not info.get('uploader'):
info['uploader'] = self._html_search_meta(
'author', webpage, 'uploader', default=None)
+ top_level_info = {
+ 'tags': traverse_obj(self._download_json(
+ f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}',
+ video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
+ }
+
+ entries[0]['subtitles'] = {
+ 'danmaku': [{
+ 'ext': 'xml',
+ 'url': f'https://comment.bilibili.com/{cid}.xml',
+ }]
+ }
+
+ r'''
+    # Requires https://github.com/m13253/danmaku2ass which is licensed under GPL3
+ # See https://github.com/animelover1984/youtube-dl
+
+ raw_danmaku = self._download_webpage(
+ f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments')
+ danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
+ entries[0]['subtitles'] = {
+ 'danmaku': [{
+ 'ext': 'ass',
+ 'data': danmaku
+ }]
+ }
+ '''
+
+ top_level_info['__post_extractor'] = self.extract_comments(video_id)
+
for entry in entries:
entry.update(info)
if len(entries) == 1:
+ entries[0].update(top_level_info)
return entries[0]
- else:
- for idx, entry in enumerate(entries):
- entry['id'] = '%s_part%d' % (video_id, (idx + 1))
-
- return {
- '_type': 'multi_video',
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'entries': entries,
- }
+
+ for idx, entry in enumerate(entries):
+ entry['id'] = '%s_part%d' % (video_id, (idx + 1))
+
+ return {
+ '_type': 'multi_video',
+ 'id': str(video_id),
+ 'bv_id': bv_id,
+ 'title': title,
+ 'description': description,
+ 'entries': entries,
+ **info, **top_level_info
+ }
+
+ def _extract_anthology_entries(self, bv_id, video_id, webpage):
+ title = self._html_search_regex(
+ (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+ group='title')
+ json_data = self._download_json(
+ f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
+ video_id, note='Extracting videos in anthology')
+
+ if json_data['data']:
+ return self.playlist_from_matches(
+ json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(),
+ getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))
+
+ def _get_video_id_set(self, id, is_bv):
+ query = {'bvid': id} if is_bv else {'aid': id}
+ response = self._download_json(
+ "http://api.bilibili.cn/x/web-interface/view",
+ id, query=query,
+ note='Grabbing original ID via API')
+
+ if response['code'] == -400:
+ raise ExtractorError('Video ID does not exist', expected=True, video_id=id)
+ elif response['code'] != 0:
+ raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})',
+ expected=True, video_id=id)
+ return response['data']['aid'], response['data']['bvid']
+
+ def _get_comments(self, video_id, commentPageNumber=0):
+ for idx in itertools.count(1):
+ replies = traverse_obj(
+ self._download_json(
+ f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
+ video_id, note=f'Extracting comments from page {idx}'),
+ ('data', 'replies')) or []
+ for children in map(self._get_all_children, replies):
+ yield from children
+
+ def _get_all_children(self, reply):
+ yield {
+ 'author': traverse_obj(reply, ('member', 'uname')),
+ 'author_id': traverse_obj(reply, ('member', 'mid')),
+ 'id': reply.get('rpid'),
+ 'text': traverse_obj(reply, ('content', 'message')),
+ 'timestamp': reply.get('ctime'),
+ 'parent': reply.get('parent') or 'root',
+ }
+ for children in map(self._get_all_children, reply.get('replies') or []):
+ yield from children
class BiliBiliBangumiIE(InfoExtractor):
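
The _get_all_children helper above flattens Bilibili's nested reply tree with a recursive generator, yielding each comment as a flat dict before walking its children depth-first. A self-contained sketch of the same pattern on a toy tree:

    def all_children(reply):
        # yield the reply itself, then recurse into its children
        yield {'id': reply.get('rpid'), 'parent': reply.get('parent') or 'root'}
        for child in reply.get('replies') or []:
            yield from all_children(child)

    tree = {'rpid': 1, 'replies': [
        {'rpid': 2, 'parent': 1, 'replies': [{'rpid': 3, 'parent': 2}]},
        {'rpid': 4, 'parent': 1},
    ]}
    print([c['id'] for c in all_children(tree)])  # [1, 2, 3, 4]
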
@@ -325,6 +461,136 @@ class BiliBiliBangumiIE(InfoExtractor):
season_info.get('bangumi_title'), season_info.get('evaluate'))
+class BilibiliChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)'
+ _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
+ _TESTS = [{
+ 'url': 'https://space.bilibili.com/3985676/video',
+ 'info_dict': {},
+ 'playlist_mincount': 112,
+ }]
+
+ def _entries(self, list_id):
+ count, max_count = 0, None
+
+ for page_num in itertools.count(1):
+ data = self._download_json(
+ self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']
+
+ max_count = max_count or try_get(data, lambda x: x['page']['count'])
+
+ entries = try_get(data, lambda x: x['list']['vlist'])
+ if not entries:
+ return
+ for entry in entries:
+ yield self.url_result(
+ 'https://www.bilibili.com/video/%s' % entry['bvid'],
+ BiliBiliIE.ie_key(), entry['bvid'])
+
+ count += len(entries)
+ if max_count and count >= max_count:
+ return
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ return self.playlist_result(self._entries(list_id), list_id)
+
+
+class BilibiliCategoryIE(InfoExtractor):
+ IE_NAME = 'Bilibili category extractor'
+ _MAX_RESULTS = 1000000
+ _VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.com/v/kichiku/mad',
+ 'info_dict': {
+ 'id': 'kichiku: mad',
+ 'title': 'kichiku: mad'
+ },
+ 'playlist_mincount': 45,
+ 'params': {
+ 'playlistend': 45
+ }
+ }]
+
+ def _fetch_page(self, api_url, num_pages, query, page_num):
+ parsed_json = self._download_json(
+ api_url, query, query={'Search_key': query, 'pn': page_num},
+ note='Extracting results from page %s of %s' % (page_num, num_pages))
+
+ video_list = try_get(parsed_json, lambda x: x['data']['archives'], list)
+ if not video_list:
+ raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
+
+ for video in video_list:
+ yield self.url_result(
+ 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
+
+ def _entries(self, category, subcategory, query):
+ # map of categories : subcategories : RIDs
+ rid_map = {
+ 'kichiku': {
+ 'mad': 26,
+ 'manual_vocaloid': 126,
+ 'guide': 22,
+ 'theatre': 216,
+ 'course': 127
+ },
+ }
+
+ if category not in rid_map:
+ raise ExtractorError(
+ f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
+ if subcategory not in rid_map[category]:
+ raise ExtractorError(
+ f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
+ rid_value = rid_map[category][subcategory]
+
+ api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
+ page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
+ page_data = try_get(page_json, lambda x: x['data']['page'], dict)
+ count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
+ if count is None or not size:
+ raise ExtractorError('Failed to calculate either page count or size')
+
+ num_pages = math.ceil(count / size)
+
+ return OnDemandPagedList(functools.partial(
+ self._fetch_page, api_url, num_pages, query), size)
+
+ def _real_extract(self, url):
+ u = compat_urllib_parse_urlparse(url)
+ category, subcategory = u.path.split('/')[2:4]
+ query = '%s: %s' % (category, subcategory)
+
+ return self.playlist_result(self._entries(category, subcategory, query), query, query)
+
+
+class BiliBiliSearchIE(SearchInfoExtractor):
+ IE_DESC = 'Bilibili video search, "bilisearch" keyword'
+ _MAX_RESULTS = 100000
+ _SEARCH_KEY = 'bilisearch'
+
+ def _search_results(self, query):
+ for page_num in itertools.count(1):
+ videos = self._download_json(
+ 'https://api.bilibili.com/x/web-interface/search/type', query,
+ note=f'Extracting results from page {page_num}', query={
+ 'Search_key': query,
+ 'keyword': query,
+ 'page': page_num,
+ 'context': '',
+ 'order': 'pubdate',
+ 'duration': 0,
+ 'tids_2': '',
+ '__refresh__': 'true',
+ 'search_type': 'video',
+ 'tids': 0,
+ 'highlight': 1,
+ })['data'].get('result') or []
+ for video in videos:
+ yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
+
+
class BilibiliAudioBaseIE(InfoExtractor):
def _call_api(self, path, sid, query=None):
if not query:
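
BilibiliCategoryIE above hands OnDemandPagedList a functools.partial-bound page fetcher plus the page size, so pages are only downloaded when a playlist slice actually needs them. A rough standalone imitation of that calling convention (the real class lives in hypervideo_dl/utils.py and is more careful about caching and page indexing):

    import functools

    def fetch_page(api_url, page_num):
        # stand-in for _fetch_page's JSON request; returns fake entries
        return ['%s#page%d-item%d' % (api_url, page_num, i) for i in range(3)]

    def on_demand_slice(pagefunc, pagesize, start, end):
        # fetch only the pages that cover the requested index range
        for idx in range(start, end):
            page_num, offset = divmod(idx, pagesize)
            yield pagefunc(page_num)[offset]

    pagefunc = functools.partial(fetch_page, 'https://api.example.com/newlist')
    print(list(on_demand_slice(pagefunc, 3, 2, 5)))  # items 2..4, pages 0-1
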
@@ -367,6 +633,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
formats = [{
'url': play_data['cdns'][0],
'filesize': int_or_none(play_data.get('size')),
+ 'vcodec': 'none'
}]
song = self._call_api('song/info', au_id)
@@ -449,3 +716,152 @@ class BiliBiliPlayerIE(InfoExtractor):
return self.url_result(
'http://www.bilibili.tv/video/av%s/' % video_id,
ie=BiliBiliIE.ie_key(), video_id=video_id)
+
+
+class BiliIntlBaseIE(InfoExtractor):
+ _API_URL = 'https://api.bili{}/intl/gateway{}'
+
+ def _call_api(self, type, endpoint, id):
+ return self._download_json(self._API_URL.format(type, endpoint), id)['data']
+
+ def json2srt(self, json):
+ data = '\n\n'.join(
+ f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
+ for i, line in enumerate(json['body']))
+ return data
+
+ def _get_subtitles(self, type, ep_id):
+ sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id)
+ subtitles = {}
+ for sub in sub_json.get('subtitles', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ sub_data = self._download_json(sub_url, ep_id, fatal=False)
+ if not sub_data:
+ continue
+ subtitles.setdefault(sub.get('key', 'en'), []).append({
+ 'ext': 'srt',
+ 'data': self.json2srt(sub_data)
+ })
+ return subtitles
+
+ def _get_formats(self, type, ep_id):
+ video_json = self._call_api(type, f'/web/playurl?ep_id={ep_id}&platform=web', ep_id)
+ if not video_json:
+ self.raise_login_required(method='cookies')
+ video_json = video_json['playurl']
+ formats = []
+ for vid in video_json.get('video', []):
+ video_res = vid.get('video_resource') or {}
+ video_info = vid.get('stream_info') or {}
+ if not video_res.get('url'):
+ continue
+ formats.append({
+ 'url': video_res['url'],
+ 'ext': 'mp4',
+ 'format_note': video_info.get('desc_words'),
+ 'width': video_res.get('width'),
+ 'height': video_res.get('height'),
+ 'vbr': video_res.get('bandwidth'),
+ 'acodec': 'none',
+ 'vcodec': video_res.get('codecs'),
+ 'filesize': video_res.get('size'),
+ })
+ for aud in video_json.get('audio_resource', []):
+ if not aud.get('url'):
+ continue
+ formats.append({
+ 'url': aud['url'],
+ 'ext': 'mp4',
+ 'abr': aud.get('bandwidth'),
+ 'acodec': aud.get('codecs'),
+ 'vcodec': 'none',
+ 'filesize': aud.get('size'),
+ })
+
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_ep_info(self, type, episode_data, ep_id):
+ return {
+ 'id': ep_id,
+ 'title': episode_data.get('long_title') or episode_data['title'],
+ 'thumbnail': episode_data.get('cover'),
+ 'episode_number': str_to_int(episode_data.get('title')),
+ 'formats': self._get_formats(type, ep_id),
+ 'subtitles': self._get_subtitles(type, ep_id),
+ 'extractor_key': BiliIntlIE.ie_key(),
+ }
+
+
+class BiliIntlIE(BiliIntlBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.tv/en/play/34613/341736',
+ 'info_dict': {
+ 'id': '341736',
+ 'ext': 'mp4',
+ 'title': 'The First Night',
+ 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://www.biliintl.com/en/play/34613/341736',
+ 'info_dict': {
+ 'id': '341736',
+ 'ext': 'mp4',
+ 'title': 'The First Night',
+ 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }]
+
+ def _real_extract(self, url):
+ type, season_id, id = self._match_valid_url(url).groups()
+ data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={season_id}', id)
+ episode_data = next(
+ episode for episode in data_json.get('episodes', [])
+ if str(episode.get('ep_id')) == id)
+ return self._extract_ep_info(type, episode_data, id)
+
+
+class BiliIntlSeriesIE(BiliIntlBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.tv/en/play/34613',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': '34613',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://www.biliintl.com/en/play/34613',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': '34613',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bv',
+ },
+ }]
+
+ def _entries(self, id, type):
+ data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={id}', id)
+ for episode in data_json.get('episodes', []):
+ episode_id = str(episode.get('ep_id'))
+ yield self._extract_ep_info(type, episode, episode_id)
+
+ def _real_extract(self, url):
+ type, id = self._match_valid_url(url).groups()
+ return self.playlist_result(self._entries(id, type), playlist_id=id)
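
BiliIntlBaseIE.json2srt above renders Bilibili's JSON subtitle body as SRT: number each cue, format the 'from'/'to' seconds with srt_subtitles_timecode, then append the text. A self-contained sketch, assuming the utility emits the usual HH:MM:SS,mmm form:

    def srt_timecode(seconds):
        # assumed equivalent of utils.srt_subtitles_timecode
        msec = int(round(seconds * 1000))
        hours, msec = divmod(msec, 3600000)
        minutes, msec = divmod(msec, 60000)
        secs, msec = divmod(msec, 1000)
        return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, msec)

    body = [{'from': 0.0, 'to': 2.5, 'content': 'The first night'},
            {'from': 2.5, 'to': 5.0, 'content': 'begins.'}]
    print('\n\n'.join(
        '%d\n%s --> %s\n%s' % (i + 1, srt_timecode(line['from']),
                               srt_timecode(line['to']), line['content'])
        for i, line in enumerate(body)))
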
diff --git a/hypervideo_dl/extractor/bitchute.py b/hypervideo_dl/extractor/bitchute.py
index 0c773e6..dcae6f4 100644
--- a/hypervideo_dl/extractor/bitchute.py
+++ b/hypervideo_dl/extractor/bitchute.py
@@ -6,6 +6,8 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
orderedSet,
unified_strdate,
urlencode_postdata,
@@ -15,16 +17,16 @@ from ..utils import (
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
- 'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
+ 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
+ 'md5': '7e427d7ed7af5a75b5855705ec750e2b',
'info_dict': {
-            'id': 'szoMrox2JEI',
+            'id': 'UGlrF9o9b-Q',
'ext': 'mp4',
- 'title': 'Fuck bitches get money',
- 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
+ 'title': 'This is the first video on #BitChute !',
+ 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Victoria X Rave',
- 'upload_date': '20170813',
+ 'uploader': 'BitChute',
+ 'upload_date': '20170103',
},
}, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
@@ -34,6 +36,14 @@ class BitChuteIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % BitChuteIE._VALID_URL,
+ webpage)]
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -59,8 +69,14 @@ class BitChuteIE(InfoExtractor):
for format_url in orderedSet(format_urls)]
if not formats:
- formats = self._parse_html5_media_entries(
- url, webpage, video_id)[0]['formats']
+ entries = self._parse_html5_media_entries(
+ url, webpage, video_id)
+ if not entries:
+ error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
+ if error == 'Video Unavailable':
+ raise GeoRestrictedError(error)
+ raise ExtractorError(error)
+ formats = entries[0]['formats']
self._check_formats(formats, video_id)
self._sort_formats(formats)
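
The new BitChuteIE._extract_urls static method is presumably consumed by the generic extractor to discover BitChute players embedded in third-party pages. The same finditer pattern, runnable against a toy page:

    import re

    VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    page = '<iframe src="https://www.bitchute.com/embed/lbb5G1hjPhw/"></iframe>'
    print([m.group('url') for m in re.finditer(
        r'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>%s)' % VALID_URL, page)])
    # ['https://www.bitchute.com/embed/lbb5G1hjPhw']
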
diff --git a/hypervideo_dl/extractor/bitwave.py b/hypervideo_dl/extractor/bitwave.py
new file mode 100644
index 0000000..eb16c46
--- /dev/null
+++ b/hypervideo_dl/extractor/bitwave.py
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class BitwaveReplayIE(InfoExtractor):
+ IE_NAME = 'bitwave:replay'
+ _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$'
+ _TEST = {
+ 'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr',
+ 'only_matching': True
+ }
+
+ def _real_extract(self, url):
+ replay_id = self._match_id(url)
+ replay = self._download_json(
+ 'https://api.bitwave.tv/v1/replays/' + replay_id,
+ replay_id
+ )
+
+ return {
+ 'id': replay_id,
+ 'title': replay['data']['title'],
+ 'uploader': replay['data']['name'],
+ 'uploader_id': replay['data']['name'],
+ 'url': replay['data']['url'],
+ 'thumbnails': [
+ {'url': x} for x in replay['data']['thumbnails']
+ ],
+ }
+
+
+class BitwaveStreamIE(InfoExtractor):
+ IE_NAME = 'bitwave:stream'
+ _VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$'
+ _TEST = {
+ 'url': 'https://bitwave.tv/doomtube',
+ 'only_matching': True
+ }
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ channel = self._download_json(
+ 'https://api.bitwave.tv/v1/channels/' + username,
+ username)
+
+ formats = self._extract_m3u8_formats(
+ channel['data']['url'], username,
+ 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': username,
+ 'title': self._live_title(channel['data']['title']),
+ 'uploader': username,
+ 'uploader_id': username,
+ 'formats': formats,
+ 'thumbnail': channel['data']['thumbnail'],
+ 'is_live': True,
+ 'view_count': channel['data']['viewCount']
+ }
diff --git a/hypervideo_dl/extractor/blackboardcollaborate.py b/hypervideo_dl/extractor/blackboardcollaborate.py
new file mode 100644
index 0000000..8ae2941
--- /dev/null
+++ b/hypervideo_dl/extractor/blackboardcollaborate.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class BlackboardCollaborateIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<region>[a-z-]+)\.bbcollab\.com/
+ (?:
+ collab/ui/session/playback/load|
+ recording
+ )/
+ (?P<id>[^/]+)'''
+ _TESTS = [
+ {
+ 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
+ 'md5': 'bb7a055682ee4f25fdb5838cdf014541',
+ 'info_dict': {
+ 'id': '0a633b6a88824deb8c918f470b22b256',
+ 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1',
+ 'ext': 'mp4',
+ 'duration': 1896000,
+ 'timestamp': 1620331399,
+ 'upload_date': '20210506',
+ },
+ },
+ {
+ 'url': 'https://us.bbcollab.com/collab/ui/session/playback/load/76761522adfe4345a0dee6794bbcabda',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://ca.bbcollab.com/collab/ui/session/playback/load/b6399dcb44df4f21b29ebe581e22479d',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://eu.bbcollab.com/recording/51ed7b50810c4444a106e48cefb3e6b5',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://au.bbcollab.com/collab/ui/session/playback/load/2bccf7165d7c419ab87afc1ec3f3bb15',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ region = mobj.group('region')
+ video_id = mobj.group('id')
+ info = self._download_json(
+ 'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
+ duration = info.get('duration')
+ title = info['name']
+ upload_date = info.get('created')
+ streams = info['streams']
+ formats = [{'format_id': k, 'url': url} for k, url in streams.items()]
+
+ return {
+ 'duration': duration,
+ 'formats': formats,
+ 'id': video_id,
+ 'timestamp': parse_iso8601(upload_date),
+ 'title': title,
+ }
diff --git a/hypervideo_dl/extractor/blinkx.py b/hypervideo_dl/extractor/blinkx.py
new file mode 100644
index 0000000..d70a3b3
--- /dev/null
+++ b/hypervideo_dl/extractor/blinkx.py
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ remove_start,
+ int_or_none,
+)
+
+
+class BlinkxIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
+ IE_NAME = 'blinkx'
+
+ _TEST = {
+ 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
+ 'md5': '337cf7a344663ec79bf93a526a2e06c7',
+ 'info_dict': {
+ 'id': 'Da0Gw3xc',
+ 'ext': 'mp4',
+ 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
+ 'uploader': 'IGN News',
+ 'upload_date': '20150217',
+ 'timestamp': 1424215740,
+ 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
+ 'duration': 47.743333,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ display_id = video_id[:8]
+
+ api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
+ + 'video=%s' % video_id)
+ data_json = self._download_webpage(api_url, display_id)
+ data = json.loads(data_json)['api']['results'][0]
+ duration = None
+ thumbnails = []
+ formats = []
+ for m in data['media']:
+ if m['type'] == 'jpg':
+ thumbnails.append({
+ 'url': m['link'],
+ 'width': int(m['w']),
+ 'height': int(m['h']),
+ })
+ elif m['type'] == 'original':
+ duration = float(m['d'])
+ elif m['type'] == 'youtube':
+ yt_id = m['link']
+ self.to_screen('Youtube video detected: %s' % yt_id)
+ return self.url_result(yt_id, 'Youtube', video_id=yt_id)
+ elif m['type'] in ('flv', 'mp4'):
+ vcodec = remove_start(m['vcodec'], 'ff')
+ acodec = remove_start(m['acodec'], 'ff')
+ vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
+ abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
+ tbr = vbr + abr if vbr and abr else None
+ format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
+ formats.append({
+ 'format_id': format_id,
+ 'url': m['link'],
+ 'vcodec': vcodec,
+ 'acodec': acodec,
+ 'abr': abr,
+ 'vbr': vbr,
+ 'tbr': tbr,
+ 'width': int_or_none(m.get('w')),
+ 'height': int_or_none(m.get('h')),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'fullid': video_id,
+ 'title': data['title'],
+ 'formats': formats,
+ 'uploader': data.get('channel_name'),
+ 'timestamp': data.get('pubdate_epoch'),
+ 'description': data.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ }
diff --git a/hypervideo_dl/extractor/bokecc.py b/hypervideo_dl/extractor/bokecc.py
index 6017e83..6a89d36 100644
--- a/hypervideo_dl/extractor/bokecc.py
+++ b/hypervideo_dl/extractor/bokecc.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_parse_qs
@@ -23,7 +22,7 @@ class BokeCCBaseIE(InfoExtractor):
formats = [{
'format_id': format_id,
'url': quality.find('./copy').attrib['playurl'],
- 'preference': int(quality.attrib['value']),
+ 'quality': int(quality.attrib['value']),
} for quality in info_xml.findall('./video/quality')]
self._sort_formats(formats)
@@ -45,7 +44,7 @@ class BokeCCIE(BokeCCBaseIE):
}]
def _real_extract(self, url):
- qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+ qs = compat_parse_qs(self._match_valid_url(url).group('query'))
if not qs.get('vid') or not qs.get('uid'):
raise ExtractorError('Invalid URL', expected=True)
diff --git a/hypervideo_dl/extractor/bongacams.py b/hypervideo_dl/extractor/bongacams.py
index 180542f..9e75511 100644
--- a/hypervideo_dl/extractor/bongacams.py
+++ b/hypervideo_dl/extractor/bongacams.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -22,7 +21,7 @@ class BongaCamsIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host')
channel_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/box.py b/hypervideo_dl/extractor/box.py
index aae82d1..8214086 100644
--- a/hypervideo_dl/extractor/box.py
+++ b/hypervideo_dl/extractor/box.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -30,7 +29,7 @@ class BoxIE(InfoExtractor):
}
def _real_extract(self, url):
- shared_name, file_id = re.match(self._VALID_URL, url).groups()
+ shared_name, file_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, file_id)
request_token = self._parse_json(self._search_regex(
r'Box\.config\s*=\s*({.+?});', webpage,
diff --git a/hypervideo_dl/extractor/bpb.py b/hypervideo_dl/extractor/bpb.py
index 0783353..8f6ef3c 100644
--- a/hypervideo_dl/extractor/bpb.py
+++ b/hypervideo_dl/extractor/bpb.py
@@ -47,7 +47,7 @@ class BpbIE(InfoExtractor):
quality = 'high' if '_high' in video_url else 'low'
formats.append({
'url': video_url,
- 'preference': 10 if quality == 'high' else 0,
+ 'quality': 10 if quality == 'high' else 0,
'format_note': quality,
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
})
diff --git a/hypervideo_dl/extractor/br.py b/hypervideo_dl/extractor/br.py
index 9bde7f2..7169ece 100644
--- a/hypervideo_dl/extractor/br.py
+++ b/hypervideo_dl/extractor/br.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -86,7 +85,7 @@ class BRIE(InfoExtractor):
]
def _real_extract(self, url):
- base_url, display_id = re.search(self._VALID_URL, url).groups()
+ base_url, display_id = self._match_valid_url(url).groups()
page = self._download_webpage(url, display_id)
xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
@@ -114,7 +113,7 @@ class BRIE(InfoExtractor):
medias.append(media)
if len(medias) > 1:
- self._downloader.report_warning(
+ self.report_warning(
'found multiple medias; please '
'report this with the video URL to http://yt-dl.org/bug')
if not medias:
diff --git a/hypervideo_dl/extractor/bravotv.py b/hypervideo_dl/extractor/bravotv.py
index bae2aed..139d51c 100644
--- a/hypervideo_dl/extractor/bravotv.py
+++ b/hypervideo_dl/extractor/bravotv.py
@@ -8,6 +8,9 @@ from ..utils import (
smuggle_url,
update_url_query,
int_or_none,
+ float_or_none,
+ try_get,
+ dict_get,
)
@@ -24,6 +27,11 @@ class BravoTVIE(AdobePassIE):
'uploader': 'NBCU-BRAV',
'upload_date': '20190314',
'timestamp': 1552591860,
+ 'season_number': 16,
+ 'episode_number': 15,
+ 'series': 'Top Chef',
+ 'episode': 'The Top Chef Season 16 Winner Is...',
+ 'duration': 190.0,
}
}, {
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
@@ -34,7 +42,7 @@ class BravoTVIE(AdobePassIE):
}]
def _real_extract(self, url):
- site, display_id = re.match(self._VALID_URL, url).groups()
+ site, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
settings = self._parse_json(self._search_regex(
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
@@ -79,12 +87,34 @@ class BravoTVIE(AdobePassIE):
'episode_number': int_or_none(metadata.get('episode_num')),
})
query['switch'] = 'progressive'
+
+ tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
+
+ tp_metadata = self._download_json(
+ update_url_query(tp_url, {'format': 'preview'}),
+ display_id, fatal=False)
+ if tp_metadata:
+ info.update({
+ 'title': tp_metadata.get('title'),
+ 'description': tp_metadata.get('description'),
+ 'duration': float_or_none(tp_metadata.get('duration'), 1000),
+ 'season_number': int_or_none(
+ dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
+ 'episode_number': int_or_none(
+ dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
+ # For some reason the series is sometimes wrapped into a single element array.
+ 'series': try_get(
+ dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
+ lambda x: x[0] if isinstance(x, list) else x,
+ expected_type=str),
+ 'episode': dict_get(
+ tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
+ })
+
info.update({
'_type': 'url_transparent',
'id': release_pid,
- 'url': smuggle_url(update_url_query(
- 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
- query), {'force_smil_url': True}),
+ 'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
'ie_key': 'ThePlatform',
})
return info
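
The new BravoTV metadata block leans on two utils helpers: dict_get returns the first non-None value among several candidate keys, and try_get runs a getter inside a try/except, optionally type-checking the result; the lambda x: x[0] if isinstance(x, list) else x unwraps the occasionally list-wrapped series name. Rough stand-ins with the same call shape (the real helpers handle a few more edge cases):

    def dict_get(d, keys, default=None):
        # first non-None value among the candidate keys
        for key in keys:
            value = d.get(key)
            if value is not None:
                return value
        return default

    def try_get(src, getter, expected_type=None):
        # swallow lookup errors; discard results of the wrong type
        try:
            value = getter(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            return None
        if expected_type is None or isinstance(value, expected_type):
            return value

    meta = {'nbcu$show': ['Top Chef'], 'nbcu$seasonNumber': 16}
    print(try_get(dict_get(meta, ('pl1$show', 'nbcu$show')),
                  lambda x: x[0] if isinstance(x, list) else x, str))  # Top Chef
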
diff --git a/hypervideo_dl/extractor/breakcom.py b/hypervideo_dl/extractor/breakcom.py
index 68c7cf2..f38789f 100644
--- a/hypervideo_dl/extractor/breakcom.py
+++ b/hypervideo_dl/extractor/breakcom.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from .youtube import YoutubeIE
@@ -41,7 +40,7 @@ class BreakIE(InfoExtractor):
}]
def _real_extract(self, url):
- display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/brightcove.py b/hypervideo_dl/extractor/brightcove.py
index 6022076..cd1c3f0 100644
--- a/hypervideo_dl/extractor/brightcove.py
+++ b/hypervideo_dl/extractor/brightcove.py
@@ -11,7 +11,6 @@ from ..compat import (
compat_etree_fromstring,
compat_HTTPError,
compat_parse_qs,
- compat_urllib_parse_urlparse,
compat_urlparse,
compat_xml_parse_error,
)
@@ -26,6 +25,7 @@ from ..utils import (
js_to_json,
mimetype2ext,
parse_iso8601,
+ parse_qs,
smuggle_url,
str_or_none,
try_get,
@@ -177,7 +177,7 @@ class BrightcoveLegacyIE(InfoExtractor):
flashvars = {}
data_url = object_doc.attrib.get('data', '')
- data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
+ data_url_params = parse_qs(data_url)
def find_param(name):
if name in flashvars:
@@ -290,7 +290,7 @@ class BrightcoveLegacyIE(InfoExtractor):
url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
query_str = mobj.group('query')
query = compat_urlparse.parse_qs(query_str)
@@ -472,27 +472,32 @@ class BrightcoveNewIE(AdobePassIE):
title = json_data['name'].strip()
num_drm_sources = 0
- formats = []
+ formats, subtitles = [], {}
sources = json_data.get('sources') or []
for source in sources:
container = source.get('container')
ext = mimetype2ext(source.get('type'))
src = source.get('src')
+ skip_unplayable = not self.get_param('allow_unplayable_formats')
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
- if container == 'WVM' or source.get('key_systems'):
+ if skip_unplayable and (container == 'WVM' or source.get('key_systems')):
num_drm_sources += 1
continue
- elif ext == 'ism':
+ elif ext == 'ism' and skip_unplayable:
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:
continue
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ f, subs = self._extract_m3u8_formats_and_subtitles(
+ src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(f)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif ext == 'mpd':
if not src:
continue
- formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
+ f, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False)
+ formats.extend(f)
+ subtitles = self._merge_subtitles(subtitles, subs)
else:
streaming_src = source.get('streaming_src')
stream_name, app_name = source.get('stream_name'), source.get('app_name')
@@ -544,17 +549,17 @@ class BrightcoveNewIE(AdobePassIE):
errors = json_data.get('errors')
if errors:
error = errors[0]
- raise ExtractorError(
+ self.raise_no_formats(
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
- if sources and num_drm_sources == len(sources):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ elif (not self.get_param('allow_unplayable_formats')
+ and sources and num_drm_sources == len(sources)):
+ self.report_drm(video_id)
self._sort_formats(formats)
for f in formats:
f.setdefault('http_headers', {}).update(headers)
- subtitles = {}
for text_track in json_data.get('text_tracks', []):
if text_track.get('kind') != 'captions':
continue
@@ -593,7 +598,7 @@ class BrightcoveNewIE(AdobePassIE):
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
})
- account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
+ account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
policy_key_id = '%s_%s' % (account_id, player_id)
policy_key = self._downloader.cache.load('brightcove', policy_key_id)
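
Brightcove now collects formats and subtitles together via the _extract_*_formats_and_subtitles helpers and folds each hunk's subtitle dict into one with _merge_subtitles (byutv.py and canvas.py below get the same treatment). The merge presumably concatenates per-language track lists, roughly:

    def merge_subtitles(*dicts):
        # combine {lang: [track, ...]} mappings language by language
        merged = {}
        for d in dicts:
            for lang, tracks in (d or {}).items():
                merged.setdefault(lang, []).extend(tracks)
        return merged

    hls = {'en': [{'url': 'https://example.com/en.hls.vtt'}]}
    dash = {'en': [{'url': 'https://example.com/en.dash.vtt'}],
            'fr': [{'url': 'https://example.com/fr.vtt'}]}
    print(merge_subtitles(hls, dash))  # two 'en' tracks, one 'fr' track
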
diff --git a/hypervideo_dl/extractor/byutv.py b/hypervideo_dl/extractor/byutv.py
index 0b11bf1..f4d5086 100644
--- a/hypervideo_dl/extractor/byutv.py
+++ b/hypervideo_dl/extractor/byutv.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -52,7 +51,7 @@ class BYUtvIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
@@ -82,6 +81,7 @@ class BYUtvIE(InfoExtractor):
info = {}
formats = []
+ subtitles = {}
for format_id, ep in video.items():
if not isinstance(ep, dict):
continue
@@ -90,12 +90,16 @@ class BYUtvIE(InfoExtractor):
continue
ext = determine_ext(video_url)
if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- video_url, video_id, mpd_id='dash', fatal=False))
+ mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
+ video_url, video_id, mpd_id='dash', fatal=False)
+ formats.extend(mpd_fmts)
+ subtitles = self._merge_subtitles(subtitles, mpd_subs)
else:
formats.append({
'url': video_url,
@@ -114,4 +118,5 @@ class BYUtvIE(InfoExtractor):
'display_id': display_id,
'title': display_id,
'formats': formats,
+ 'subtitles': subtitles,
})
diff --git a/hypervideo_dl/extractor/c56.py b/hypervideo_dl/extractor/c56.py
index cac8fdc..a853c53 100644
--- a/hypervideo_dl/extractor/c56.py
+++ b/hypervideo_dl/extractor/c56.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import js_to_json
@@ -31,7 +30,7 @@ class C56IE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+ mobj = self._match_valid_url(url)
text_id = mobj.group('textid')
webpage = self._download_webpage(url, text_id)
diff --git a/hypervideo_dl/extractor/cam4.py b/hypervideo_dl/extractor/cam4.py
new file mode 100644
index 0000000..30daf2b
--- /dev/null
+++ b/hypervideo_dl/extractor/cam4.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class CAM4IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?cam4\.com/(?P<id>[a-z0-9_]+)'
+ _TEST = {
+ 'url': 'https://www.cam4.com/foxynesss',
+ 'info_dict': {
+ 'id': 'foxynesss',
+ 'ext': 'mp4',
+ 'title': 're:^foxynesss [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL')
+
+ formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
+ self._sort_formats(formats)
+
+ return {
+ 'id': channel_id,
+ 'title': self._live_title(channel_id),
+ 'is_live': True,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cammodels.py b/hypervideo_dl/extractor/cammodels.py
index 1eb81b7..eb2a8b4 100644
--- a/hypervideo_dl/extractor/cammodels.py
+++ b/hypervideo_dl/extractor/cammodels.py
@@ -82,7 +82,7 @@ class CamModelsIE(InfoExtractor):
f.update({
'ext': 'mp4',
# hls skips fragments, preferring rtmp
- 'preference': -1,
+ 'quality': -10,
})
else:
continue
diff --git a/hypervideo_dl/extractor/canalplus.py b/hypervideo_dl/extractor/canalplus.py
index 51c11cb..211ea26 100644
--- a/hypervideo_dl/extractor/canalplus.py
+++ b/hypervideo_dl/extractor/canalplus.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -50,7 +49,7 @@ class CanalplusIE(InfoExtractor):
}]
def _real_extract(self, url):
- site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+ site, display_id, video_id = self._match_valid_url(url).groups()
site_id = self._SITE_ID_MAP[site]
@@ -89,7 +88,7 @@ class CanalplusIE(InfoExtractor):
# the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
'format_id': format_id,
- 'preference': preference(format_id),
+ 'quality': preference(format_id),
})
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/canvas.py b/hypervideo_dl/extractor/canvas.py
index eefbab2..49e7e4e 100644
--- a/hypervideo_dl/extractor/canvas.py
+++ b/hypervideo_dl/extractor/canvas.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-import json
from .common import InfoExtractor
from .gigya import GigyaBaseIE
@@ -17,6 +15,7 @@ from ..utils import (
str_or_none,
strip_or_none,
url_or_none,
+ urlencode_postdata
)
@@ -24,7 +23,7 @@ class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
- 'md5': '68993eda72ef62386a15ea2cf3c93107',
+ 'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
@@ -32,9 +31,9 @@ class CanvasIE(InfoExtractor):
'title': 'Nachtwacht: De Greystook',
'description': 'Nachtwacht: De Greystook',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1468.04,
+ 'duration': 1468.02,
},
- 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+ 'expected_warnings': ['is not a supported codec'],
}, {
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
@@ -47,7 +46,7 @@ class CanvasIE(InfoExtractor):
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
site_id, video_id = mobj.group('site_id'), mobj.group('id')
data = None
@@ -83,24 +82,31 @@ class CanvasIE(InfoExtractor):
description = data.get('description')
formats = []
+ subtitles = {}
for target in data['targetUrls']:
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
if not format_url or not format_type:
continue
format_type = format_type.upper()
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
- formats.extend(self._extract_m3u8_formats(
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
- m3u8_id=format_type, fatal=False))
+ m3u8_id=format_type, fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id=format_type, fatal=False))
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id=format_type, fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HSS':
- formats.extend(self._extract_ism_formats(
- format_url, video_id, ism_id='mss', fatal=False))
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ format_url, video_id, ism_id='mss', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
else:
formats.append({
'format_id': format_type,
@@ -108,7 +114,6 @@ class CanvasIE(InfoExtractor):
})
self._sort_formats(formats)
- subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
@@ -186,7 +191,7 @@ class CanvasEenIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
site_id, display_id = mobj.group('site_id'), mobj.group('id')
webpage = self._download_webpage(url, display_id)
@@ -259,7 +264,7 @@ class VrtNUIE(GigyaBaseIE):
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
}]
_NETRC_MACHINE = 'vrtnu'
- _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+ _APIKEY = '3_qhEcPa5JGFROVwu5SWKqJ4mVOIkwlFNMSKwzPDAh8QZOtHqu6L4nD5Q7lk0eXOOG'
_CONTEXT_ID = 'R3595707040'
def _real_initialize(self):
@@ -270,35 +275,41 @@ class VrtNUIE(GigyaBaseIE):
if username is None:
return
- auth_data = {
- 'APIKey': self._APIKEY,
- 'targetEnv': 'jssdk',
- 'loginID': username,
- 'password': password,
- 'authMode': 'cookie',
- }
+ auth_info = self._download_json(
+ 'https://accounts.vrt.be/accounts.login', None,
+ note='Login data', errnote='Could not get Login data',
+ headers={}, data=urlencode_postdata({
+ 'loginID': username,
+ 'password': password,
+ 'sessionExpiration': '-2',
+ 'APIKey': self._APIKEY,
+ 'targetEnv': 'jssdk',
+ }))
- auth_info = self._gigya_login(auth_data)
+ if auth_info.get('errorDetails'):
+ raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
- # When requesting a token, no actual token is returned, but the
- # necessary cookies are set.
+ self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
+ None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
+ query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
+
+ post_data = {
+ 'UID': auth_info['UID'],
+ 'UIDSignature': auth_info['UIDSignature'],
+ 'signatureTimestamp': auth_info['signatureTimestamp'],
+ 'client_id': 'vrtnu-site',
+ '_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
+ }
+
self._request_webpage(
- 'https://token.vrt.be',
+ 'https://login.vrt.be/perform_login',
None, note='Requesting a token', errnote='Could not get a token',
- headers={
- 'Content-Type': 'application/json',
- 'Referer': 'https://www.vrt.be/vrtnu/',
- },
- data=json.dumps({
- 'uid': auth_info['UID'],
- 'uidsig': auth_info['UIDSignature'],
- 'ts': auth_info['signatureTimestamp'],
- 'email': auth_info['profile']['email'],
- }).encode('utf-8'))
+ headers={}, data=urlencode_postdata(post_data))
+
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
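
The VrtNU login rework above replaces the Gigya JSON token exchange with a three-step, form-encoded handshake: POST the credentials to accounts.vrt.be/accounts.login, GET token.vrt.be/vrtnuinitlogin to receive the OIDCXSRF cookie, then POST the signed identifiers to login.vrt.be/perform_login. A requests-based sketch of the same sequence (endpoints and field names are taken from the diff; the credentials are placeholders):

    import requests

    session = requests.Session()
    auth = session.post('https://accounts.vrt.be/accounts.login', data={
        'loginID': 'user@example.com', 'password': 'placeholder',
        'sessionExpiration': '-2', 'targetEnv': 'jssdk',
        'APIKey': '3_qhEcPa5JGFROVwu5SWKqJ4mVOIkwlFNMSKwzPDAh8QZOtHqu6L4nD5Q7lk0eXOOG',
    }).json()

    # sets the OIDCXSRF cookie that perform_login expects back as _csrf
    session.get('https://token.vrt.be/vrtnuinitlogin', params={
        'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})

    session.post('https://login.vrt.be/perform_login', data={
        'UID': auth['UID'],
        'UIDSignature': auth['UIDSignature'],
        'signatureTimestamp': auth['signatureTimestamp'],
        'client_id': 'vrtnu-site',
        '_csrf': session.cookies.get('OIDCXSRF'),
    })
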
diff --git a/hypervideo_dl/extractor/cbc.py b/hypervideo_dl/extractor/cbc.py
index fd5ec60..2429521 100644
--- a/hypervideo_dl/extractor/cbc.py
+++ b/hypervideo_dl/extractor/cbc.py
@@ -1,30 +1,18 @@
# coding: utf-8
from __future__ import unicode_literals
-import hashlib
-import json
import re
-from xml.sax.saxutils import escape
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_HTTPError,
)
from ..utils import (
js_to_json,
smuggle_url,
try_get,
- xpath_text,
- xpath_element,
- xpath_with_ns,
- find_xpath_attr,
orderedSet,
- parse_duration,
- parse_iso8601,
- parse_age_limit,
strip_or_none,
- int_or_none,
ExtractorError,
)
@@ -59,6 +47,7 @@ class CBCIE(InfoExtractor):
'uploader': 'CBCC-NEW',
'timestamp': 1382717907,
},
+ 'skip': 'No longer available',
}, {
# with clipId, feed only available via tpfeed.cbc.ca
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
@@ -209,289 +198,228 @@ class CBCPlayerIE(InfoExtractor):
}
-class CBCWatchBaseIE(InfoExtractor):
- _device_id = None
- _device_token = None
- _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
- _NS_MAP = {
- 'media': 'http://search.yahoo.com/mrss/',
- 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
- }
- _GEO_COUNTRIES = ['CA']
- _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
- _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
- _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
- _NETRC_MACHINE = 'cbcwatch'
-
- def _signature(self, email, password):
- data = json.dumps({
- 'email': email,
- 'password': password,
- }).encode()
- headers = {'content-type': 'application/json'}
- query = {'apikey': self._API_KEY}
- resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
- access_token = resp['access_token']
-
- # token
- query = {
- 'access_token': access_token,
- 'apikey': self._API_KEY,
- 'jwtapp': 'jwt',
- }
- resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
- return resp['signature']
-
- def _call_api(self, path, video_id):
- url = path if path.startswith('http') else self._API_BASE_URL + path
- for _ in range(2):
- try:
- result = self._download_xml(url, video_id, headers={
- 'X-Clearleap-DeviceId': self._device_id,
- 'X-Clearleap-DeviceToken': self._device_token,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- # Device token has expired, re-acquiring device token
- self._register_device()
- continue
- raise
- error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
- if error_message:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
- return result
-
- def _real_initialize(self):
- if self._valid_device_token():
- return
- device = self._downloader.cache.load(
- 'cbcwatch', self._cache_device_key()) or {}
- self._device_id, self._device_token = device.get('id'), device.get('token')
- if self._valid_device_token():
- return
- self._register_device()
-
- def _valid_device_token(self):
- return self._device_id and self._device_token
-
- def _cache_device_key(self):
- email, _ = self._get_login_info()
- return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
-
- def _register_device(self):
- result = self._download_xml(
- self._API_BASE_URL + 'device/register',
- None, 'Acquiring device token',
- data=b'<device><type>web</type></device>')
- self._device_id = xpath_text(result, 'deviceId', fatal=True)
- email, password = self._get_login_info()
- if email and password:
- signature = self._signature(email, password)
- data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
- escape(signature), escape(self._device_id)).encode()
- url = self._API_BASE_URL + 'device/login'
- result = self._download_xml(
- url, None, data=data,
- headers={'content-type': 'application/xml'})
- self._device_token = xpath_text(result, 'token', fatal=True)
- else:
- self._device_token = xpath_text(result, 'deviceToken', fatal=True)
- self._downloader.cache.store(
- 'cbcwatch', self._cache_device_key(), {
- 'id': self._device_id,
- 'token': self._device_token,
- })
-
- def _parse_rss_feed(self, rss):
- channel = xpath_element(rss, 'channel', fatal=True)
-
- def _add_ns(path):
- return xpath_with_ns(path, self._NS_MAP)
-
- entries = []
- for item in channel.findall('item'):
- guid = xpath_text(item, 'guid', fatal=True)
- title = xpath_text(item, 'title', fatal=True)
-
- media_group = xpath_element(item, _add_ns('media:group'), fatal=True)
- content = xpath_element(media_group, _add_ns('media:content'), fatal=True)
- content_url = content.attrib['url']
-
- thumbnails = []
- for thumbnail in media_group.findall(_add_ns('media:thumbnail')):
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'id': thumbnail.get('profile'),
- 'url': thumbnail_url,
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- })
-
- timestamp = None
- release_date = find_xpath_attr(
- item, _add_ns('media:credit'), 'role', 'releaseDate')
- if release_date is not None:
- timestamp = parse_iso8601(release_date.text)
-
- entries.append({
- '_type': 'url_transparent',
- 'url': content_url,
- 'id': guid,
- 'title': title,
- 'description': xpath_text(item, 'description'),
- 'timestamp': timestamp,
- 'duration': int_or_none(content.get('duration')),
- 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))),
- 'episode': xpath_text(item, _add_ns('clearleap:episode')),
- 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))),
- 'series': xpath_text(item, _add_ns('clearleap:series')),
- 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))),
- 'thumbnails': thumbnails,
- 'ie_key': 'CBCWatchVideo',
- })
-
- return self.playlist_result(
- entries, xpath_text(channel, 'guid'),
- xpath_text(channel, 'title'),
- xpath_text(channel, 'description'))
-
-
-class CBCWatchVideoIE(CBCWatchBaseIE):
- IE_NAME = 'cbc.ca:watch:video'
- _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _TEST = {
- # geo-restricted to Canada, bypassable
- 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
- 'only_matching': True,
- }
+class CBCGemIE(InfoExtractor):
+ IE_NAME = 'gem.cbc.ca'
+ _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
+ _TESTS = [{
+ # This is a normal, public, TV show video
+ 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
+ 'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
+ 'info_dict': {
+ 'id': 'schitts-creek/s06e01',
+ 'ext': 'mp4',
+ 'title': 'Smoke Signals',
+ 'description': 'md5:929868d20021c924020641769eb3e7f1',
+ 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
+ 'duration': 1314,
+ 'categories': ['comedy'],
+ 'series': 'Schitt\'s Creek',
+ 'season': 'Season 6',
+ 'season_number': 6,
+ 'episode': 'Smoke Signals',
+ 'episode_number': 1,
+ 'episode_id': 'schitts-creek/s06e01',
+ },
+ 'params': {'format': 'bv'},
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # This video requires an account in the browser, but works fine in hypervideo
+ 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
+ 'md5': '297a9600f554f2258aed01514226a697',
+ 'info_dict': {
+ 'id': 'schitts-creek/s01e01',
+ 'ext': 'mp4',
+ 'title': 'The Cup Runneth Over',
+ 'description': 'md5:9bca14ea49ab808097530eb05a29e797',
+ 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
+ 'series': 'Schitt\'s Creek',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'The Cup Runneth Over',
+ 'episode_id': 'schitts-creek/s01e01',
+ 'duration': 1309,
+ 'categories': ['comedy'],
+ },
+ 'params': {'format': 'bv'},
+ 'skip': 'Geo-restricted to Canada',
+ }]
+ _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/assets/'
def _real_extract(self, url):
video_id = self._match_id(url)
- result = self._call_api(url, video_id)
-
- m3u8_url = xpath_text(result, 'url', fatal=True)
- formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
- if len(formats) < 2:
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
- for f in formats:
- format_id = f.get('format_id')
- if format_id.startswith('AAC'):
- f['acodec'] = 'aac'
- elif format_id.startswith('AC3'):
- f['acodec'] = 'ac-3'
+ video_info = self._download_json(self._API_BASE + video_id, video_id)
+
+ last_error = None
+ attempt = -1
+ retries = self.get_param('extractor_retries', 15)
+ while attempt < retries:
+ attempt += 1
+ if last_error:
+ self.report_warning('%s. Retrying ...' % last_error)
+ m3u8_info = self._download_json(
+ video_info['playSession']['url'], video_id,
+                note=f'Downloading JSON metadata (attempt {attempt})')
+ m3u8_url = m3u8_info.get('url')
+ if m3u8_url:
+ break
+ elif m3u8_info.get('errorCode') == 1:
+ self.raise_geo_restricted(countries=['CA'])
+ else:
+ last_error = f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}'
+ # 35 means media unavailable, but retries work
+ if m3u8_info.get('errorCode') != 35 or attempt >= retries:
+ raise ExtractorError(last_error)
+
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
+ self._remove_duplicate_formats(formats)
+
+        for fmt in formats:
+            if fmt.get('vcodec') == 'none':
+                if fmt.get('ext') is None:
+                    fmt['ext'] = 'm4a'
+                if fmt.get('acodec') is None:
+                    fmt['acodec'] = 'mp4a.40.2'
+
+                # Put described audio at the beginning of the list, so that it
+                # isn't chosen by default, as most people won't want it.
+                if 'descriptive' in fmt['format_id'].lower():
+                    fmt['preference'] = -2
+
self._sort_formats(formats)
- info = {
+ return {
'id': video_id,
- 'title': video_id,
+ 'title': video_info['title'],
+ 'description': video_info.get('description'),
+ 'thumbnail': video_info.get('image'),
+ 'series': video_info.get('series'),
+ 'season_number': video_info.get('season'),
+ 'season': f'Season {video_info.get("season")}',
+ 'episode_number': video_info.get('episode'),
+ 'episode': video_info.get('title'),
+ 'episode_id': video_id,
+ 'duration': video_info.get('duration'),
+ 'categories': [video_info.get('category')],
'formats': formats,
+ 'release_timestamp': video_info.get('airDate'),
+ 'timestamp': video_info.get('availableDate'),
}
- rss = xpath_element(result, 'rss')
- if rss:
- info.update(self._parse_rss_feed(rss)['entries'][0])
- del info['url']
- del info['_type']
- del info['ie_key']
- return info
-
-class CBCWatchIE(CBCWatchBaseIE):
- IE_NAME = 'cbc.ca:watch'
- _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
+class CBCGemPlaylistIE(InfoExtractor):
+ IE_NAME = 'gem.cbc.ca:playlist'
+ _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
_TESTS = [{
- # geo-restricted to Canada, bypassable
- 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
+ # TV show playlist, all public videos
+ 'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
+ 'playlist_count': 16,
'info_dict': {
- 'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
- 'ext': 'mp4',
- 'title': 'Customer (Dis)Service',
- 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
- 'upload_date': '20160219',
- 'timestamp': 1455840000,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- 'format': 'bestvideo',
+ 'id': 'schitts-creek/s06',
+ 'title': 'Season 6',
+ 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
},
- }, {
- # geo-restricted to Canada, bypassable
- 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
- 'info_dict': {
- 'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
- 'title': 'Arthur',
- 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
- },
- 'playlist_mincount': 30,
- }, {
- 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
- 'only_matching': True,
}]
+ _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
def _real_extract(self, url):
- video_id = self._match_id(url)
- rss = self._call_api('web/browse/' + video_id, video_id)
- return self._parse_rss_feed(rss)
+ match = self._match_valid_url(url)
+ season_id = match.group('id')
+ show = match.group('show')
+ show_info = self._download_json(self._API_BASE + show, season_id)
+ season = int(match.group('season'))
+ season_info = try_get(show_info, lambda x: x['seasons'][season - 1])
+
+ if season_info is None:
+ raise ExtractorError(f'Couldn\'t find season {season} of {show}')
+
+ episodes = []
+ for episode in season_info['assets']:
+ episodes.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'CBCGem',
+ 'url': 'https://gem.cbc.ca/media/' + episode['id'],
+ 'id': episode['id'],
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'thumbnail': episode.get('image'),
+ 'series': episode.get('series'),
+ 'season_number': episode.get('season'),
+ 'season': season_info['title'],
+ 'season_id': season_info.get('id'),
+ 'episode_number': episode.get('episode'),
+ 'episode': episode.get('title'),
+ 'episode_id': episode['id'],
+ 'duration': episode.get('duration'),
+ 'categories': [episode.get('category')],
+ })
+ thumbnail = None
+ tn_uri = season_info.get('image')
+ # the-national was observed to use a "data:image/png;base64"
+ # URI for their 'image' value. The image was 1x1, and is
+ # probably just a placeholder, so it is ignored.
+ if tn_uri is not None and not tn_uri.startswith('data:'):
+ thumbnail = tn_uri
-class CBCOlympicsIE(InfoExtractor):
- IE_NAME = 'cbc.ca:olympics'
- _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
- _TESTS = [{
- 'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
- 'only_matching': True,
- }]
+ return {
+ '_type': 'playlist',
+ 'entries': episodes,
+ 'id': season_id,
+ 'title': season_info['title'],
+ 'description': season_info.get('description'),
+ 'thumbnail': thumbnail,
+ 'series': show_info.get('title'),
+ 'season_number': season_info.get('season'),
+ 'season': season_info['title'],
+ }
+
+
+class CBCGemLiveIE(InfoExtractor):
+ IE_NAME = 'gem.cbc.ca:live'
+ _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>[0-9]{12})'
+ _TEST = {
+ 'url': 'https://gem.cbc.ca/live/920604739687',
+ 'info_dict': {
+ 'title': 'Ottawa',
+ 'description': 'The live TV channel and local programming from Ottawa',
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
+ 'is_live': True,
+ 'id': 'AyqZwxRqh8EH',
+ 'ext': 'mp4',
+ 'timestamp': 1492106160,
+ 'upload_date': '20170413',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Live might have ended',
+ }
+
+ # It's unclear where the chars at the end come from, but they appear to be
+ # constant. Might need updating in the future.
+ _API = 'https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT'
def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._hidden_inputs(webpage)['videoId']
- video_doc = self._download_xml(
- 'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
- title = xpath_text(video_doc, 'title', fatal=True)
- is_live = xpath_text(video_doc, 'kind') == 'Live'
- if is_live:
- title = self._live_title(title)
-
- formats = []
- for video_source in video_doc.findall('videoSources/videoSource'):
- uri = xpath_text(video_source, 'uri')
- if not uri:
- continue
- tokenize = self._download_json(
- 'https://olympics.cbc.ca/api/api-akamai/tokenize',
- video_id, data=json.dumps({
- 'VideoSource': uri,
- }).encode(), headers={
- 'Content-Type': 'application/json',
- 'Referer': url,
- # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
- 'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
- }, fatal=False)
- if not tokenize:
- continue
- content_url = tokenize['ContentUrl']
- video_source_format = video_source.get('format')
- if video_source_format == 'IIS':
- formats.extend(self._extract_ism_formats(
- content_url, video_id, ism_id=video_source_format, fatal=False))
- else:
- formats.extend(self._extract_m3u8_formats(
- content_url, video_id, 'mp4',
- 'm3u8' if is_live else 'm3u8_native',
- m3u8_id=video_source_format, fatal=False))
- self._sort_formats(formats)
+ video_id = self._match_id(url)
+ live_info = self._download_json(self._API, video_id)['entries']
+
+        video_info = None
+        for stream in live_info:
+            if stream.get('guid') == video_id:
+                video_info = stream
+                break
+
+ if video_info is None:
+ raise ExtractorError(
+ 'Couldn\'t find video metadata, maybe this livestream is now offline',
+ expected=True)
return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': video_info['content'][0]['url'],
'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': xpath_text(video_doc, 'description'),
- 'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
- 'duration': parse_duration(xpath_text(video_doc, 'duration')),
- 'formats': formats,
- 'is_live': is_live,
+ 'title': video_info.get('title'),
+ 'description': video_info.get('description'),
+ 'tags': try_get(video_info, lambda x: x['keywords'].split(', ')),
+ 'thumbnail': video_info.get('cbc$staticImage'),
+ 'is_live': True,
}
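
# Illustrative sketch (editorial, not patch content): CBCGemIE above retries
# the play-session request while the API reports errorCode 35 (a transient
# "media unavailable") and gives up on any other error or once the retry
# budget is exhausted. A minimal standalone version of that pattern, assuming
# a fetch() callable that returns a Gem-shaped dict ({'url': ...} on success,
# {'errorCode': ..., 'message': ...} on failure):

def fetch_with_retries(fetch, retries=15, transient_codes=(35,)):
    last_error = None
    for attempt in range(retries + 1):
        result = fetch()
        if result.get('url'):
            return result['url']
        last_error = '%s - %s' % (result.get('errorCode'), result.get('message'))
        if result.get('errorCode') not in transient_codes:
            break  # permanent failure, no point retrying
    raise RuntimeError(last_error)
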
diff --git a/hypervideo_dl/extractor/cbs.py b/hypervideo_dl/extractor/cbs.py
index c79e55a..ae9ce58 100644
--- a/hypervideo_dl/extractor/cbs.py
+++ b/hypervideo_dl/extractor/cbs.py
@@ -8,6 +8,7 @@ from ..utils import (
xpath_element,
xpath_text,
update_url_query,
+ url_or_none,
)
@@ -25,12 +26,64 @@ class CBSBaseIE(ThePlatformFeedIE):
})
return subtitles
+ def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info):
+ tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
+ tp_release_url = f'https://link.theplatform.com/s/{tp_path}'
+ info = self._extract_theplatform_metadata(tp_path, content_id)
+
+ formats, subtitles = [], {}
+ last_e = None
+ for asset_type, query in asset_types.items():
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(tp_release_url, query), content_id,
+ 'Downloading %s SMIL data' % asset_type)
+ except ExtractorError as e:
+ last_e = e
+ if asset_type != 'fallback':
+ continue
+                query['formats'] = ''  # blank the formats filter to check whether the asset has expired
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(tp_release_url, query), content_id,
+ 'Downloading %s SMIL data, trying again with another format' % asset_type)
+ except ExtractorError as e:
+ last_e = e
+ continue
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ self.raise_no_formats(last_e, True, content_id)
+ self._sort_formats(formats)
+
+ extra_info.update({
+ 'id': content_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ info.update({k: v for k, v in extra_info.items() if v is not None})
+ return info
+
+ def _extract_video_info(self, *args, **kwargs):
+ # Extract assets + metadata and call _extract_common_video_info
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _real_extract(self, url):
+ return self._extract_video_info(self._match_id(url))
+
class CBSIE(CBSBaseIE):
- _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
+ _VALID_URL = r'''(?x)
+ (?:
+ cbs:|
+ https?://(?:www\.)?(?:
+ cbs\.com/(?:shows/[^/]+/video|movies/[^/]+)/|
+ colbertlateshow\.com/(?:video|podcasts)/)
+ )(?P<id>[\w-]+)'''
+ # All tests are blocked outside US
_TESTS = [{
- 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
+ 'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
'info_dict': {
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
'ext': 'mp4',
@@ -45,71 +98,70 @@ class CBSIE(CBSBaseIE):
# m3u8 download
'skip_download': True,
},
- '_skip': 'Blocked outside the US',
+ }, {
+ 'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
+ 'info_dict': {
+ 'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
+ 'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
+ 'timestamp': 1624507140,
+ 'description': 'md5:e01af24e95c74d55e8775aef86117b95',
+ 'uploader': 'CBSI-NEW',
+ 'upload_date': '20210624',
+ },
+ 'params': {
+ 'ignore_no_formats_error': True,
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'This content expired on', 'No video formats found', 'Requested format is not available'],
}, {
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
'only_matching': True,
}, {
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
'only_matching': True,
- }, {
- 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
- 'only_matching': True,
}]
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
items_data = self._download_xml(
- 'http://can.cbs.com/thunder/player/videoPlayerService.php',
+ 'https://can.cbs.com/thunder/player/videoPlayerService.php',
content_id, query={'partner': site, 'contentId': content_id})
video_data = xpath_element(items_data, './/item')
- title = xpath_text(video_data, 'videoTitle', 'title', True)
- tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
- tp_release_url = 'http://link.theplatform.com/s/' + tp_path
+ title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title')
- asset_types = []
- subtitles = {}
- formats = []
- last_e = None
+ asset_types = {}
+ has_drm = False
for item in items_data.findall('.//item'):
asset_type = xpath_text(item, 'assetType')
- if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
- continue
- asset_types.append(asset_type)
query = {
'mbr': 'true',
'assetTypes': asset_type,
}
- if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
+ if not asset_type:
+                # fallback for content IDs for which videoPlayerService returns nothing
+ asset_type = 'fallback'
+ query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
+ del query['assetTypes']
+ if asset_type in asset_types:
+ continue
+ elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
+ if 'DASH_CENC' in asset_type:
+ has_drm = True
+ continue
+ if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
query['formats'] = 'MPEG4,M3U'
elif asset_type in ('RTMP', 'WIFI', '3G'):
query['formats'] = 'MPEG4,FLV'
- try:
- tp_formats, tp_subtitles = self._extract_theplatform_smil(
- update_url_query(tp_release_url, query), content_id,
- 'Downloading %s SMIL data' % asset_type)
- except ExtractorError as e:
- last_e = e
- continue
- formats.extend(tp_formats)
- subtitles = self._merge_subtitles(subtitles, tp_subtitles)
- if last_e and not formats:
- raise last_e
- self._sort_formats(formats)
+ asset_types[asset_type] = query
- info = self._extract_theplatform_metadata(tp_path, content_id)
- info.update({
- 'id': content_id,
+ if not asset_types and has_drm:
+ self.report_drm(content_id)
+
+ return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={
'title': title,
'series': xpath_text(video_data, 'seriesTitle'),
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
- 'thumbnail': xpath_text(video_data, 'previewImageURL'),
- 'formats': formats,
- 'subtitles': subtitles,
+ 'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
})
- return info
-
- def _real_extract(self, url):
- content_id = self._match_id(url)
- return self._extract_video_info(content_id)
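
# Illustrative sketch (editorial, not patch content): _extract_common_video_info
# above tries every asset type in the {asset_type: query} map, keeps whatever
# formats it can get, remembers the last failure, and only gives up when no
# asset type yielded anything. The same best-effort pattern in isolation,
# assuming a get_formats(query) callable that raises on failure:

def collect_formats(asset_types, get_formats):
    formats, last_exc = [], None
    for asset_type, query in asset_types.items():
        try:
            formats.extend(get_formats(query))
        except Exception as exc:  # the extractor catches ExtractorError here
            last_exc = exc
    if last_exc and not formats:
        raise last_exc  # every asset type failed
    return formats
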
diff --git a/hypervideo_dl/extractor/cbsinteractive.py b/hypervideo_dl/extractor/cbsinteractive.py
index 6596e98..9d4f754 100644
--- a/hypervideo_dl/extractor/cbsinteractive.py
+++ b/hypervideo_dl/extractor/cbsinteractive.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .cbs import CBSIE
from ..utils import int_or_none
@@ -71,7 +70,7 @@ class CBSInteractiveIE(CBSIE):
}
def _real_extract(self, url):
- site, display_id = re.match(self._VALID_URL, url).groups()
+ site, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
diff --git a/hypervideo_dl/extractor/cbssports.py b/hypervideo_dl/extractor/cbssports.py
index a891c9a..b8a6e59 100644
--- a/hypervideo_dl/extractor/cbssports.py
+++ b/hypervideo_dl/extractor/cbssports.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
# from .cbs import CBSBaseIE
from .common import InfoExtractor
@@ -30,7 +29,7 @@ class CBSSportsEmbedIE(InfoExtractor):
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
def _real_extract(self, url):
- uuid, pcid = re.match(self._VALID_URL, url).groups()
+ uuid, pcid = self._match_valid_url(url).groups()
query = {'id': uuid} if uuid else {'pcid': pcid}
video = self._download_json(
'https://www.cbssports.com/api/content/video/',
diff --git a/hypervideo_dl/extractor/ccma.py b/hypervideo_dl/extractor/ccma.py
index e6ae493..ea98f86 100644
--- a/hypervideo_dl/extractor/ccma.py
+++ b/hypervideo_dl/extractor/ccma.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import calendar
import datetime
-import re
from .common import InfoExtractor
from ..utils import (
@@ -61,7 +60,7 @@ class CCMAIE(InfoExtractor):
}]
def _real_extract(self, url):
- media_type, media_id = re.match(self._VALID_URL, url).groups()
+ media_type, media_id = self._match_valid_url(url).groups()
media = self._download_json(
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
diff --git a/hypervideo_dl/extractor/cctv.py b/hypervideo_dl/extractor/cctv.py
index c76f361..9b86121 100644
--- a/hypervideo_dl/extractor/cctv.py
+++ b/hypervideo_dl/extractor/cctv.py
@@ -162,7 +162,7 @@ class CCTVIE(InfoExtractor):
'url': video_url,
'format_id': 'http',
'quality': quality,
- 'preference': -1,
+ 'source_preference': -10
})
hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
diff --git a/hypervideo_dl/extractor/cda.py b/hypervideo_dl/extractor/cda.py
index e1b3919..72c4705 100644
--- a/hypervideo_dl/extractor/cda.py
+++ b/hypervideo_dl/extractor/cda.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import codecs
import re
+import json
from .common import InfoExtractor
from ..compat import (
@@ -19,6 +20,7 @@ from ..utils import (
parse_duration,
random_birthday,
urljoin,
+ try_get,
)
@@ -38,6 +40,8 @@ class CDAIE(InfoExtractor):
'average_rating': float,
'duration': 39,
'age_limit': 0,
+ 'upload_date': '20160221',
+ 'timestamp': 1456078244,
}
}, {
'url': 'http://www.cda.pl/video/57413289',
@@ -143,7 +147,7 @@ class CDAIE(InfoExtractor):
b = []
for c in a:
f = compat_ord(c)
- b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
+ b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f <= 126 else compat_chr(f))
a = ''.join(b)
a = a.replace('.cda.mp4', '')
for p in ('.2cda.pl', '.3cda.pl'):
@@ -173,18 +177,34 @@ class CDAIE(InfoExtractor):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
- f = {
+ video_quality = video.get('quality')
+ qualities = video.get('qualities', {})
+ video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
+ info_dict['formats'].append({
'url': video['file'],
- }
- m = re.search(
- r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
- page)
- if m:
- f.update({
- 'format_id': m.group('format_id'),
- 'height': int(m.group('height')),
- })
- info_dict['formats'].append(f)
+ 'format_id': video_quality,
+ 'height': int_or_none(video_quality[:-1]),
+ })
+ for quality, cda_quality in qualities.items():
+ if quality == video_quality:
+ continue
+ data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
+ 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
+ data = json.dumps(data).encode('utf-8')
+ video_url = self._download_json(
+ f'https://www.cda.pl/video/{video_id}', video_id, headers={
+ 'Content-Type': 'application/json',
+ 'X-Requested-With': 'XMLHttpRequest'
+ }, data=data, note=f'Fetching {quality} url',
+ errnote=f'Failed to fetch {quality} url', fatal=False)
+ if try_get(video_url, lambda x: x['result']['status']) == 'ok':
+ video_url = try_get(video_url, lambda x: x['result']['resp'])
+ info_dict['formats'].append({
+ 'url': video_url,
+ 'format_id': quality,
+ 'height': int_or_none(quality[:-1])
+ })
+
if not info_dict['duration']:
info_dict['duration'] = parse_duration(video.get('duration'))
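
# Illustrative sketch (editorial, not patch content): the new CDA code fetches
# alternate qualities with a JSON-RPC style POST ('method': 'videoGetLink').
# A standalone version of that request using only the standard library; the
# endpoint, headers and parameter order are taken from the diff above, the
# rest is an assumption:

import json
from urllib.request import Request, urlopen

def cda_get_link(video_id, cda_quality, ts, hash2):
    payload = json.dumps({
        'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
        'params': [video_id, cda_quality, ts, hash2, {}],
    }).encode('utf-8')
    req = Request(
        'https://www.cda.pl/video/%s' % video_id, data=payload,
        headers={'Content-Type': 'application/json',
                 'X-Requested-With': 'XMLHttpRequest'})
    resp = json.load(urlopen(req))
    if resp.get('result', {}).get('status') == 'ok':
        return resp['result']['resp']  # the direct video URL
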
diff --git a/hypervideo_dl/extractor/ceskatelevize.py b/hypervideo_dl/extractor/ceskatelevize.py
index 7cb4efb..5e04d38 100644
--- a/hypervideo_dl/extractor/ceskatelevize.py
+++ b/hypervideo_dl/extractor/ceskatelevize.py
@@ -147,8 +147,6 @@ class CeskaTelevizeIE(InfoExtractor):
is_live = item.get('type') == 'LIVE'
formats = []
for format_id, stream_url in item.get('streamUrls', {}).items():
- if 'drmOnly=true' in stream_url:
- continue
if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', 'm3u8_native',
@@ -157,6 +155,9 @@ class CeskaTelevizeIE(InfoExtractor):
stream_formats = self._extract_mpd_formats(
stream_url, playlist_id,
mpd_id='dash-%s' % format_id, fatal=False)
+ if 'drmOnly=true' in stream_url:
+ for f in stream_formats:
+ f['has_drm'] = True
# See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
if format_id == 'audioDescription':
for f in stream_formats:
diff --git a/hypervideo_dl/extractor/cgtn.py b/hypervideo_dl/extractor/cgtn.py
new file mode 100644
index 0000000..89f1738
--- /dev/null
+++ b/hypervideo_dl/extractor/cgtn.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_timestamp,
+)
+
+
+class CGTNIE(InfoExtractor):
+ _VALID_URL = r'https?://news\.cgtn\.com/news/[0-9]{4}-[0-9]{2}-[0-9]{2}/[a-zA-Z0-9-]+-(?P<id>[a-zA-Z0-9-]+)/index\.html'
+ _TESTS = [
+ {
+ 'url': 'https://news.cgtn.com/news/2021-03-09/Up-and-Out-of-Poverty-Ep-1-A-solemn-promise-YuOUaOzGQU/index.html',
+ 'info_dict': {
+ 'id': 'YuOUaOzGQU',
+ 'ext': 'mp4',
+ 'title': 'Up and Out of Poverty Ep. 1: A solemn promise',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1615295940,
+ 'upload_date': '20210309',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }, {
+ 'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html',
+ 'info_dict': {
+ 'id': '10REvJCewCY',
+ 'ext': 'mp4',
+ 'title': 'China, Indonesia vow to further deepen maritime cooperation',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
+ 'author': 'CGTN',
+ 'category': 'China',
+ 'timestamp': 1622950200,
+ 'upload_date': '20210606',
+ },
+ 'params': {
+ 'skip_download': False
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')
+ datetime_str = self._html_search_regex(r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
+ 'category': self._html_search_regex(r'<span class="section">\s*(.+?)\s*</span>',
+ webpage, 'category', fatal=False),
+ 'author': self._html_search_regex(r'<div class="news-author-name">\s*(.+?)\s*</div>',
+ webpage, 'author', default=None, fatal=False),
+ 'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
+ }
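
# Illustrative sketch (editorial, not patch content): CGTN's <span class="date">
# values are in Beijing time (UTC+8), while unified_timestamp() parses naive
# strings as UTC, hence the `x - 8 * 3600` correction above. The same
# adjustment with only the standard library (the date format string is an
# assumption for illustration):

from datetime import datetime, timezone

def beijing_str_to_epoch(datetime_str, fmt='%Y-%m-%d %H:%M'):
    # parse as if it were UTC, then shift back 8 hours to the true UTC epoch
    as_utc = datetime.strptime(datetime_str, fmt).replace(tzinfo=timezone.utc)
    return int(as_utc.timestamp()) - 8 * 3600
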
diff --git a/hypervideo_dl/extractor/channel9.py b/hypervideo_dl/extractor/channel9.py
index 09cacf6..90024db 100644
--- a/hypervideo_dl/extractor/channel9.py
+++ b/hypervideo_dl/extractor/channel9.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
clean_html,
- ExtractorError,
int_or_none,
parse_iso8601,
qualities,
@@ -97,7 +96,7 @@ class Channel9IE(InfoExtractor):
return self.playlist_result(entries, video_id, title_text)
def _real_extract(self, url):
- content_path, rss = re.match(self._VALID_URL, url).groups()
+ content_path, rss = self._match_valid_url(url).groups()
if rss:
return self._extract_list(content_path, url)
@@ -187,14 +186,13 @@ class Channel9IE(InfoExtractor):
'quality': quality(q, q_url),
})
- self._sort_formats(formats)
-
slides = content_data.get('Slides')
zip_file = content_data.get('ZipFile')
if not formats and not slides and not zip_file:
- raise ExtractorError(
+ self.raise_no_formats(
'None of recording, slides or zip are available for %s' % content_path)
+ self._sort_formats(formats)
subtitles = {}
for caption in content_data.get('Captions', []):
diff --git a/hypervideo_dl/extractor/chilloutzone.py b/hypervideo_dl/extractor/chilloutzone.py
index 5aac212..fd5202b 100644
--- a/hypervideo_dl/extractor/chilloutzone.py
+++ b/hypervideo_dl/extractor/chilloutzone.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
import json
from .common import InfoExtractor
@@ -51,7 +50,7 @@ class ChilloutzoneIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/chingari.py b/hypervideo_dl/extractor/chingari.py
new file mode 100644
index 0000000..6bdc4f6
--- /dev/null
+++ b/hypervideo_dl/extractor/chingari.py
@@ -0,0 +1,209 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ str_to_int,
+ url_or_none,
+)
+
+
+class ChingariBaseIE(InfoExtractor):
+ def _get_post(self, id, post_data):
+ media_data = post_data['mediaLocation']
+ base_url = media_data['base']
+ author_data = post_data.get('authorData', {})
+        song_data = post_data.get('song', {}) # revisit this in the future to differentiate between 'art' and 'author'
+
+ formats = [{
+ 'format_id': frmt,
+ 'width': str_to_int(frmt[1:]),
+ 'url': base_url + frmt_path,
+ } for frmt, frmt_path in media_data.get('transcoded', {}).items()]
+
+ if media_data.get('path'):
+ formats.append({
+ 'format_id': 'original',
+ 'format_note': 'Direct video.',
+ 'url': base_url + '/apipublic' + media_data['path'],
+ 'quality': 10,
+ })
+ self._sort_formats(formats)
+ timestamp = str_to_int(post_data.get('created_at'))
+ if timestamp:
+ timestamp = int_or_none(timestamp, 1000)
+
+ thumbnail, uploader_url = None, None
+ if media_data.get('thumbnail'):
+ thumbnail = base_url + media_data.get('thumbnail')
+ if author_data.get('username'):
+ uploader_url = 'https://chingari.io/' + author_data.get('username')
+
+ return {
+ 'id': id,
+ 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
+ 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
+ 'duration': media_data.get('duration'),
+ 'thumbnail': url_or_none(thumbnail),
+ 'like_count': post_data.get('likeCount'),
+ 'view_count': post_data.get('viewsCount'),
+ 'comment_count': post_data.get('commentCount'),
+ 'repost_count': post_data.get('shareCount'),
+ 'timestamp': timestamp,
+ 'uploader_id': post_data.get('userId') or author_data.get('_id'),
+ 'uploader': author_data.get('name'),
+ 'uploader_url': url_or_none(uploader_url),
+ 'track': song_data.get('title'),
+ 'artist': song_data.get('author'),
+ 'formats': formats,
+ }
+
+
+class ChingariIE(ChingariBaseIE):
+ _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
+ _TESTS = [{
+ 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
+ 'info_dict': {
+ 'id': '612f8f4ce1dc57090e8a7beb',
+ 'ext': 'mp4',
+ 'title': 'Happy birthday Srila Prabhupada',
+ 'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
+ 'duration': 0,
+ 'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'timestamp': 1630506828,
+ 'upload_date': '20210901',
+ 'uploader_id': '5f0403982c8bd344f4813f8c',
+ 'uploader': 'ISKCON,Inc.',
+ 'uploader_url': 'https://chingari.io/iskcon,inc',
+ 'track': None,
+ 'artist': None,
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ post_json = self._download_json(f'https://api.chingari.io/post/post_details/{id}', id)
+ if post_json['code'] != 200:
+ raise ExtractorError(post_json['message'], expected=True)
+ post_data = post_json['data']
+ return self._get_post(id, post_data)
+
+
+class ChingariUserIE(ChingariBaseIE):
+ _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
+ _TESTS = [{
+ 'url': 'https://chingari.io/dada1023',
+ 'playlist_mincount': 3,
+ 'info_dict': {
+ 'id': 'dada1023',
+ },
+ 'entries': [{
+ 'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
+ 'info_dict': {
+ 'id': '614781f3ade60b3a0bfff42a',
+ 'ext': 'mp4',
+ 'title': '#chingaribappa ',
+ 'description': 'md5:d1df21d84088770468fa63afe3b17857',
+ 'duration': 7,
+ 'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'timestamp': 1632076275,
+ 'upload_date': '20210919',
+ 'uploader_id': '5efc4b12cca35c3d1794c2d3',
+ 'uploader': 'dada (girish) dhawale',
+ 'uploader_url': 'https://chingari.io/dada1023',
+ 'track': None,
+ 'artist': None
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
+ 'info_dict': {
+ 'id': '6146b132bcbf860959e12cba',
+ 'ext': 'mp4',
+ 'title': 'Tactor harvesting',
+ 'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
+ 'duration': 59.3,
+ 'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'timestamp': 1632022834,
+ 'upload_date': '20210919',
+ 'uploader_id': '5efc4b12cca35c3d1794c2d3',
+ 'uploader': 'dada (girish) dhawale',
+ 'uploader_url': 'https://chingari.io/dada1023',
+ 'track': None,
+ 'artist': None
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
+ 'info_dict': {
+ 'id': '6145651b74cb030a64c40b82',
+ 'ext': 'mp4',
+ 'title': '#odiabhajan ',
+ 'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
+ 'duration': 56.67,
+ 'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
+ 'like_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'timestamp': 1631937819,
+ 'upload_date': '20210918',
+ 'uploader_id': '5efc4b12cca35c3d1794c2d3',
+ 'uploader': 'dada (girish) dhawale',
+ 'uploader_url': 'https://chingari.io/dada1023',
+ 'track': None,
+ 'artist': None
+ },
+ 'params': {'skip_download': True}
+ }],
+ }, {
+ 'url': 'https://chingari.io/iskcon%2Cinc',
+ 'playlist_mincount': 1025,
+ 'info_dict': {
+ 'id': 'iskcon%2Cinc',
+ },
+ }]
+
+ def _entries(self, id):
+        skip = 0
+ for page in itertools.count():
+ posts = self._download_json('https://api.chingari.io/users/getPosts', id,
+ data=json.dumps({'userId': id, 'ownerId': id, 'skip': skip, 'limit': 20}).encode(),
+ headers={'content-type': 'application/json;charset=UTF-8'},
+ note='Downloading page %s' % page)
+ for post in posts.get('data', []):
+ post_data = post['post']
+ yield self._get_post(post_data['_id'], post_data)
+ skip += 20
+ has_more = posts['hasMoreData']
+ if not has_more:
+ break
+
+ def _real_extract(self, url):
+ alt_id = self._match_id(url)
+ post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
+ if post_json['code'] != 200:
+ raise ExtractorError(post_json['message'], expected=True)
+ id = post_json['data']['_id']
+ return self.playlist_result(self._entries(id), playlist_id=alt_id)
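
# Illustrative sketch (editorial, not patch content): ChingariUserIE._entries
# above pages through the posts API with a skip/limit cursor until
# 'hasMoreData' turns false. The same generator pattern in isolation, assuming
# a get_page(skip, limit) callable that returns one decoded JSON page:

def paginate(get_page, limit=20):
    skip = 0
    while True:
        page = get_page(skip, limit)
        for post in page.get('data', []):
            yield post['post']
        if not page.get('hasMoreData'):
            break
        skip += limit
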
diff --git a/hypervideo_dl/extractor/cinemax.py b/hypervideo_dl/extractor/cinemax.py
index 7f89d33..2c3ff8d 100644
--- a/hypervideo_dl/extractor/cinemax.py
+++ b/hypervideo_dl/extractor/cinemax.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .hbo import HBOBaseIE
@@ -23,7 +22,7 @@ class CinemaxIE(HBOBaseIE):
}]
def _real_extract(self, url):
- path, video_id = re.match(self._VALID_URL, url).groups()
+ path, video_id = self._match_valid_url(url).groups()
info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id)
info['id'] = video_id
return info
diff --git a/hypervideo_dl/extractor/ciscolive.py b/hypervideo_dl/extractor/ciscolive.py
index da404e4..349c5eb 100644
--- a/hypervideo_dl/extractor/ciscolive.py
+++ b/hypervideo_dl/extractor/ciscolive.py
@@ -4,14 +4,11 @@ from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
clean_html,
float_or_none,
int_or_none,
+ parse_qs,
try_get,
urlencode_postdata,
)
@@ -145,7 +142,7 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE):
query['from'] += query['size']
def _real_extract(self, url):
- query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ query = parse_qs(url)
query['type'] = 'session'
return self.playlist_result(
self._entries(query, url), playlist_title='Search query')
diff --git a/hypervideo_dl/extractor/ciscowebex.py b/hypervideo_dl/extractor/ciscowebex.py
new file mode 100644
index 0000000..882dae9
--- /dev/null
+++ b/hypervideo_dl/extractor/ciscowebex.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class CiscoWebexIE(InfoExtractor):
+ IE_NAME = 'ciscowebex'
+ IE_DESC = 'Cisco Webex'
+ _VALID_URL = r'''(?x)
+ (?P<url>https?://(?P<subdomain>[^/#?]*)\.webex\.com/(?:
+ (?P<siteurl_1>[^/#?]*)/(?:ldr|lsr).php\?(?:[^#]*&)*RCID=(?P<rcid>[0-9a-f]{32})|
+ (?:recordingservice|webappng)/sites/(?P<siteurl_2>[^/#?]*)/recording/(?:playback/|play/)?(?P<id>[0-9a-f]{32})
+ ))'''
+
+ _TESTS = [{
+ 'url': 'https://demosubdomain.webex.com/demositeurl/ldr.php?RCID=e58e803bc0f766bb5f6376d2e86adb5b',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://demosubdomain.webex.com/demositeurl/lsr.php?RCID=bc04b4a7b5ea2cc3a493d5ae6aaff5d7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://demosubdomain.webex.com/recordingservice/sites/demositeurl/recording/88e7a42f7b19f5b423c54754aecc2ce9/playback',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ rcid = mobj.group('rcid')
+ if rcid:
+ webpage = self._download_webpage(url, None, note='Getting video ID')
+ url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
+ url = self._request_webpage(url, None, note='Resolving final URL').geturl()
+ mobj = self._match_valid_url(url)
+ subdomain = mobj.group('subdomain')
+ siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
+ video_id = mobj.group('id')
+
+ stream = self._download_json(
+ 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
+ video_id, fatal=False, query={'siteurl': siteurl})
+ if not stream:
+ self.raise_login_required(method='cookies')
+
+ video_id = stream.get('recordUUID') or video_id
+
+ formats = [{
+ 'format_id': 'video',
+ 'url': stream['fallbackPlaySrc'],
+ 'ext': 'mp4',
+ 'vcodec': 'avc1.640028',
+ 'acodec': 'mp4a.40.2',
+ }]
+ if stream.get('preventDownload') is False:
+ mp4url = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['mp4URL'])
+ if mp4url:
+ formats.append({
+ 'format_id': 'video',
+ 'url': mp4url,
+ 'ext': 'mp4',
+ 'vcodec': 'avc1.640028',
+ 'acodec': 'mp4a.40.2',
+ })
+ audiourl = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['audioURL'])
+ if audiourl:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': audiourl,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ 'acodec': 'mp3',
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': stream['recordName'],
+ 'description': stream.get('description'),
+ 'uploader': stream.get('ownerDisplayName'),
+            'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # email or id
+ 'timestamp': unified_timestamp(stream.get('createTime')),
+ 'duration': int_or_none(stream.get('duration'), 1000),
+ 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/cjsw.py b/hypervideo_dl/extractor/cjsw.py
index 505bdbe..1dea0d7 100644
--- a/hypervideo_dl/extractor/cjsw.py
+++ b/hypervideo_dl/extractor/cjsw.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -30,7 +29,7 @@ class CJSWIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
program, episode_id = mobj.group('program', 'id')
audio_id = '%s/%s' % (program, episode_id)
diff --git a/hypervideo_dl/extractor/clyp.py b/hypervideo_dl/extractor/clyp.py
index 06d04de..e6b2ac4 100644
--- a/hypervideo_dl/extractor/clyp.py
+++ b/hypervideo_dl/extractor/clyp.py
@@ -1,12 +1,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
float_or_none,
+ parse_qs,
unified_timestamp,
)
@@ -44,7 +41,7 @@ class ClypIE(InfoExtractor):
def _real_extract(self, url):
audio_id = self._match_id(url)
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs = parse_qs(url)
token = qs.get('token', [None])[0]
query = {}
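
# Illustrative sketch (editorial, not patch content): parse_qs() maps every
# query parameter to a *list* of values, which is why the token above is read
# as qs.get('token', [None])[0]. The same lookup with only the standard library:

from urllib.parse import parse_qs, urlparse

def query_param(url, name):
    qs = parse_qs(urlparse(url).query)
    return qs.get(name, [None])[0]

# query_param('https://clyp.it/abc123?token=xyz', 'token')  -> 'xyz'
# query_param('https://clyp.it/abc123', 'token')            -> None
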
diff --git a/hypervideo_dl/extractor/cmt.py b/hypervideo_dl/extractor/cmt.py
index e701fbe..a4ddb91 100644
--- a/hypervideo_dl/extractor/cmt.py
+++ b/hypervideo_dl/extractor/cmt.py
@@ -2,6 +2,8 @@ from __future__ import unicode_literals
from .mtv import MTVIE
+# TODO Remove - Reason: Outdated Site
+
class CMTIE(MTVIE):
IE_NAME = 'cmt.com'
@@ -39,7 +41,7 @@ class CMTIE(MTVIE):
'only_matching': True,
}]
- def _extract_mgid(self, webpage):
+ def _extract_mgid(self, webpage, url):
mgid = self._search_regex(
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
webpage, 'mgid', group='mgid', default=None)
@@ -50,5 +52,5 @@ class CMTIE(MTVIE):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- mgid = self._extract_mgid(webpage)
+ mgid = self._extract_mgid(webpage, url)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
diff --git a/hypervideo_dl/extractor/cnbc.py b/hypervideo_dl/extractor/cnbc.py
index 7b9f453..da3730c 100644
--- a/hypervideo_dl/extractor/cnbc.py
+++ b/hypervideo_dl/extractor/cnbc.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import smuggle_url
@@ -57,7 +56,7 @@ class CNBCVideoIE(InfoExtractor):
}
def _real_extract(self, url):
- path, display_id = re.match(self._VALID_URL, url).groups()
+ path, display_id = self._match_valid_url(url).groups()
video_id = self._download_json(
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
'query': '''{
diff --git a/hypervideo_dl/extractor/cnn.py b/hypervideo_dl/extractor/cnn.py
index 2d950fa..af11d95 100644
--- a/hypervideo_dl/extractor/cnn.py
+++ b/hypervideo_dl/extractor/cnn.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from .turner import TurnerBaseIE
@@ -88,7 +87,7 @@ class CNNIE(TurnerBaseIE):
return None
def _real_extract(self, url):
- sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
+ sub_domain, path, page_title = self._match_valid_url(url).groups()
if sub_domain not in ('money', 'edition'):
sub_domain = 'edition'
config = self._CONFIG[sub_domain]
diff --git a/hypervideo_dl/extractor/comedycentral.py b/hypervideo_dl/extractor/comedycentral.py
index 1bfa912..5a12ab5 100644
--- a/hypervideo_dl/extractor/comedycentral.py
+++ b/hypervideo_dl/extractor/comedycentral.py
@@ -4,7 +4,7 @@ from .mtv import MTVServicesInfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
+ _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist)/(?P<id>[0-9a-z]{6})'
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
@@ -24,6 +24,9 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
}, {
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
'only_matching': True,
+ }, {
+ 'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb',
+ 'only_matching': True,
}]
diff --git a/hypervideo_dl/extractor/common.py b/hypervideo_dl/extractor/common.py
index 8b622be..df74c75 100644
--- a/hypervideo_dl/extractor/common.py
+++ b/hypervideo_dl/extractor/common.py
@@ -4,13 +4,12 @@ from __future__ import unicode_literals
import base64
import datetime
import hashlib
+import itertools
import json
import netrc
import os
import random
import re
-import socket
-import ssl
import sys
import time
import math
@@ -20,8 +19,8 @@ from ..compat import (
compat_cookies_SimpleCookie,
compat_etree_Element,
compat_etree_fromstring,
+ compat_expanduser,
compat_getpass,
- compat_integer_types,
compat_http_client,
compat_os_name,
compat_str,
@@ -32,12 +31,12 @@ from ..compat import (
compat_urlparse,
compat_xml_parse_error,
)
+from ..downloader import FileDownloader
from ..downloader.f4m import (
get_base_url,
remove_encrypted_media,
)
from ..utils import (
- NO_DEFAULT,
age_restricted,
base_url,
bug_reports_message,
@@ -47,16 +46,19 @@ from ..utils import (
determine_protocol,
dict_get,
error_to_compat_str,
- ExtractorError,
extract_attributes,
+ ExtractorError,
fix_xml_ampersands,
float_or_none,
+ format_field,
GeoRestrictedError,
GeoUtils,
int_or_none,
js_to_json,
JSON_LD_RE,
mimetype2ext,
+ network_exceptions,
+ NO_DEFAULT,
orderedSet,
parse_bitrate,
parse_codecs,
@@ -65,19 +67,21 @@ from ..utils import (
parse_m3u8_attributes,
parse_resolution,
RegexNotFoundError,
- sanitized_Request,
sanitize_filename,
+ sanitized_Request,
str_or_none,
str_to_int,
strip_or_none,
+ traverse_obj,
unescapeHTML,
unified_strdate,
unified_timestamp,
update_Request,
update_url_query,
- urljoin,
url_basename,
url_or_none,
+ urljoin,
+ variadic,
xpath_element,
xpath_text,
xpath_with_ns,
@@ -143,6 +147,8 @@ class InfoExtractor(object):
* width Width of the video, if known
* height Height of the video, if known
* resolution Textual description of width and height
+ * dynamic_range The dynamic range of the video. One of:
+ "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
* tbr Average bitrate of audio and video in KBit/s
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
@@ -156,7 +162,7 @@ class InfoExtractor(object):
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
- "http", "https", "rtsp", "rtmp", "rtmpe",
+ "http", "https", "rtsp", "rtmp", "rtmp_ffmpeg", "rtmpe",
"m3u8", "m3u8_native" or "http_dash_segments".
* fragment_base_url
Base URL for fragments. Each fragment's path
@@ -201,8 +207,12 @@ class InfoExtractor(object):
width : height ratio as float.
* no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean.
+ * has_drm The format has DRM and cannot be downloaded. Boolean
* downloader_options A dictionary of downloader options as
described in FileDownloader
+ RTMP formats can also have the additional fields: page_url,
+ app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
+ rtmp_protocol, rtmp_real_time
url: Final video URL.
ext: Video filename extension.
@@ -232,8 +242,7 @@ class InfoExtractor(object):
creator: The creator of the video.
release_timestamp: UNIX timestamp of the moment the video was released.
release_date: The date (YYYYMMDD) when the video was released.
- timestamp: UNIX timestamp of the moment the video became available
- (uploaded).
+ timestamp: UNIX timestamp of the moment the video was uploaded
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
@@ -251,9 +260,11 @@ class InfoExtractor(object):
entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
+ It can optionally also have:
+ * "name": Name or description of the subtitles
"ext" will be calculated from URL if missing
- automatic_captions: Like 'subtitles', used by the YoutubeIE for
- automatically generated captions
+ automatic_captions: Like 'subtitles'; contains automatically generated
+ captions instead of normal subtitles
duration: Length of the video in seconds, as an integer or float.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
@@ -265,6 +276,7 @@ class InfoExtractor(object):
properties (all but one of text or html optional):
* "author" - human-readable name of the comment author
* "author_id" - user ID of the comment author
+ * "author_thumbnail" - The thumbnail of the comment author
* "id" - Comment ID
* "html" - Comment as HTML
* "text" - Plain text of the comment
@@ -272,6 +284,12 @@ class InfoExtractor(object):
* "parent" - ID of the comment this one is replying to.
Set to "root" to indicate that this is a
comment to the original video.
+ * "like_count" - Number of positive ratings of the comment
+ * "dislike_count" - Number of negative ratings of the comment
+ * "is_favorited" - Whether the comment is marked as
+ favorite by the video uploader
+ * "author_is_uploader" - Whether the comment is made by
+ the video uploader
age_limit: Age restriction for the video, as an integer (years)
webpage_url: The URL to the video webpage, if given to hypervideo it
should allow to get the same result again. (It will be set
@@ -279,8 +297,13 @@ class InfoExtractor(object):
categories: A list of categories that the video falls in, for example
["Sports", "Berlin"]
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+ cast: A list of the video cast
is_live: True, False, or None (=unknown). Whether this video is a
live stream that goes on instead of a fixed-length video.
+ was_live: True, False, or None (=unknown). Whether this video was
+ originally a live stream.
+ live_status: 'is_live', 'is_upcoming', 'was_live', 'not_live' or None (=unknown)
+ If absent, automatically set from is_live, was_live
start_time: Time in seconds where the reproduction should start, as
specified in the URL.
end_time: Time in seconds where the reproduction should end, as
@@ -289,6 +312,22 @@ class InfoExtractor(object):
* "start_time" - The start time of the chapter in seconds
* "end_time" - The end time of the chapter in seconds
* "title" (optional, string)
+ playable_in_embed: Whether this video is allowed to play in embedded
+ players on other sites. Can be True (=always allowed),
+ False (=never allowed), None (=unknown), or a string
+                        specifying the criteria for embedability (e.g. 'whitelist')
+ availability: Under what condition the video is available. One of
+ 'private', 'premium_only', 'subscriber_only', 'needs_auth',
+ 'unlisted' or 'public'. Use 'InfoExtractor._availability'
+ to set it
+ __post_extractor: A function to be called just before the metadata is
+                        written to disk, logger or console. The function
+                        must return a dict which will be added to the info_dict.
+                        This is useful for additional information that is
+                        time-consuming to extract. Note that the fields thus
+                        extracted will not be available to the output template and
+ match_filter. So, only "comments" and "comment_count" are
+ currently allowed to be extracted via this method.
The following fields should only be used when the video belongs to some logical
chapter or section:
@@ -337,9 +376,8 @@ class InfoExtractor(object):
There must be a key "entries", which is a list, an iterable, or a PagedList
object, each element of which is a valid dictionary by this specification.
- Additionally, playlists can have "id", "title", "description", "uploader",
- "uploader_id", "uploader_url", "duration" attributes with the same semantics
- as videos (see above).
+    Additionally, playlists can have "id", "title", and any other relevant
+ attributes with the same semantics as videos (see above).
_type "multi_video" indicates that there are multiple videos that
@@ -370,6 +408,10 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
+ Subclasses may also override suitable() if necessary, but ensure the function
+ signature is preserved and that this function imports everything it needs
+ (except other extractors), so that lazy_extractors works correctly
+
_GEO_BYPASS attribute may be set to False in order to disable
geo restriction bypass mechanisms for a particular extractor.
Though it won't disable explicit geo restriction bypass based on
@@ -385,7 +427,7 @@ class InfoExtractor(object):
will be used by geo restriction bypass mechanism similarly
to _GEO_COUNTRIES.
- Finally, the _WORKING attribute should be set to False for broken IEs
+ The _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""
@@ -397,30 +439,47 @@ class InfoExtractor(object):
_GEO_IP_BLOCKS = None
_WORKING = True
+ _LOGIN_HINTS = {
+ 'any': 'Use --cookies, --username and --password or --netrc to provide account credentials',
+ 'cookies': (
+ 'Use --cookies-from-browser or --cookies for the authentication. '
+ 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
+ 'password': 'Use --username and --password or --netrc to provide account credentials',
+ }
+
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""
self._ready = False
self._x_forwarded_for_ip = None
+ self._printed_messages = set()
self.set_downloader(downloader)
@classmethod
- def suitable(cls, url):
- """Receives a URL and returns True if suitable for this IE."""
-
+ def _match_valid_url(cls, url):
# This does not use has/getattr intentionally - we want to know whether
# we have cached the regexp for *this* class, whereas getattr would also
# match the superclass
if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
- return cls._VALID_URL_RE.match(url) is not None
+ return cls._VALID_URL_RE.match(url)
+
+ @classmethod
+ def suitable(cls, url):
+ """Receives a URL and returns True if suitable for this IE."""
+ # This function must import everything it needs (except other extractors),
+ # so that lazy_extractors works correctly
+ return cls._match_valid_url(url) is not None
@classmethod
def _match_id(cls, url):
- if '_VALID_URL_RE' not in cls.__dict__:
- cls._VALID_URL_RE = re.compile(cls._VALID_URL)
- m = cls._VALID_URL_RE.match(url)
- assert m
- return compat_str(m.group('id'))
+ return cls._match_valid_url(url).group('id')
+
+ @classmethod
+ def get_temp_id(cls, url):
+ try:
+ return cls._match_id(url)
+ except (IndexError, AttributeError):
+ return None
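
# Illustrative sketch (editorial, not patch content): _match_valid_url above
# tests '_VALID_URL_RE' not in cls.__dict__ instead of hasattr() because
# hasattr() also sees attributes inherited from a parent class, so a subclass
# with its own _VALID_URL could silently reuse the parent's compiled regex.
# A minimal demonstration of the per-class cache:

import re

class Base:
    _VALID_URL = r'https?://example\.com/(?P<id>\d+)'

    @classmethod
    def match(cls, url):
        if '_VALID_URL_RE' not in cls.__dict__:  # per-class, never inherited
            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
        return cls._VALID_URL_RE.match(url)

class Child(Base):
    _VALID_URL = r'https?://example\.org/(?P<id>\w+)'

# Base.match() caches on Base and Child.match() caches on Child; a hasattr()
# check would make Child reuse Base's pattern whenever Base ran first.
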
@classmethod
def working(cls):
@@ -429,6 +488,7 @@ class InfoExtractor(object):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
+ self._printed_messages = set()
self._initialize_geo_bypass({
'countries': self._GEO_COUNTRIES,
'ip_blocks': self._GEO_IP_BLOCKS,
@@ -466,7 +526,7 @@ class InfoExtractor(object):
if not self._x_forwarded_for_ip:
# Geo bypass mechanism is explicitly disabled by user
- if not self._downloader.params.get('geo_bypass', True):
+ if not self.get_param('geo_bypass', True):
return
if not geo_bypass_context:
@@ -488,7 +548,7 @@ class InfoExtractor(object):
# Explicit IP block specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
- ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+ ip_block = self.get_param('geo_bypass_ip_block', None)
# Otherwise use random IP block from geo bypass context but only
# if extractor is known as geo bypassable
@@ -499,17 +559,15 @@ class InfoExtractor(object):
if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s as X-Forwarded-For.'
- % self._x_forwarded_for_ip)
+ self._downloader.write_debug(
+            'Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip)
return
# Path 2: bypassing based on country code
# Explicit country code specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
- country = self._downloader.params.get('geo_bypass_country', None)
+ country = self.get_param('geo_bypass_country', None)
# Otherwise use random country code from geo bypass context but
# only if extractor is known as geo bypassable
@@ -520,10 +578,8 @@ class InfoExtractor(object):
if country:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country.upper()))
+ self._downloader.write_debug(
+ 'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper()))
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
@@ -531,25 +587,34 @@ class InfoExtractor(object):
for _ in range(2):
try:
self.initialize()
+ self.write_debug('Extracting URL: %s' % url)
ie_result = self._real_extract(url)
+ if ie_result is None:
+ return None
if self._x_forwarded_for_ip:
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ subtitles = ie_result.get('subtitles')
+ if (subtitles and 'live_chat' in subtitles
+ and 'no-live-chat' in self.get_param('compat_opts', [])):
+ del subtitles['live_chat']
return ie_result
except GeoRestrictedError as e:
if self.__maybe_fake_ip_and_retry(e.countries):
continue
raise
- except ExtractorError:
- raise
+ except ExtractorError as e:
+ video_id = e.video_id or self.get_temp_id(url)
+ raise ExtractorError(
+ e.msg, video_id=video_id, ie=self.IE_NAME, tb=e.traceback, expected=e.expected, cause=e.cause)
except compat_http_client.IncompleteRead as e:
- raise ExtractorError('A network error has occurred.', cause=e, expected=True)
+ raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
- raise ExtractorError('An extractor error has occurred.', cause=e)
+ raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
def __maybe_fake_ip_and_retry(self, countries):
- if (not self._downloader.params.get('geo_bypass_country', None)
+ if (not self.get_param('geo_bypass_country', None)
and self._GEO_BYPASS
- and self._downloader.params.get('geo_bypass', True)
+ and self.get_param('geo_bypass', True)
and not self._x_forwarded_for_ip
and countries):
country_code = random.choice(countries)
@@ -576,7 +641,7 @@ class InfoExtractor(object):
@classmethod
def ie_key(cls):
"""A string for getting the InfoExtractor with get_info_extractor"""
- return compat_str(cls.__name__[:-2])
+ return cls.__name__[:-2]
@property
def IE_NAME(self):
@@ -587,14 +652,10 @@ class InfoExtractor(object):
assert isinstance(err, compat_urllib_error.HTTPError)
if expected_status is None:
return False
- if isinstance(expected_status, compat_integer_types):
- return err.code == expected_status
- elif isinstance(expected_status, (list, tuple)):
- return err.code in expected_status
elif callable(expected_status):
return expected_status(err.code) is True
else:
- assert False
+ return err.code in variadic(expected_status)
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
"""
@@ -602,6 +663,14 @@ class InfoExtractor(object):
See _download_webpage docstring for arguments specification.
"""
+ if not self._downloader._first_webpage_request:
+ sleep_interval = float_or_none(self.get_param('sleep_interval_requests')) or 0
+ if sleep_interval > 0:
+ self.to_screen('Sleeping %s seconds ...' % sleep_interval)
+ time.sleep(sleep_interval)
+ else:
+ self._downloader._first_webpage_request = False
+
if note is None:
self.report_download_webpage(video_id)
elif note is not False:
@@ -627,12 +696,9 @@ class InfoExtractor(object):
url_or_request = update_url_query(url_or_request, query)
if data is not None or headers:
url_or_request = sanitized_Request(url_or_request, data, headers)
- exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
- if hasattr(ssl, 'CertificateError'):
- exceptions.append(ssl.CertificateError)
try:
return self._downloader.urlopen(url_or_request)
- except tuple(exceptions) as err:
+ except network_exceptions as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
@@ -651,7 +717,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
- self._downloader.report_warning(errmsg)
+ self.report_warning(errmsg)
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
@@ -723,15 +789,16 @@ class InfoExtractor(object):
webpage_bytes = prefix + webpage_bytes
if not encoding:
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
- if self._downloader.params.get('dump_intermediate_pages', False):
+ if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
- if self._downloader.params.get('write_pages', False):
+ if self.get_param('write_pages', False):
basen = '%s_%s' % (video_id, urlh.geturl())
- if len(basen) > 240:
+ trim_length = self.get_param('trim_file_name') or 240
+ if len(basen) > trim_length:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
- basen = basen[:240 - len(h)] + h
+ basen = basen[:trim_length - len(h)] + h
raw_filename = basen + '.dump'
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen('Saving request to ' + filename)
@@ -911,14 +978,72 @@ class InfoExtractor(object):
else:
self.report_warning(errmsg + str(ve))
- def report_warning(self, msg, video_id=None):
- idstr = '' if video_id is None else '%s: ' % video_id
- self._downloader.report_warning(
- '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+ def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True):
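+        # slice out the outermost {...} span to drop any socket framing around the JSON payload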
+ return self._parse_json(
+ data[data.find('{'):data.rfind('}') + 1],
+ video_id, transform_source, fatal)
+
+ def _download_socket_json_handle(
+ self, url_or_request, video_id, note='Polling socket',
+ errnote='Unable to poll socket', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (JSON object, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ webpage, urlh = res
+ return self._parse_socket_response_as_json(
+ webpage, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_socket_json(
+ self, url_or_request, video_id, note='Polling socket',
+ errnote='Unable to poll socket', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return the JSON object as a dict.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_socket_json_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
+ def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
+ idstr = format_field(video_id, template='%s: ')
+ msg = f'[{self.IE_NAME}] {idstr}{msg}'
+ if only_once:
+ if f'WARNING: {msg}' in self._printed_messages:
+ return
+ self._printed_messages.add(f'WARNING: {msg}')
+ self._downloader.report_warning(msg, *args, **kwargs)
- def to_screen(self, msg):
+ def to_screen(self, msg, *args, **kwargs):
"""Print msg to screen, prefixing it with '[ie_name]'"""
- self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+ self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+
+ def write_debug(self, msg, *args, **kwargs):
+ self._downloader.write_debug('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+
+ def get_param(self, name, default=None, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.params.get(name, default, *args, **kwargs)
+ return default
+
+ def report_drm(self, video_id, partial=False):
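+        # routed through raise_no_formats so that, with 'ignore_no_formats_error' set, extraction can continue with metadata only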
+ self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
def report_extraction(self, id_or_name):
"""Report information extraction."""
@@ -936,24 +1061,40 @@ class InfoExtractor(object):
"""Report attempt to log in."""
self.to_screen('Logging in')
- @staticmethod
- def raise_login_required(msg='This video is only available for registered users'):
- raise ExtractorError(
- '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
- expected=True)
+ def raise_login_required(
+ self, msg='This video is only available for registered users',
+ metadata_available=False, method='any'):
+ if metadata_available and self.get_param('ignore_no_formats_error'):
+ self.report_warning(msg)
+ if method is not None:
+ msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
+ raise ExtractorError(msg, expected=True)
+
+ def raise_geo_restricted(
+ self, msg='This video is not available from your location due to geo restriction',
+ countries=None, metadata_available=False):
+ if metadata_available and self.get_param('ignore_no_formats_error'):
+ self.report_warning(msg)
+ else:
+ raise GeoRestrictedError(msg, countries=countries)
- @staticmethod
- def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
- raise GeoRestrictedError(msg, countries=countries)
+ def raise_no_formats(self, msg, expected=False, video_id=None):
+ if expected and self.get_param('ignore_no_formats_error'):
+ self.report_warning(msg, video_id)
+ elif isinstance(msg, ExtractorError):
+ raise msg
+ else:
+ raise ExtractorError(msg, expected=expected, video_id=video_id)
# Methods for following #608
@staticmethod
- def url_result(url, ie=None, video_id=None, video_title=None):
+ def url_result(url, ie=None, video_id=None, video_title=None, **kwargs):
"""Returns a URL that points to a page that should be processed"""
# TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
+ video_info.update(kwargs)
if video_id is not None:
video_info['id'] = video_id
if video_title is not None:
@@ -968,15 +1109,16 @@ class InfoExtractor(object):
urls, playlist_id=playlist_id, playlist_title=playlist_title)
@staticmethod
- def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
+ def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None, **kwargs):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
+ video_info.update(kwargs)
if playlist_id:
video_info['id'] = playlist_id
if playlist_title:
video_info['title'] = playlist_title
- if playlist_description:
+ if playlist_description is not None:
video_info['description'] = playlist_description
return video_info
@@ -995,15 +1137,14 @@ class InfoExtractor(object):
if mobj:
break
- if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
- _name = '\033[0;34m%s\033[0m' % name
- else:
- _name = name
+ _name = self._downloader._color_text(name, 'blue')
if mobj:
if group is None:
# return the first matching group
return next(g for g in mobj.groups() if g is not None)
+ elif isinstance(group, (list, tuple)):
+ return tuple(mobj.group(g) for g in group)
else:
return mobj.group(group)
elif default is not NO_DEFAULT:
@@ -1011,7 +1152,7 @@ class InfoExtractor(object):
elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name)
else:
- self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
+ self.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -1029,9 +1170,12 @@ class InfoExtractor(object):
password = None
netrc_machine = netrc_machine or self._NETRC_MACHINE
- if self._downloader.params.get('usenetrc', False):
+ if self.get_param('usenetrc', False):
try:
- info = netrc.netrc().authenticators(netrc_machine)
+ netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
+ if os.path.isdir(netrc_file):
+ netrc_file = os.path.join(netrc_file, '.netrc')
+ info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
if info is not None:
username = info[0]
password = info[2]
@@ -1039,7 +1183,7 @@ class InfoExtractor(object):
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
except (IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning(
+ self.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
return username, password
@@ -1053,15 +1197,11 @@ class InfoExtractor(object):
value.
If there's no info available, return (None, None)
"""
- if self._downloader is None:
- return (None, None)
-
- downloader_params = self._downloader.params
# Attempt to use provided username and password or .netrc data
- if downloader_params.get(username_option) is not None:
- username = downloader_params[username_option]
- password = downloader_params[password_option]
+ username = self.get_param(username_option)
+ if username is not None:
+ password = self.get_param(password_option)
else:
username, password = self._get_netrc_login_info(netrc_machine)
@@ -1074,12 +1214,10 @@ class InfoExtractor(object):
currently just uses the command line option
If there's no info available, return None
"""
- if self._downloader is None:
- return None
- downloader_params = self._downloader.params
- if downloader_params.get('twofactor') is not None:
- return downloader_params['twofactor']
+ tfa = self.get_param('twofactor')
+ if tfa is not None:
+ return tfa
return compat_getpass('Type %s and press [Return]: ' % note)
@@ -1102,8 +1240,7 @@ class InfoExtractor(object):
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
def _og_search_property(self, prop, html, name=None, **kargs):
- if not isinstance(prop, (list, tuple)):
- prop = [prop]
+ prop = variadic(prop)
if name is None:
name = 'OpenGraph %s' % prop[0]
og_regexes = []
@@ -1133,8 +1270,7 @@ class InfoExtractor(object):
return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
- if not isinstance(name, (list, tuple)):
- name = [name]
+ name = variadic(name)
if display_name is None:
display_name = name[0]
return self._html_search_regex(
@@ -1194,7 +1330,7 @@ class InfoExtractor(object):
# JSON-LD may be malformed and thus `fatal` should be respected.
# At the same time `default` may be passed that assumes `fatal=False`
# for _search_regex. Let's simulate the same behavior here as well.
- fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+ fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
json_ld = []
for mobj in json_ld_list:
json_ld_item = self._parse_json(
@@ -1214,7 +1350,7 @@ class InfoExtractor(object):
elif fatal:
raise RegexNotFoundError('Unable to extract JSON-LD')
else:
- self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+ self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
@@ -1369,81 +1505,283 @@ class InfoExtractor(object):
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)
- def _sort_formats(self, formats, field_preference=None):
- if not formats:
- raise ExtractorError('No video formats found')
+ class FormatSort:
+ regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
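+        # e.g. '+res:1080' parses as reverse='+', field='res', separator=':', limit='1080'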
+
+ default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
+ 'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr',
+ 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases
+ ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
+ 'height', 'width', 'proto', 'vext', 'abr', 'aext',
+ 'fps', 'fs_approx', 'source', 'format_id')
+
+ settings = {
+ 'vcodec': {'type': 'ordered', 'regex': True,
+ 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
+ 'acodec': {'type': 'ordered', 'regex': True,
+ 'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']},
+ 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
+ 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
+ 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
+ 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']},
+ 'vext': {'type': 'ordered', 'field': 'video_ext',
+ 'order': ('mp4', 'webm', 'flv', '', 'none'),
+ 'order_free': ('webm', 'mp4', 'flv', '', 'none')},
+ 'aext': {'type': 'ordered', 'field': 'audio_ext',
+ 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
+ 'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
+ 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
+ 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
+ 'field': ('vcodec', 'acodec'),
+ 'function': lambda it: int(any(v != 'none' for v in it))},
+ 'ie_pref': {'priority': True, 'type': 'extractor'},
+ 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
+ 'lang': {'convert': 'ignore', 'field': 'language_preference'},
+ 'quality': {'convert': 'float_none', 'default': -1},
+ 'filesize': {'convert': 'bytes'},
+ 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
+ 'id': {'convert': 'string', 'field': 'format_id'},
+ 'height': {'convert': 'float_none'},
+ 'width': {'convert': 'float_none'},
+ 'fps': {'convert': 'float_none'},
+ 'tbr': {'convert': 'float_none'},
+ 'vbr': {'convert': 'float_none'},
+ 'abr': {'convert': 'float_none'},
+ 'asr': {'convert': 'float_none'},
+ 'source': {'convert': 'ignore', 'field': 'source_preference'},
+
+ 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
+ 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
+ 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
+ 'ext': {'type': 'combined', 'field': ('vext', 'aext')},
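+            # 'res' ranks by the smaller of height/width, so portrait and landscape videos compare consistently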
+ 'res': {'type': 'multiple', 'field': ('height', 'width'),
+ 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
+
+ # Most of these exist only for compatibility reasons
+ 'dimension': {'type': 'alias', 'field': 'res'},
+ 'resolution': {'type': 'alias', 'field': 'res'},
+ 'extension': {'type': 'alias', 'field': 'ext'},
+ 'bitrate': {'type': 'alias', 'field': 'br'},
+ 'total_bitrate': {'type': 'alias', 'field': 'tbr'},
+ 'video_bitrate': {'type': 'alias', 'field': 'vbr'},
+ 'audio_bitrate': {'type': 'alias', 'field': 'abr'},
+ 'framerate': {'type': 'alias', 'field': 'fps'},
+ 'language_preference': {'type': 'alias', 'field': 'lang'}, # not named as 'language' because such a field exists
+ 'protocol': {'type': 'alias', 'field': 'proto'},
+ 'source_preference': {'type': 'alias', 'field': 'source'},
+ 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
+ 'filesize_estimate': {'type': 'alias', 'field': 'size'},
+ 'samplerate': {'type': 'alias', 'field': 'asr'},
+ 'video_ext': {'type': 'alias', 'field': 'vext'},
+ 'audio_ext': {'type': 'alias', 'field': 'aext'},
+ 'video_codec': {'type': 'alias', 'field': 'vcodec'},
+ 'audio_codec': {'type': 'alias', 'field': 'acodec'},
+ 'video': {'type': 'alias', 'field': 'hasvid'},
+ 'has_video': {'type': 'alias', 'field': 'hasvid'},
+ 'audio': {'type': 'alias', 'field': 'hasaud'},
+ 'has_audio': {'type': 'alias', 'field': 'hasaud'},
+ 'extractor': {'type': 'alias', 'field': 'ie_pref'},
+ 'preference': {'type': 'alias', 'field': 'ie_pref'},
+ 'extractor_preference': {'type': 'alias', 'field': 'ie_pref'},
+ 'format_id': {'type': 'alias', 'field': 'id'},
+ }
- for f in formats:
- # Automatically determine tbr when missing based on abr and vbr (improves
- # formats sorting in some cases)
- if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
- f['tbr'] = f['abr'] + f['vbr']
-
- def _formats_key(f):
- # TODO remove the following workaround
- from ..utils import determine_ext
- if not f.get('ext') and 'url' in f:
- f['ext'] = determine_ext(f['url'])
-
- if isinstance(field_preference, (list, tuple)):
- return tuple(
- f.get(field)
- if f.get(field) is not None
- else ('' if field == 'format_id' else -1)
- for field in field_preference)
-
- preference = f.get('preference')
- if preference is None:
- preference = 0
- if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
- preference -= 0.5
-
- protocol = f.get('protocol') or determine_protocol(f)
- proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
-
- if f.get('vcodec') == 'none': # audio only
- preference -= 50
- if self._downloader.params.get('prefer_free_formats'):
- ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
+ _order = []
+
+ def _get_field_setting(self, field, key):
+ if field not in self.settings:
+ self.settings[field] = {}
+ propObj = self.settings[field]
+ if key not in propObj:
+ type = propObj.get('type')
+ if key == 'field':
+ default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
+ elif key == 'convert':
+ default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
else:
- ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
- ext_preference = 0
- try:
- audio_ext_preference = ORDER.index(f['ext'])
- except ValueError:
- audio_ext_preference = -1
+ default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
+ propObj[key] = default
+ return propObj[key]
+
+ def _resolve_field_value(self, field, value, convertNone=False):
+ if value is None:
+ if not convertNone:
+ return None
+ else:
+ value = value.lower()
+ conversion = self._get_field_setting(field, 'convert')
+ if conversion == 'ignore':
+ return None
+ if conversion == 'string':
+ return value
+ elif conversion == 'float_none':
+ return float_or_none(value)
+ elif conversion == 'bytes':
+ return FileDownloader.parse_bytes(value)
+ elif conversion == 'order':
+ order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
+ use_regex = self._get_field_setting(field, 'regex')
+ list_length = len(order_list)
+ empty_pos = order_list.index('') if '' in order_list else list_length + 1
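+                # entries earlier in the order list score higher: position i maps to list_length - i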
+ if use_regex and value is not None:
+ for i, regex in enumerate(order_list):
+ if regex and re.match(regex, value):
+ return list_length - i
+ return list_length - empty_pos # not in list
+            else:  # not regex or value is None
+ return list_length - (order_list.index(value) if value in order_list else empty_pos)
else:
- if f.get('acodec') == 'none': # video only
- preference -= 40
- if self._downloader.params.get('prefer_free_formats'):
- ORDER = ['flv', 'mp4', 'webm']
+ if value.isnumeric():
+ return float(value)
else:
- ORDER = ['webm', 'flv', 'mp4']
- try:
- ext_preference = ORDER.index(f['ext'])
- except ValueError:
- ext_preference = -1
- audio_ext_preference = 0
-
- return (
- preference,
- f.get('language_preference') if f.get('language_preference') is not None else -1,
- f.get('quality') if f.get('quality') is not None else -1,
- f.get('tbr') if f.get('tbr') is not None else -1,
- f.get('filesize') if f.get('filesize') is not None else -1,
- f.get('vbr') if f.get('vbr') is not None else -1,
- f.get('height') if f.get('height') is not None else -1,
- f.get('width') if f.get('width') is not None else -1,
- proto_preference,
- ext_preference,
- f.get('abr') if f.get('abr') is not None else -1,
- audio_ext_preference,
- f.get('fps') if f.get('fps') is not None else -1,
- f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
- f.get('source_preference') if f.get('source_preference') is not None else -1,
- f.get('format_id') if f.get('format_id') is not None else '',
- )
- formats.sort(key=_formats_key)
+ self.settings[field]['convert'] = 'string'
+ return value
+
+ def evaluate_params(self, params, sort_extractor):
+ self._use_free_order = params.get('prefer_free_formats', False)
+ self._sort_user = params.get('format_sort', [])
+ self._sort_extractor = sort_extractor
+
+ def add_item(field, reverse, closest, limit_text):
+ field = field.lower()
+ if field in self._order:
+ return
+ self._order.append(field)
+ limit = self._resolve_field_value(field, limit_text)
+ data = {
+ 'reverse': reverse,
+ 'closest': False if limit is None else closest,
+ 'limit_text': limit_text,
+ 'limit': limit}
+ if field in self.settings:
+ self.settings[field].update(data)
+ else:
+ self.settings[field] = data
+
+ sort_list = (
+ tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
+ + (tuple() if params.get('format_sort_force', False)
+ else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
+ + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
+
+ for item in sort_list:
+ match = re.match(self.regex, item)
+ if match is None:
+ raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
+ field = match.group('field')
+ if field is None:
+ continue
+ if self._get_field_setting(field, 'type') == 'alias':
+ field = self._get_field_setting(field, 'field')
+ reverse = match.group('reverse') is not None
+ closest = match.group('separator') == '~'
+ limit_text = match.group('limit')
+
+ has_limit = limit_text is not None
+ has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
+ has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
+
+ fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
+ limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
+ limit_count = len(limits)
+ for (i, f) in enumerate(fields):
+ add_item(f, reverse, closest,
+ limits[i] if i < limit_count
+ else limits[0] if has_limit and not has_multiple_limits
+ else None)
+
+ def print_verbose_info(self, write_debug):
+ if self._sort_user:
+ write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
+ if self._sort_extractor:
+ write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
+ write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
+ '+' if self._get_field_setting(field, 'reverse') else '', field,
+ '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
+ self._get_field_setting(field, 'limit_text'),
+ self._get_field_setting(field, 'limit'))
+ if self._get_field_setting(field, 'limit_text') is not None else '')
+ for field in self._order if self._get_field_setting(field, 'visible')]))
+
+ def _calculate_field_preference_from_value(self, format, field, type, value):
+ reverse = self._get_field_setting(field, 'reverse')
+ closest = self._get_field_setting(field, 'closest')
+ limit = self._get_field_setting(field, 'limit')
+
+ if type == 'extractor':
+ maximum = self._get_field_setting(field, 'max')
+ if value is None or (maximum is not None and value >= maximum):
+ value = -1
+ elif type == 'boolean':
+ in_list = self._get_field_setting(field, 'in_list')
+ not_in_list = self._get_field_setting(field, 'not_in_list')
+ value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
+ elif type == 'ordered':
+ value = self._resolve_field_value(field, value, True)
+
+ # try to convert to number
+ val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
+ is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
+ if is_num:
+ value = val_num
+
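+            # the tuples below compare element-wise: None sorts worst, values past
+            # a ':' limit are deprioritized, and a '~' limit prefers values closest to it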
+ return ((-10, 0) if value is None
+ else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
+ else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
+ else (0, value, 0) if not reverse and (limit is None or value <= limit)
+ else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
+ else (-1, value, 0))
+
+ def _calculate_field_preference(self, format, field):
+ type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
+ get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
+ if type == 'multiple':
+ type = 'field' # Only 'field' is allowed in multiple for now
+ actual_fields = self._get_field_setting(field, 'field')
+
+ value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
+ else:
+ value = get_value(field)
+ return self._calculate_field_preference_from_value(format, field, type, value)
+
+ def calculate_preference(self, format):
+ # Determine missing protocol
+ if not format.get('protocol'):
+ format['protocol'] = determine_protocol(format)
+
+ # Determine missing ext
+ if not format.get('ext') and 'url' in format:
+ format['ext'] = determine_ext(format['url'])
+ if format.get('vcodec') == 'none':
+ format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
+ format['video_ext'] = 'none'
+ else:
+ format['video_ext'] = format['ext']
+ format['audio_ext'] = 'none'
+ # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
+ # format['preference'] = -1000
+
+ # Determine missing bitrates
+ if format.get('tbr') is None:
+ if format.get('vbr') is not None and format.get('abr') is not None:
+ format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
+ else:
+ if format.get('vcodec') != 'none' and format.get('vbr') is None:
+ format['vbr'] = format.get('tbr') - format.get('abr', 0)
+ if format.get('acodec') != 'none' and format.get('abr') is None:
+ format['abr'] = format.get('tbr') - format.get('vbr', 0)
+
+ return tuple(self._calculate_field_preference(format, field) for field in self._order)
+
+ def _sort_formats(self, formats, field_preference=[]):
+ if not formats:
+ return
+ format_sort = self.FormatSort() # params and to_screen are taken from the downloader
+ format_sort.evaluate_params(self._downloader.params, field_preference)
+ if self.get_param('verbose', False):
+ format_sort.print_verbose_info(self._downloader.write_debug)
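+        # the sort is ascending, so formats with larger preference keys end up last (most preferred)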
+ formats.sort(key=lambda f: format_sort.calculate_preference(f))
def _check_formats(self, formats, video_id):
if formats:
@@ -1481,7 +1819,7 @@ class InfoExtractor(object):
""" Either "http:" or "https:", depending on the user's preferences """
return (
'http:'
- if self._downloader.params.get('prefer_insecure', False)
+ if self.get_param('prefer_insecure', False)
else 'https:')
def _proto_relative_url(self, url, scheme=None):
@@ -1501,7 +1839,7 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
- def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None, data=None, headers={}, query={}):
manifest = self._download_xml(
@@ -1516,10 +1854,10 @@ class InfoExtractor(object):
return []
return self._parse_f4m_formats(
- manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
- def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
+ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None):
if not isinstance(manifest, compat_etree_Element) and not fatal:
@@ -1584,7 +1922,7 @@ class InfoExtractor(object):
ext = determine_ext(manifest_url)
if ext == 'f4m':
f4m_formats = self._extract_f4m_formats(
- manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal)
# Sometimes a stream-level manifest contains a single media entry that
# does not carry any quality metadata (e.g. http://matchtv.ru/#live-player).
@@ -1604,7 +1942,7 @@ class InfoExtractor(object):
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', preference=preference,
- m3u8_id=m3u8_id, fatal=fatal))
+ quality=quality, m3u8_id=m3u8_id, fatal=fatal))
continue
formats.append({
'format_id': format_id,
@@ -1617,56 +1955,88 @@ class InfoExtractor(object):
'height': height,
'vcodec': vcodec,
'preference': preference,
+ 'quality': quality,
})
return formats
- def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
+ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m3u8_id=None):
return {
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
'preference': preference - 100 if preference else -100,
+ 'quality': quality,
'resolution': 'multiple',
'format_note': 'Quality selection URL',
}
- def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
- entry_protocol='m3u8', preference=None,
- m3u8_id=None, note=None, errnote=None,
- fatal=True, live=False, data=None, headers={},
- query={}):
+ def _report_ignoring_subs(self, name):
+ self.report_warning(bug_reports_message(
+ f'Ignoring subtitle tracks found in the {name} manifest; '
+ 'if any subtitle tracks are missing,'
+ ), only_once=True)
+
+ def _extract_m3u8_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('HLS')
+ return fmts
+
+ def _extract_m3u8_formats_and_subtitles(
+ self, m3u8_url, video_id, ext=None, entry_protocol='m3u8_native',
+ preference=None, quality=None, m3u8_id=None, note=None,
+ errnote=None, fatal=True, live=False, data=None, headers={},
+ query={}):
+
res = self._download_webpage_handle(
m3u8_url, video_id,
- note=note or 'Downloading m3u8 information',
- errnote=errnote or 'Failed to download m3u8 information',
+ note='Downloading m3u8 information' if note is None else note,
+ errnote='Failed to download m3u8 information' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
- return []
+ return [], {}
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
- return self._parse_m3u8_formats(
+ return self._parse_m3u8_formats_and_subtitles(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
- preference=preference, m3u8_id=m3u8_id, live=live)
+ preference=preference, quality=quality, m3u8_id=m3u8_id,
+ note=note, errnote=errnote, fatal=fatal, live=live, data=data,
+ headers=headers, query=query, video_id=video_id)
+
+ def _parse_m3u8_formats_and_subtitles(
+ self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8_native',
+ preference=None, quality=None, m3u8_id=None, live=False, note=None,
+ errnote=None, fatal=True, data=None, headers={}, query={},
+ video_id=None):
+ formats, subtitles = [], {}
- def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
- entry_protocol='m3u8', preference=None,
- m3u8_id=None, live=False):
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
- return []
+ return formats, subtitles
- if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
- return []
+ has_drm = re.search(r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', m3u8_doc)
- formats = []
+ def format_url(url):
+ return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
- format_url = lambda u: (
- u
- if re.match(r'^https?://', u)
- else compat_urlparse.urljoin(m3u8_url, u))
+ if self.get_param('hls_split_discontinuity', False):
+ def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
+ if not m3u8_doc:
+ if not manifest_url:
+ return []
+ m3u8_doc = self._download_webpage(
+ manifest_url, video_id, fatal=fatal, data=data, headers=headers,
+ note=False, errnote='Failed to download m3u8 playlist information')
+ if m3u8_doc is False:
+ return []
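+                # every '#EXT-X-DISCONTINUITY' tag starts a new sub-playlist, hence 1 + their count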
+ return range(1 + sum(line.startswith('#EXT-X-DISCONTINUITY') for line in m3u8_doc.splitlines()))
+
+ else:
+ def _extract_m3u8_playlist_indices(*args, **kwargs):
+ return [None]
# References:
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
@@ -1685,13 +2055,18 @@ class InfoExtractor(object):
# clearly detect media playlist with this criterion.
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
- return [{
+ formats = [{
+ 'format_id': '-'.join(map(str, filter(None, [m3u8_id, idx]))),
+ 'format_index': idx,
'url': m3u8_url,
- 'format_id': m3u8_id,
'ext': ext,
'protocol': entry_protocol,
'preference': preference,
- }]
+ 'quality': quality,
+ 'has_drm': has_drm,
+ } for idx in _extract_m3u8_playlist_indices(m3u8_doc=m3u8_doc)]
+
+ return formats, subtitles
groups = {}
last_stream_inf = {}
@@ -1703,26 +2078,45 @@ class InfoExtractor(object):
if not (media_type and group_id and name):
return
groups.setdefault(group_id, []).append(media)
+ # <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
+ if media_type == 'SUBTITLES':
+ # According to RFC 8216 §4.3.4.2.1, URI is REQUIRED in the
+ # EXT-X-MEDIA tag if the media type is SUBTITLES.
+ # However, lack of URI has been spotted in the wild.
+ # e.g. NebulaIE; see https://github.com/hypervideo/hypervideo/issues/339
+ if not media.get('URI'):
+ return
+ url = format_url(media['URI'])
+ sub_info = {
+ 'url': url,
+ 'ext': determine_ext(url),
+ }
+ if sub_info['ext'] == 'm3u8':
+ # Per RFC 8216 §3.1, the only possible subtitle format m3u8
+ # files may contain is WebVTT:
+ # <https://tools.ietf.org/html/rfc8216#section-3.1>
+ sub_info['ext'] = 'vtt'
+ sub_info['protocol'] = 'm3u8_native'
+ lang = media.get('LANGUAGE') or 'und'
+ subtitles.setdefault(lang, []).append(sub_info)
if media_type not in ('VIDEO', 'AUDIO'):
return
media_url = media.get('URI')
if media_url:
- format_id = []
- for v in (m3u8_id, group_id, name):
- if v:
- format_id.append(v)
- f = {
- 'format_id': '-'.join(format_id),
- 'url': format_url(media_url),
+ manifest_url = format_url(media_url)
+ formats.extend({
+ 'format_id': '-'.join(map(str, filter(None, (m3u8_id, group_id, name, idx)))),
+ 'format_note': name,
+ 'format_index': idx,
+ 'url': manifest_url,
'manifest_url': m3u8_url,
'language': media.get('LANGUAGE'),
'ext': ext,
'protocol': entry_protocol,
'preference': preference,
- }
- if media_type == 'AUDIO':
- f['vcodec'] = 'none'
- formats.append(f)
+ 'quality': quality,
+ 'vcodec': 'none' if media_type == 'AUDIO' else None,
+ } for idx in _extract_m3u8_playlist_indices(manifest_url))
def build_stream_name():
# Although the specification does not mention the NAME attribute for
@@ -1759,76 +2153,99 @@ class InfoExtractor(object):
tbr = float_or_none(
last_stream_inf.get('AVERAGE-BANDWIDTH')
or last_stream_inf.get('BANDWIDTH'), scale=1000)
- format_id = []
- if m3u8_id:
- format_id.append(m3u8_id)
- stream_name = build_stream_name()
- # Bandwidth of live streams may differ over time thus making
- # format_id unpredictable. So it's better to keep provided
- # format_id intact.
- if not live:
- format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
manifest_url = format_url(line.strip())
- f = {
- 'format_id': '-'.join(format_id),
- 'url': manifest_url,
- 'manifest_url': m3u8_url,
- 'tbr': tbr,
- 'ext': ext,
- 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
- 'protocol': entry_protocol,
- 'preference': preference,
- }
- resolution = last_stream_inf.get('RESOLUTION')
- if resolution:
- mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+
+ for idx in _extract_m3u8_playlist_indices(manifest_url):
+ format_id = [m3u8_id, None, idx]
+ # Bandwidth of live streams may differ over time, making
+ # format_id unpredictable, so it's better to keep the provided
+ # format_id intact.
+ if not live:
+ stream_name = build_stream_name()
+ format_id[1] = stream_name if stream_name else '%d' % (tbr if tbr else len(formats))
+ f = {
+ 'format_id': '-'.join(map(str, filter(None, format_id))),
+ 'format_index': idx,
+ 'url': manifest_url,
+ 'manifest_url': m3u8_url,
+ 'tbr': tbr,
+ 'ext': ext,
+ 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ 'quality': quality,
+ }
+ resolution = last_stream_inf.get('RESOLUTION')
+ if resolution:
+ mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+ if mobj:
+ f['width'] = int(mobj.group('width'))
+ f['height'] = int(mobj.group('height'))
+ # Unified Streaming Platform
+ mobj = re.search(
+ r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
if mobj:
- f['width'] = int(mobj.group('width'))
- f['height'] = int(mobj.group('height'))
- # Unified Streaming Platform
- mobj = re.search(
- r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
- if mobj:
- abr, vbr = mobj.groups()
- abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
- f.update({
- 'vbr': vbr,
- 'abr': abr,
- })
- codecs = parse_codecs(last_stream_inf.get('CODECS'))
- f.update(codecs)
- audio_group_id = last_stream_inf.get('AUDIO')
- # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
- # references a rendition group MUST have a CODECS attribute.
- # However, this is not always respected, for example, [2]
- # contains EXT-X-STREAM-INF tag which references AUDIO
- # rendition group but does not have CODECS and despite
- # referencing an audio group it represents a complete
- # (with audio and video) format. So, for such cases we will
- # ignore references to rendition groups and treat them
- # as complete formats.
- if audio_group_id and codecs and f.get('vcodec') != 'none':
- audio_group = groups.get(audio_group_id)
- if audio_group and audio_group[0].get('URI'):
- # TODO: update acodec for audio only formats with
- # the same GROUP-ID
- f['acodec'] = 'none'
- formats.append(f)
-
- # for DailyMotion
- progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
- if progressive_uri:
- http_f = f.copy()
- del http_f['manifest_url']
- http_f.update({
- 'format_id': f['format_id'].replace('hls-', 'http-'),
- 'protocol': 'http',
- 'url': progressive_uri,
- })
- formats.append(http_f)
+ abr, vbr = mobj.groups()
+ abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
+ f.update({
+ 'vbr': vbr,
+ 'abr': abr,
+ })
+ codecs = parse_codecs(last_stream_inf.get('CODECS'))
+ f.update(codecs)
+ audio_group_id = last_stream_inf.get('AUDIO')
+ # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
+ # references a rendition group MUST have a CODECS attribute.
+ # However, this is not always respected, for example, [2]
+ # contains EXT-X-STREAM-INF tag which references AUDIO
+ # rendition group but does not have CODECS and despite
+ # referencing an audio group it represents a complete
+ # (with audio and video) format. So, for such cases we will
+ # ignore references to rendition groups and treat them
+ # as complete formats.
+ if audio_group_id and codecs and f.get('vcodec') != 'none':
+ audio_group = groups.get(audio_group_id)
+ if audio_group and audio_group[0].get('URI'):
+ # TODO: update acodec for audio only formats with
+ # the same GROUP-ID
+ f['acodec'] = 'none'
+ if not f.get('ext'):
+ f['ext'] = 'm4a' if f.get('vcodec') == 'none' else 'mp4'
+ formats.append(f)
+
+ # for DailyMotion
+ progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
+ if progressive_uri:
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': progressive_uri,
+ })
+ formats.append(http_f)
last_stream_inf = {}
- return formats
+ return formats, subtitles
+
+ def _extract_m3u8_vod_duration(
+ self, m3u8_vod_url, video_id, note=None, errnote=None, data=None, headers={}, query={}):
+
+ m3u8_vod = self._download_webpage(
+ m3u8_vod_url, video_id,
+ note='Downloading m3u8 VOD manifest' if note is None else note,
+ errnote='Failed to download VOD manifest' if errnote is None else errnote,
+ fatal=False, data=data, headers=headers, query=query)
+
+ return self._parse_m3u8_vod_duration(m3u8_vod or '', video_id)
+
+ def _parse_m3u8_vod_duration(self, m3u8_vod, video_id):
+ if '#EXT-X-PLAYLIST-TYPE:VOD' not in m3u8_vod:
+ return None
+
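+        # sum the per-segment '#EXTINF:<duration>,[<title>]' values; e.g. '#EXTINF:10.5,' adds 10.5 seconds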
+ return int(sum(
+ float(line[len('#EXTINF:'):].split(',')[0])
+ for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None
@staticmethod
def _xpath_ns(path, namespace=None):
@@ -1842,7 +2259,7 @@ class InfoExtractor(object):
out.append('{%s}%s' % (namespace, c))
return '/'.join(out)
- def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+ def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
if smil is False:
@@ -1851,8 +2268,18 @@ class InfoExtractor(object):
namespace = self._parse_smil_namespace(smil)
- return self._parse_smil_formats(
+ fmts = self._parse_smil_formats(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+ subs = self._parse_smil_subtitles(
+ smil, namespace=namespace)
+
+ return fmts, subs
+
+ def _extract_smil_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('SMIL')
+ return fmts
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
smil = self._download_smil(smil_url, video_id, fatal=fatal)
@@ -1921,14 +2348,15 @@ class InfoExtractor(object):
rtmp_count = 0
http_count = 0
m3u8_count = 0
+ imgs_count = 0
- srcs = []
+ srcs = set()
media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
for medium in media:
src = medium.get('src')
if not src or src in srcs:
continue
- srcs.append(src)
+ srcs.add(src)
bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
@@ -2002,6 +2430,24 @@ class InfoExtractor(object):
'height': height,
})
+ for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)):
+ src = medium.get('src')
+ if not src or src in srcs:
+ continue
+ srcs.add(src)
+
+ imgs_count += 1
+ formats.append({
+ 'format_id': 'imagestream-%d' % (imgs_count),
+ 'url': src,
+ 'ext': mimetype2ext(medium.get('type')),
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'width': int_or_none(medium.get('width')),
+ 'height': int_or_none(medium.get('height')),
+ 'format_note': 'SMIL storyboards',
+ })
+
return formats
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
@@ -2071,23 +2517,38 @@ class InfoExtractor(object):
})
return entries
- def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ def _extract_mpd_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('DASH')
+ return fmts
+
+ def _extract_mpd_formats_and_subtitles(
+ self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
+ fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle(
mpd_url, video_id,
- note=note or 'Downloading MPD manifest',
- errnote=errnote or 'Failed to download MPD manifest',
+ note='Downloading MPD manifest' if note is None else note,
+ errnote='Failed to download MPD manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
- return []
+ return [], {}
mpd_doc, urlh = res
if mpd_doc is None:
- return []
+ return [], {}
mpd_base_url = base_url(urlh.geturl())
- return self._parse_mpd_formats(
+ return self._parse_mpd_formats_and_subtitles(
mpd_doc, mpd_id, mpd_base_url, mpd_url)
- def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+ def _parse_mpd_formats(self, *args, **kwargs):
+ fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('DASH')
+ return fmts
+
+ def _parse_mpd_formats_and_subtitles(
+ self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
"""
Parse formats from MPD manifest.
References:
@@ -2095,8 +2556,9 @@ class InfoExtractor(object):
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
"""
- if mpd_doc.get('type') == 'dynamic':
- return []
+ if not self.get_param('dynamic_mpd', True):
+ if mpd_doc.get('type') == 'dynamic':
+ return [], {}
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
@@ -2165,7 +2627,8 @@ class InfoExtractor(object):
return ms_info
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
- formats = []
+ formats, subtitles = [], {}
+ stream_numbers = {'audio': 0, 'video': 0}
for period in mpd_doc.findall(_add_ns('Period')):
period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, {
@@ -2173,39 +2636,53 @@ class InfoExtractor(object):
'timescale': 1,
})
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
- if is_drm_protected(adaptation_set):
- continue
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')):
- if is_drm_protected(representation):
- continue
representation_attrib = adaptation_set.attrib.copy()
representation_attrib.update(representation.attrib)
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
mime_type = representation_attrib['mimeType']
- content_type = mime_type.split('/')[0]
- if content_type == 'text':
- # TODO implement WebVTT downloading
- pass
- elif content_type in ('video', 'audio'):
- base_url = ''
- for element in (representation, adaptation_set, period, mpd_doc):
- base_url_e = element.find(_add_ns('BaseURL'))
- if base_url_e is not None:
- base_url = base_url_e.text + base_url
- if re.match(r'^https?://', base_url):
- break
- if mpd_base_url and not re.match(r'^https?://', base_url):
- if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
- mpd_base_url += '/'
- base_url = mpd_base_url + base_url
- representation_id = representation_attrib.get('id')
- lang = representation_attrib.get('lang')
- url_el = representation.find(_add_ns('BaseURL'))
- filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
- bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+ content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
+
+ codecs = representation_attrib.get('codecs', '')
+ if content_type not in ('video', 'audio', 'text'):
+ if mime_type == 'image/jpeg':
+ content_type = mime_type
+ elif codecs.split('.')[0] == 'stpp':
+ content_type = 'text'
+ elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
+ content_type = 'text'
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+ continue
+
+ base_url = ''
+ for element in (representation, adaptation_set, period, mpd_doc):
+ base_url_e = element.find(_add_ns('BaseURL'))
+ if base_url_e is not None:
+ base_url = base_url_e.text + base_url
+ if re.match(r'^https?://', base_url):
+ break
+ if mpd_base_url and base_url.startswith('/'):
+ base_url = compat_urlparse.urljoin(mpd_base_url, base_url)
+ elif mpd_base_url and not re.match(r'^https?://', base_url):
+ if not mpd_base_url.endswith('/'):
+ mpd_base_url += '/'
+ base_url = mpd_base_url + base_url
+ representation_id = representation_attrib.get('id')
+ lang = representation_attrib.get('lang')
+ url_el = representation.find(_add_ns('BaseURL'))
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+ bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+ if representation_id is not None:
+ format_id = representation_id
+ else:
+ format_id = content_type
+ if mpd_id:
+ format_id = mpd_id + '-' + format_id
+ if content_type in ('video', 'audio'):
f = {
- 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
+ 'format_id': format_id,
'manifest_url': mpd_url,
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')),
@@ -2217,198 +2694,230 @@ class InfoExtractor(object):
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
'container': mimetype2ext(mime_type) + '_dash',
+ 'manifest_stream_number': stream_numbers[content_type]
+ }
+ f.update(parse_codecs(codecs))
+ stream_numbers[content_type] += 1
+ elif content_type == 'text':
+ f = {
+ 'ext': mimetype2ext(mime_type),
+ 'manifest_url': mpd_url,
+ 'filesize': filesize,
}
- f.update(parse_codecs(representation_attrib.get('codecs')))
- representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
-
- def prepare_template(template_name, identifiers):
- tmpl = representation_ms_info[template_name]
- # First of, % characters outside $...$ templates
- # must be escaped by doubling for proper processing
- # by % operator string formatting used further (see
- # https://github.com/ytdl-org/youtube-dl/issues/16867).
- t = ''
- in_template = False
- for c in tmpl:
+ elif content_type == 'image/jpeg':
+ # See test case in VikiIE
+ # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
+ f = {
+ 'format_id': format_id,
+ 'ext': 'mhtml',
+ 'manifest_url': mpd_url,
+ 'format_note': 'DASH storyboards (jpeg)',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ }
+ if is_drm_protected(adaptation_set) or is_drm_protected(representation):
+ f['has_drm'] = True
+ representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+
+ def prepare_template(template_name, identifiers):
+ tmpl = representation_ms_info[template_name]
+ # First of all, % characters outside $...$ templates
+ # must be escaped by doubling for proper processing
+ # by % operator string formatting used further (see
+ # https://github.com/ytdl-org/youtube-dl/issues/16867).
+ t = ''
+ in_template = False
+ for c in tmpl:
+ t += c
+ if c == '$':
+ in_template = not in_template
+ elif c == '%' and not in_template:
t += c
- if c == '$':
- in_template = not in_template
- elif c == '%' and not in_template:
- t += c
- # Next, $...$ templates are translated to their
- # %(...) counterparts to be used with % operator
+ # Next, $...$ templates are translated to their
+ # %(...) counterparts to be used with % operator
+ if representation_id is not None:
t = t.replace('$RepresentationID$', representation_id)
- t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
- t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
- t.replace('$$', '$')
- return t
-
- # @initialization is a regular template like @media one
- # so it should be handled just the same way (see
- # https://github.com/ytdl-org/youtube-dl/issues/11605)
- if 'initialization' in representation_ms_info:
- initialization_template = prepare_template(
- 'initialization',
- # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
- # $Time$ shall not be included for @initialization thus
- # only $Bandwidth$ remains
- ('Bandwidth', ))
- representation_ms_info['initialization_url'] = initialization_template % {
- 'Bandwidth': bandwidth,
- }
+ t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+ t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
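+                    # e.g. '$Number%05d$' becomes '%(Number)05d', ready for %-formatting with the segment number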
+                    t = t.replace('$$', '$')
+ return t
+
+ # @initialization is a regular template like @media one
+ # so it should be handled just the same way (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11605)
+ if 'initialization' in representation_ms_info:
+ initialization_template = prepare_template(
+ 'initialization',
+ # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+ # $Time$ shall not be included for @initialization thus
+ # only $Bandwidth$ remains
+ ('Bandwidth', ))
+ representation_ms_info['initialization_url'] = initialization_template % {
+ 'Bandwidth': bandwidth,
+ }
- def location_key(location):
- return 'url' if re.match(r'^https?://', location) else 'path'
-
- if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
-
- media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
- media_location_key = location_key(media_template)
-
- # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
- # can't be used at the same time
- if '%(Number' in media_template and 's' not in representation_ms_info:
- segment_duration = None
- if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
- segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
- representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
- representation_ms_info['fragments'] = [{
- media_location_key: media_template % {
- 'Number': segment_number,
- 'Bandwidth': bandwidth,
- },
- 'duration': segment_duration,
- } for segment_number in range(
- representation_ms_info['start_number'],
- representation_ms_info['total_number'] + representation_ms_info['start_number'])]
- else:
- # $Number*$ or $Time$ in media template with S list available
- # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
- # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
- representation_ms_info['fragments'] = []
- segment_time = 0
- segment_d = None
- segment_number = representation_ms_info['start_number']
-
- def add_segment_url():
- segment_url = media_template % {
- 'Time': segment_time,
- 'Bandwidth': bandwidth,
- 'Number': segment_number,
- }
- representation_ms_info['fragments'].append({
- media_location_key: segment_url,
- 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
- })
-
- for num, s in enumerate(representation_ms_info['s']):
- segment_time = s.get('t') or segment_time
- segment_d = s['d']
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
+ if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+ media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
+
+ # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+ # can't be used at the same time
+ if '%(Number' in media_template and 's' not in representation_ms_info:
+ segment_duration = None
+ if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+ segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+ representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+ representation_ms_info['fragments'] = [{
+ media_location_key: media_template % {
+ 'Number': segment_number,
+ 'Bandwidth': bandwidth,
+ },
+ 'duration': segment_duration,
+ } for segment_number in range(
+ representation_ms_info['start_number'],
+ representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+ else:
+ # $Number*$ or $Time$ in media template with S list available
+ # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+ # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+ representation_ms_info['fragments'] = []
+ segment_time = 0
+ segment_d = None
+ segment_number = representation_ms_info['start_number']
+
+ def add_segment_url():
+ segment_url = media_template % {
+ 'Time': segment_time,
+ 'Bandwidth': bandwidth,
+ 'Number': segment_number,
+ }
+ representation_ms_info['fragments'].append({
+ media_location_key: segment_url,
+ 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+ })
+
+ for num, s in enumerate(representation_ms_info['s']):
+ segment_time = s.get('t') or segment_time
+ segment_d = s['d']
+ add_segment_url()
+ segment_number += 1
+ for r in range(s.get('r', 0)):
+ segment_time += segment_d
add_segment_url()
segment_number += 1
- for r in range(s.get('r', 0)):
- segment_time += segment_d
- add_segment_url()
- segment_number += 1
- segment_time += segment_d
- elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
- # No media template
- # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
- # or any YouTube dashsegments video
- fragments = []
- segment_index = 0
- timescale = representation_ms_info['timescale']
- for s in representation_ms_info['s']:
- duration = float_or_none(s['d'], timescale)
- for r in range(s.get('r', 0) + 1):
- segment_uri = representation_ms_info['segment_urls'][segment_index]
- fragments.append({
- location_key(segment_uri): segment_uri,
- 'duration': duration,
- })
- segment_index += 1
- representation_ms_info['fragments'] = fragments
- elif 'segment_urls' in representation_ms_info:
- # Segment URLs with no SegmentTimeline
- # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
- # https://github.com/ytdl-org/youtube-dl/pull/14844
- fragments = []
- segment_duration = float_or_none(
- representation_ms_info['segment_duration'],
- representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
- for segment_url in representation_ms_info['segment_urls']:
- fragment = {
- location_key(segment_url): segment_url,
- }
- if segment_duration:
- fragment['duration'] = segment_duration
- fragments.append(fragment)
- representation_ms_info['fragments'] = fragments
- # If there is a fragments key available then we correctly recognized fragmented media.
- # Otherwise we will assume unfragmented media with direct access. Technically, such
- # assumption is not necessarily correct since we may simply have no support for
- # some forms of fragmented media renditions yet, but for now we'll use this fallback.
- if 'fragments' in representation_ms_info:
- f.update({
- # NB: mpd_url may be empty when MPD manifest is parsed from a string
- 'url': mpd_url or base_url,
- 'fragment_base_url': base_url,
- 'fragments': [],
- 'protocol': 'http_dash_segments',
- })
- if 'initialization_url' in representation_ms_info:
- initialization_url = representation_ms_info['initialization_url']
- if not f.get('url'):
- f['url'] = initialization_url
- f['fragments'].append({location_key(initialization_url): initialization_url})
- f['fragments'].extend(representation_ms_info['fragments'])
- else:
- # Assuming direct URL to unfragmented media.
- f['url'] = base_url
- formats.append(f)
+ segment_time += segment_d
+ elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+ # No media template
+ # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+ # or any YouTube dashsegments video
+ fragments = []
+ segment_index = 0
+ timescale = representation_ms_info['timescale']
+ for s in representation_ms_info['s']:
+ duration = float_or_none(s['d'], timescale)
+ for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
+ fragments.append({
+ location_key(segment_uri): segment_uri,
+ 'duration': duration,
+ })
+ segment_index += 1
+ representation_ms_info['fragments'] = fragments
+ elif 'segment_urls' in representation_ms_info:
+ # Segment URLs with no SegmentTimeline
+ # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
+ fragments = []
+ segment_duration = float_or_none(
+ representation_ms_info['segment_duration'],
+ representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+ for segment_url in representation_ms_info['segment_urls']:
+ fragment = {
+ location_key(segment_url): segment_url,
+ }
+ if segment_duration:
+ fragment['duration'] = segment_duration
+ fragments.append(fragment)
+ representation_ms_info['fragments'] = fragments
+ # If there is a 'fragments' key available, then we have correctly recognized fragmented media.
+ # Otherwise we will assume unfragmented media with direct access. Technically, such
+ # an assumption is not necessarily correct since we may simply have no support for
+ # some forms of fragmented media renditions yet, but for now we'll use this fallback.
+ if 'fragments' in representation_ms_info:
+ f.update({
+ # NB: mpd_url may be empty when MPD manifest is parsed from a string
+ 'url': mpd_url or base_url,
+ 'fragment_base_url': base_url,
+ 'fragments': [],
+ 'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
+ })
+ if 'initialization_url' in representation_ms_info:
+ initialization_url = representation_ms_info['initialization_url']
+ if not f.get('url'):
+ f['url'] = initialization_url
+ f['fragments'].append({location_key(initialization_url): initialization_url})
+ f['fragments'].extend(representation_ms_info['fragments'])
else:
- self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
- return formats
+ # Assuming direct URL to unfragmented media.
+ f['url'] = base_url
+ if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
+ formats.append(f)
+ elif content_type == 'text':
+ subtitles.setdefault(lang or 'und', []).append(f)
- def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ return formats, subtitles
+
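A minimal sketch, equivalent to (but simpler than) the add_segment_url() loop above, of how a SegmentTimeline S-list expands into timed fragments (timescale and durations are illustrative):

    s_list = [{'t': 0, 'd': 180000, 'r': 1}, {'d': 90000}]  # 'r' counts extra repeats
    timescale, segment_time, fragments = 90000, 0, []
    for s in s_list:
        segment_time = s.get('t') or segment_time
        segment_d = s['d']
        for _ in range(s.get('r', 0) + 1):
            fragments.append({'start': segment_time / timescale,
                              'duration': segment_d / timescale})
            segment_time += segment_d
    print(fragments)
    # [{'start': 0.0, 'duration': 2.0}, {'start': 2.0, 'duration': 2.0},
    #  {'start': 4.0, 'duration': 1.0}]
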
+ def _extract_ism_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('ISM')
+ return fmts
+
+ def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle(
ism_url, video_id,
- note=note or 'Downloading ISM manifest',
- errnote=errnote or 'Failed to download ISM manifest',
+ note='Downloading ISM manifest' if note is None else note,
+ errnote='Failed to download ISM manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
- return []
+ return [], {}
ism_doc, urlh = res
if ism_doc is None:
- return []
+ return [], {}
- return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+ return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
- def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
"""
Parse formats from ISM manifest.
References:
1. [MS-SSTR]: Smooth Streaming Protocol,
https://msdn.microsoft.com/en-us/library/ff469518.aspx
"""
- if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
- return []
+ if ism_doc.get('IsLive') == 'TRUE':
+ return [], {}
duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
formats = []
+ subtitles = {}
for stream in ism_doc.findall('StreamIndex'):
stream_type = stream.get('Type')
- if stream_type not in ('video', 'audio'):
+ if stream_type not in ('video', 'audio', 'text'):
continue
url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
+ stream_language = stream.get('Language', 'und')
for track in stream.findall('QualityLevel'):
- fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
+ fourcc = track.get('FourCC') or ('AACL' if track.get('AudioTag') == '255' else None)
# TODO: add support for WVC1 and WMAP
- if fourcc not in ('H264', 'AVC1', 'AACL'):
+ if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
@@ -2451,35 +2960,55 @@ class InfoExtractor(object):
format_id.append(stream_name)
format_id.append(compat_str(tbr))
- formats.append({
- 'format_id': '-'.join(format_id),
- 'url': ism_url,
- 'manifest_url': ism_url,
- 'ext': 'ismv' if stream_type == 'video' else 'isma',
- 'width': width,
- 'height': height,
- 'tbr': tbr,
- 'asr': sampling_rate,
- 'vcodec': 'none' if stream_type == 'audio' else fourcc,
- 'acodec': 'none' if stream_type == 'video' else fourcc,
- 'protocol': 'ism',
- 'fragments': fragments,
- '_download_params': {
- 'duration': duration,
- 'timescale': stream_timescale,
- 'width': width or 0,
- 'height': height or 0,
- 'fourcc': fourcc,
- 'codec_private_data': track.get('CodecPrivateData'),
- 'sampling_rate': sampling_rate,
- 'channels': int_or_none(track.get('Channels', 2)),
- 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
- 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
- },
- })
- return formats
+ if stream_type == 'text':
+ subtitles.setdefault(stream_language, []).append({
+ 'ext': 'ismt',
+ 'protocol': 'ism',
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'fragments': fragments,
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ }
+ })
+ elif stream_type in ('video', 'audio'):
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ 'has_drm': ism_doc.find('Protection') is not None,
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats, subtitles
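
A hedged usage sketch of the new split return value, as it would appear inside an extractor's _real_extract (the manifest URL is a placeholder; formats, subtitles and video_id are assumed to exist in the caller):

    fmts, subs = self._extract_ism_formats_and_subtitles(
        'https://example.com/stream.ism/Manifest', video_id, ism_id='mss', fatal=False)
    formats.extend(fmts)
    subtitles = self._merge_subtitles(subtitles, subs)
    # TTML text streams surface as e.g. subs == {'und': [{'ext': 'ismt', 'protocol': 'ism', ...}]}

Legacy callers can keep using _extract_ism_formats, which now discards any subtitles with a warning.
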
- def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
def absolute_url(item_url):
return urljoin(base_url, item_url)
@@ -2502,7 +3031,7 @@ class InfoExtractor(object):
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
- preference=preference, fatal=False)
+ preference=preference, quality=quality, fatal=False)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
@@ -2602,7 +3131,13 @@ class InfoExtractor(object):
entries.append(media_info)
return entries
- def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+ def _extract_akamai_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self._report_ignoring_subs('akamai')
+ return fmts
+
+ def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
signed = 'hdnea=' in manifest_url
if not signed:
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
@@ -2611,6 +3146,7 @@ class InfoExtractor(object):
'', manifest_url).strip('?')
formats = []
+ subtitles = {}
hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
@@ -2629,10 +3165,11 @@ class InfoExtractor(object):
hls_host = hosts.get('hls')
if hls_host:
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
- m3u8_formats = self._extract_m3u8_formats(
+ m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subtitles)
http_host = hosts.get('http')
if http_host and m3u8_formats and not signed:
@@ -2656,7 +3193,7 @@ class InfoExtractor(object):
formats.append(http_f)
i += 1
- return formats
+ return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
@@ -2879,7 +3416,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(msg)
else:
- self._downloader.report_warning(msg)
+ self.report_warning(msg)
return res
def _float(self, v, name, fatal=False, **kwargs):
@@ -2889,7 +3426,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(msg)
else:
- self._downloader.report_warning(msg)
+ self.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
@@ -2963,14 +3500,40 @@ class InfoExtractor(object):
return not any_restricted
def extract_subtitles(self, *args, **kwargs):
- if (self._downloader.params.get('writesubtitles', False)
- or self._downloader.params.get('listsubtitles')):
+ if (self.get_param('writesubtitles', False)
+ or self.get_param('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
+ def extract_comments(self, *args, **kwargs):
+ if not self.get_param('getcomments'):
+ return None
+ generator = self._get_comments(*args, **kwargs)
+
+ def extractor():
+ comments = []
+ try:
+ while True:
+ comments.append(next(generator))
+ except KeyboardInterrupt:
+ interrupted = True
+ self.to_screen('Interrupted by user')
+ except StopIteration:
+ interrupted = False
+ comment_count = len(comments)
+ self.to_screen(f'Extracted {comment_count} comments')
+ return {
+ 'comments': comments,
+ 'comment_count': None if interrupted else comment_count
+ }
+ return extractor
+
+ def _get_comments(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
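A minimal sketch of the subclass side of this contract (site, URL and JSON fields are hypothetical): _get_comments yields comment dicts lazily, and the closure returned by extract_comments drains the generator only when comments are actually wanted (in yt-dlp-derived code it is typically attached as info_dict['__post_extractor']):

    class ExampleIE(InfoExtractor):
        def _get_comments(self, video_id):
            page = 1
            while True:
                data = self._download_json(
                    'https://example.com/api/comments/%s?page=%d' % (video_id, page), video_id)
                for c in data.get('comments') or []:
                    yield {'id': c.get('id'), 'text': c.get('body'), 'author': c.get('author')}
                if not data.get('has_more'):
                    break
                page += 1
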
@staticmethod
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
""" Merge subtitle items for one language. Items with duplicated URLs
@@ -2981,16 +3544,18 @@ class InfoExtractor(object):
return ret
@classmethod
- def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
- """ Merge two subtitle dictionaries, language by language. """
- ret = dict(subtitle_dict1)
- for lang in subtitle_dict2:
- ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
- return ret
+ def _merge_subtitles(cls, *dicts, target=None):
+ """ Merge subtitle dictionaries, language by language. """
+ if target is None:
+ target = {}
+ for d in dicts:
+ for lang, subs in d.items():
+ target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
+ return target
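
An illustrative call of the new variadic form (URLs are placeholders); items with duplicated URLs are collapsed by _merge_subtitle_items, and passing target= merges into an existing dict in place:

    subs = InfoExtractor._merge_subtitles(
        {'en': [{'url': 'https://example.com/a.vtt'}]},
        {'en': [{'url': 'https://example.com/b.vtt'}],
         'fr': [{'url': 'https://example.com/c.vtt'}]})
    # subs == {'en': [a.vtt item, b.vtt item], 'fr': [c.vtt item]}
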
def extract_automatic_captions(self, *args, **kwargs):
- if (self._downloader.params.get('writeautomaticsub', False)
- or self._downloader.params.get('listsubtitles')):
+ if (self.get_param('writeautomaticsub', False)
+ or self.get_param('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
@@ -2998,9 +3563,11 @@ class InfoExtractor(object):
raise NotImplementedError('This method must be implemented by subclasses')
def mark_watched(self, *args, **kwargs):
- if (self._downloader.params.get('mark_watched', False)
- and (self._get_login_info()[0] is not None
- or self._downloader.params.get('cookiefile') is not None)):
+ if not self.get_param('mark_watched', False):
+ return
+ if (self._get_login_info()[0] is not None
+ or self.get_param('cookiefile')
+ or self.get_param('cookiesfrombrowser')):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
@@ -3008,7 +3575,7 @@ class InfoExtractor(object):
def geo_verification_headers(self):
headers = {}
- geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ geo_verification_proxy = self.get_param('geo_verification_proxy')
if geo_verification_proxy:
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers
@@ -3019,6 +3586,33 @@ class InfoExtractor(object):
def _generic_title(self, url):
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ @staticmethod
+ def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
+ all_known = all(map(
+ lambda x: x is not None,
+ (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)))
+ return (
+ 'private' if is_private
+ else 'premium_only' if needs_premium
+ else 'subscriber_only' if needs_subscription
+ else 'needs_auth' if needs_auth
+ else 'unlisted' if is_unlisted
+ else 'public' if all_known
+ else None)
+
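Illustrative results of the cascade above; 'public' is reported only when every flag is known (and falsy), otherwise the availability stays None:

    InfoExtractor._availability(is_private=True)                    # 'private'
    InfoExtractor._availability(needs_premium=True)                 # 'premium_only'
    InfoExtractor._availability(False, False, False, False, False)  # 'public'
    InfoExtractor._availability()                                   # None - not all flags known
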
+ def _configuration_arg(self, key, default=NO_DEFAULT, casesense=False):
+ '''
+ @returns A list of values for the extractor argument given by "key"
+ or "default" if no such key is present
+ @param default The default value to return when the key is not present (default: [])
+ @param casesense When false, the values are converted to lower case
+ '''
+ val = traverse_obj(
+ self._downloader.params, ('extractor_args', self.ie_key().lower(), key))
+ if val is None:
+ return [] if default is NO_DEFAULT else default
+ return list(val) if casesense else [x.lower() for x in val]
+
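A hedged usage sketch inside an extractor (the argument names are hypothetical, and this assumes the CLI parses --extractor-args values into per-extractor lists as in yt-dlp):

    # given a command line containing: --extractor-args "myext:player-client=WEB,android"
    self._configuration_arg('player-client')                  # ['web', 'android']
    self._configuration_arg('player-client', casesense=True)  # ['WEB', 'android']
    self._configuration_arg('missing-key', default=None)      # None instead of []
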
class SearchInfoExtractor(InfoExtractor):
"""
@@ -3051,12 +3645,19 @@ class SearchInfoExtractor(InfoExtractor):
if n <= 0:
raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
elif n > self._MAX_RESULTS:
- self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+ self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
n = self._MAX_RESULTS
return self._get_n_results(query, n)
def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
+ """Get a specified number of results for a query.
+ Either this function or _search_results must be overridden by subclasses"""
+ return self.playlist_result(
+ itertools.islice(self._search_results(query), 0, None if n == float('inf') else n),
+ query, query)
+
+ def _search_results(self, query):
+ """Returns an iterator of search results"""
raise NotImplementedError('This method must be implemented by subclasses')
@property
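
With this split, a concrete search extractor only needs to implement _search_results as a generator; a minimal hypothetical sketch (site, URL and JSON shape are made up):

    class ExampleSearchIE(SearchInfoExtractor):
        IE_NAME = 'example:search'
        _SEARCH_KEY = 'examplesearch'
        _MAX_RESULTS = float('inf')

        def _search_results(self, query):
            page = 1
            while True:
                data = self._download_json(
                    'https://example.com/api/search?q=%s&page=%d' % (query, page), query)
                for hit in data.get('results') or []:
                    yield self.url_result(hit['url'])
                if not data.get('next_page'):
                    break
                page += 1

_get_n_results then slices this iterator down to the first n entries via itertools.islice.
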
diff --git a/hypervideo_dl/extractor/commonmistakes.py b/hypervideo_dl/extractor/commonmistakes.py
index ed9d26e..eb76fe5 100644
--- a/hypervideo_dl/extractor/commonmistakes.py
+++ b/hypervideo_dl/extractor/commonmistakes.py
@@ -26,8 +26,8 @@ class CommonMistakesIE(InfoExtractor):
'That doesn\'t make any sense. '
'Simply remove the parameter in your command or configuration.'
) % url
- if not self._downloader.params.get('verbose'):
- msg += ' Add -v to the command line to see what arguments and configuration hypervideo got.'
+ if not self.get_param('verbose'):
+ msg += ' Add -v to the command line to see what arguments and configuration hypervideo has.'
raise ExtractorError(msg, expected=True)
diff --git a/hypervideo_dl/extractor/commonprotocols.py b/hypervideo_dl/extractor/commonprotocols.py
index d98331a..3708c6a 100644
--- a/hypervideo_dl/extractor/commonprotocols.py
+++ b/hypervideo_dl/extractor/commonprotocols.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
@@ -58,3 +59,16 @@ class MmsIE(InfoExtractor):
'title': title,
'url': url,
}
+
+
+class ViewSourceIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'view-source:(?P<url>.+)'
+
+ _TEST = {
+ 'url': 'view-source:https://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True
+ }
+
+ def _real_extract(self, url):
+ return self.url_result(self._match_valid_url(url).group('url'))
diff --git a/hypervideo_dl/extractor/condenast.py b/hypervideo_dl/extractor/condenast.py
index d5e77af..54e7af8 100644
--- a/hypervideo_dl/extractor/condenast.py
+++ b/hypervideo_dl/extractor/condenast.py
@@ -222,7 +222,7 @@ class CondeNastIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id, player_id, target, url_type, display_id = re.match(self._VALID_URL, url).groups()
+ video_id, player_id, target, url_type, display_id = self._match_valid_url(url).groups()
if video_id:
return self._extract_video({
diff --git a/hypervideo_dl/extractor/corus.py b/hypervideo_dl/extractor/corus.py
index e11aadf..352951e 100644
--- a/hypervideo_dl/extractor/corus.py
+++ b/hypervideo_dl/extractor/corus.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .theplatform import ThePlatformFeedIE
from ..utils import (
@@ -96,7 +95,7 @@ class CorusIE(ThePlatformFeedIE):
}
def _real_extract(self, url):
- domain, video_id = re.match(self._VALID_URL, url).groups()
+ domain, video_id = self._match_valid_url(url).groups()
site = domain.split('.')[0]
path = self._SITE_MAP.get(site, site)
if path != 'series':
@@ -131,7 +130,7 @@ class CorusIE(ThePlatformFeedIE):
formats.extend(self._parse_smil_formats(
smil, smil_url, video_id, namespace))
if not formats and video.get('drm'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ self.report_drm(video_id)
self._sort_formats(formats)
subtitles = {}
diff --git a/hypervideo_dl/extractor/coub.py b/hypervideo_dl/extractor/coub.py
index 6ea03e6..eba6b73 100644
--- a/hypervideo_dl/extractor/coub.py
+++ b/hypervideo_dl/extractor/coub.py
@@ -87,7 +87,7 @@ class CoubIE(InfoExtractor):
'filesize': int_or_none(item.get('size')),
'vcodec': 'none' if kind == 'audio' else None,
'quality': quality_key(quality),
- 'preference': preference_key(HTML5),
+ 'source_preference': preference_key(HTML5),
})
iphone_url = file_versions.get(IPHONE, {}).get('url')
@@ -95,7 +95,7 @@ class CoubIE(InfoExtractor):
formats.append({
'url': iphone_url,
'format_id': IPHONE,
- 'preference': preference_key(IPHONE),
+ 'source_preference': preference_key(IPHONE),
})
mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
@@ -103,7 +103,7 @@ class CoubIE(InfoExtractor):
formats.append({
'url': mobile_url,
'format_id': '%s-audio' % MOBILE,
- 'preference': preference_key(MOBILE),
+ 'source_preference': preference_key(MOBILE),
})
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/crackle.py b/hypervideo_dl/extractor/crackle.py
index 49bf3a4..2c9d28d 100644
--- a/hypervideo_dl/extractor/crackle.py
+++ b/hypervideo_dl/extractor/crackle.py
@@ -12,6 +12,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
+ orderedSet,
parse_age_limit,
parse_duration,
url_or_none,
@@ -66,135 +67,179 @@ class CrackleIE(InfoExtractor):
},
}
+ def _download_json(self, url, *args, **kwargs):
+ # Authorization generation algorithm is reverse engineered from:
+ # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
+ timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
+ h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
+ headers = {
+ 'Accept': 'application/json',
+ 'Authorization': '|'.join([h, timestamp, '117', '1']),
+ }
+ return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)
+
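For reference, a sketch of the Authorization header this override produces for a sample request (the media id and timestamp are examples; the key and trailing fields mirror the code above):

    import hashlib, hmac, time

    url = 'https://web-api-us.crackle.com/Service.svc/details/media/2498934/US?disableProtocols=true'
    timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())  # e.g. '202110181524'
    h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(),
                 hashlib.sha1).hexdigest().upper()
    authorization = '|'.join([h, timestamp, '117', '1'])
    # -> '<40 hex digits>|202110181524|117|1'
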
def _real_extract(self, url):
video_id = self._match_id(url)
- country_code = self._downloader.params.get('geo_bypass_country', None)
- countries = [country_code] if country_code else (
- 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
-
- last_e = None
+ geo_bypass_country = self.get_param('geo_bypass_country', None)
+ countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
+ num_countries, num = len(countries) - 1, 0
+
+ media = {}
+ for num, country in enumerate(countries):
+ if num == 1: # start hard-coded list
+ self.report_warning('%s. Trying with a list of known countries' % (
+ 'Unable to obtain video formats from %s API' % geo_bypass_country if geo_bypass_country
+ else 'No country code was given via --geo-bypass-country'))
+ elif num == num_countries: # end of list
+ geo_info = self._download_json(
+ 'https://web-api-us.crackle.com/Service.svc/geo/country',
+ video_id, fatal=False, note='Downloading geo-location information from crackle API',
+ errnote='Unable to fetch geo-location information from crackle') or {}
+ country = geo_info.get('CountryCode')
+ if country is None:
+ continue
+ self.to_screen('%s identified country as %s' % (self.IE_NAME, country))
+ if country in countries:
+ self.to_screen('Downloading from %s API was already attempted. Skipping...' % country)
+ continue
- for country in countries:
+ if country is None:
+ continue
try:
- # Authorization generation algorithm is reverse engineered from:
- # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
- media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
- timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
- h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
media = self._download_json(
- media_detail_url, video_id, 'Downloading media JSON as %s' % country,
- 'Unable to download media JSON', headers={
- 'Accept': 'application/json',
- 'Authorization': '|'.join([h, timestamp, '117', '1']),
- })
+ 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country),
+ video_id, note='Downloading media JSON from %s API' % country,
+ errnote='Unable to download media JSON')
except ExtractorError as e:
# 401 means geo restriction, trying next country
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- last_e = e
continue
raise
- media_urls = media.get('MediaURLs')
- if not media_urls or not isinstance(media_urls, list):
+ status = media.get('status') or {}
+ if status.get('messageCode') != '0':
+ raise ExtractorError(
+ '%s said: %s %s - %s' % (
+ self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
+ expected=True)
+
+ # Found video formats
+ if isinstance(media.get('MediaURLs'), list):
+ break
+
+ ignore_no_formats = self.get_param('ignore_no_formats_error')
+ allow_unplayable_formats = self.get_param('allow_unplayable_formats')
+
+ if not media or (not media.get('MediaURLs') and not ignore_no_formats):
+ raise ExtractorError(
+ 'Unable to access the crackle API. Try passing your country code '
+ 'to --geo-bypass-country. If it still does not work and the '
+ 'video is available in your country, report the issue')
+ title = media['Title']
+
+ formats, subtitles = [], {}
+ has_drm = False
+ for e in media.get('MediaURLs') or []:
+ if e.get('UseDRM'):
+ has_drm = True
+ if not allow_unplayable_formats:
+ continue
+ format_url = url_or_none(e.get('Path'))
+ if not format_url:
continue
-
- title = media['Title']
-
- formats = []
- for e in media['MediaURLs']:
- if e.get('UseDRM') is True:
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif ext == 'mpd':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif format_url.endswith('.ism/Manifest'):
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ format_url, video_id, ism_id='mss', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ else:
+ mfs_path = e.get('Type')
+ mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
+ if not mfs_info:
continue
- format_url = url_or_none(e.get('Path'))
- if not format_url:
+ formats.append({
+ 'url': format_url,
+ 'format_id': 'http-' + mfs_path.split('.')[0],
+ 'width': mfs_info['width'],
+ 'height': mfs_info['height'],
+ })
+ if not formats and has_drm:
+ self.report_drm(video_id)
+ self._sort_formats(formats)
+
+ description = media.get('Description')
+ duration = int_or_none(media.get(
+ 'DurationInSeconds')) or parse_duration(media.get('Duration'))
+ view_count = int_or_none(media.get('CountViews'))
+ average_rating = float_or_none(media.get('UserRating'))
+ age_limit = parse_age_limit(media.get('Rating'))
+ genre = media.get('Genre')
+ release_year = int_or_none(media.get('ReleaseYear'))
+ creator = media.get('Directors')
+ artist = media.get('Cast')
+
+ if media.get('MediaTypeDisplayValue') == 'Full Episode':
+ series = media.get('ShowName')
+ episode = title
+ season_number = int_or_none(media.get('Season'))
+ episode_number = int_or_none(media.get('Episode'))
+ else:
+ series = episode = season_number = episode_number = None
+
+ cc_files = media.get('ClosedCaptionFiles')
+ if isinstance(cc_files, list):
+ for cc_file in cc_files:
+ if not isinstance(cc_file, dict):
continue
- ext = determine_ext(format_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id='dash', fatal=False))
- elif format_url.endswith('.ism/Manifest'):
- formats.extend(self._extract_ism_formats(
- format_url, video_id, ism_id='mss', fatal=False))
- else:
- mfs_path = e.get('Type')
- mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
- if not mfs_info:
- continue
- formats.append({
- 'url': format_url,
- 'format_id': 'http-' + mfs_path.split('.')[0],
- 'width': mfs_info['width'],
- 'height': mfs_info['height'],
- })
- self._sort_formats(formats)
-
- description = media.get('Description')
- duration = int_or_none(media.get(
- 'DurationInSeconds')) or parse_duration(media.get('Duration'))
- view_count = int_or_none(media.get('CountViews'))
- average_rating = float_or_none(media.get('UserRating'))
- age_limit = parse_age_limit(media.get('Rating'))
- genre = media.get('Genre')
- release_year = int_or_none(media.get('ReleaseYear'))
- creator = media.get('Directors')
- artist = media.get('Cast')
-
- if media.get('MediaTypeDisplayValue') == 'Full Episode':
- series = media.get('ShowName')
- episode = title
- season_number = int_or_none(media.get('Season'))
- episode_number = int_or_none(media.get('Episode'))
- else:
- series = episode = season_number = episode_number = None
-
- subtitles = {}
- cc_files = media.get('ClosedCaptionFiles')
- if isinstance(cc_files, list):
- for cc_file in cc_files:
- if not isinstance(cc_file, dict):
- continue
- cc_url = url_or_none(cc_file.get('Path'))
- if not cc_url:
- continue
- lang = cc_file.get('Locale') or 'en'
- subtitles.setdefault(lang, []).append({'url': cc_url})
-
- thumbnails = []
- images = media.get('Images')
- if isinstance(images, list):
- for image_key, image_url in images.items():
- mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
- if not mobj:
- continue
- thumbnails.append({
- 'url': image_url,
- 'width': int(mobj.group(1)),
- 'height': int(mobj.group(2)),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'view_count': view_count,
- 'average_rating': average_rating,
- 'age_limit': age_limit,
- 'genre': genre,
- 'creator': creator,
- 'artist': artist,
- 'release_year': release_year,
- 'series': series,
- 'episode': episode,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'thumbnails': thumbnails,
- 'subtitles': subtitles,
- 'formats': formats,
- }
-
- raise last_e
+ cc_url = url_or_none(cc_file.get('Path'))
+ if not cc_url:
+ continue
+ lang = cc_file.get('Locale') or 'en'
+ subtitles.setdefault(lang, []).append({'url': cc_url})
+
+ thumbnails = []
+ images = media.get('Images')
+ if isinstance(images, list):
+ for image_key, image_url in images.items():
+ mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
+ if not mobj:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'age_limit': age_limit,
+ 'genre': genre,
+ 'creator': creator,
+ 'artist': artist,
+ 'release_year': release_year,
+ 'series': series,
+ 'episode': episode,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/crunchyroll.py b/hypervideo_dl/extractor/crunchyroll.py
index bc2d1fa..511ac1b 100644
--- a/hypervideo_dl/extractor/crunchyroll.py
+++ b/hypervideo_dl/extractor/crunchyroll.py
@@ -29,6 +29,7 @@ from ..utils import (
merge_dicts,
remove_end,
sanitized_Request,
+ try_get,
urlencode_postdata,
xpath_text,
)
@@ -120,7 +121,7 @@ class CrunchyrollBaseIE(InfoExtractor):
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
IE_NAME = 'crunchyroll'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
@@ -412,8 +413,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return subtitles
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
if mobj.group('prefix') == 'm':
mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
@@ -428,7 +429,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'<div class="showmedia-trailer-notice">(.+?)</div>',
webpage, 'trailer-notice', default='')
if note_m:
- raise ExtractorError(note_m)
+ raise ExtractorError(note_m, expected=True)
mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
if mobj:
@@ -458,6 +459,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
video_description = (self._parse_json(self._html_search_regex(
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
+
+ thumbnails = []
+ thumbnail_url = (self._parse_json(self._html_search_regex(
+ r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>',
+ webpage, 'thumbnail_url', default='{}'), video_id)).get('image')
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': 1920,
+ 'height': 1080
+ })
+
if video_description:
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
video_uploader = self._html_search_regex(
@@ -473,15 +486,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
stream.get('url'), video_id, stream.get('format'),
audio_lang, hardsub_lang)
for f in vrv_formats:
- if not hardsub_lang:
- f['preference'] = 1
- language_preference = 0
- if audio_lang == language:
- language_preference += 1
- if hardsub_lang == language:
- language_preference += 1
- if language_preference:
- f['language_preference'] = language_preference
+ f['language_preference'] = 1 if audio_lang == language else 0
+ f['quality'] = (
+ 1 if not hardsub_lang
+ else 0 if hardsub_lang == language
+ else -1)
formats.extend(vrv_formats)
if not formats:
available_fmts = []
@@ -571,7 +580,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'ext': 'flv',
})
formats.append(format_info)
- self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
+ self._sort_formats(formats)
metadata = self._call_rpc_api(
'VideoPlayer_GetMediaMetadata', video_id,
@@ -596,21 +605,25 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False)
- season = episode = episode_number = duration = thumbnail = None
+ season = episode = episode_number = duration = None
if isinstance(metadata, compat_etree_Element):
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
duration = float_or_none(media_metadata.get('duration'), 1000)
- thumbnail = xpath_text(metadata, 'episode_image_url')
if not episode:
episode = media_metadata.get('title')
if not episode_number:
episode_number = int_or_none(media_metadata.get('episode_number'))
- if not thumbnail:
- thumbnail = media_metadata.get('thumbnail', {}).get('url')
+ thumbnail_url = try_get(media, lambda x: x['thumbnail']['url'])
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': 640,
+ 'height': 360
+ })
season_number = int_or_none(self._search_regex(
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@@ -623,7 +636,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'title': video_title,
'description': video_description,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'uploader': video_uploader,
'series': series,
'season': season,
@@ -637,10 +650,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
IE_NAME = 'crunchyroll:playlist'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{1,2}/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
_TESTS = [{
- 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
'info_dict': {
'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
@@ -659,28 +672,86 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
# geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
'only_matching': True,
+ }, {
+ 'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers',
+ 'only_matching': True,
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(
- self._add_skip_wall(url), show_id,
+ # https:// gives a 403, but http:// does not
+ self._add_skip_wall(url).replace('https://', 'http://'), show_id,
headers=self.geo_verification_headers())
title = self._html_search_meta('name', webpage, default=None)
- episode_paths = re.findall(
- r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
- webpage)
- entries = [
- self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
- for ep_id, ep in episode_paths
- ]
- entries.reverse()
+ episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"'
+ season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)'
+ paths = re.findall(f'(?s){episode_re}|{season_re}', webpage)
+
+ entries, current_season = [], None
+ for ep_id, ep, season in paths:
+ if season:
+ current_season = season
+ continue
+ entries.append(self.url_result(
+ f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season))
return {
'_type': 'playlist',
'id': show_id,
'title': title,
- 'entries': entries,
+ 'entries': reversed(entries),
}
+
+
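The combined findall above works because re.findall returns one 3-tuple per match, with empty strings for whichever alternative did not match; a self-contained sketch with illustrative HTML:

    import re

    episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"'
    season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)'
    html = ('<a class="season-dropdown">Season 1</a>'
            '<li id="showview_videos_media_645513" class="e"><a href="/show/episode-1">')
    print(re.findall(f'(?s){episode_re}|{season_re}', html))
    # [('', '', 'Season 1'), ('645513', '/show/episode-1', '')]
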
+class CrunchyrollBetaIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:beta'
+ _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<internal_id>\w+)/(?P<id>[\w\-]+)/?(?:\?|$)'
+ _TESTS = [{
+ 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
+ 'info_dict': {
+ 'id': '696363',
+ 'ext': 'mp4',
+ 'timestamp': 1459610100,
+ 'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
+ 'uploader': 'Toei Animation',
+ 'title': 'World Trigger Episode 73 – To the Future',
+ 'upload_date': '20160402',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'expected_warnings': ['Unable to download XML']
+ }]
+
+ def _real_extract(self, url):
+ lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
+ webpage = self._download_webpage(url, display_id)
+ episode_data = self._parse_json(
+ self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'),
+ display_id)['content']['byId'][internal_id]
+ video_id = episode_data['external_id'].split('.')[1]
+ series_id = episode_data['episode_metadata']['series_slug_title']
+ return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
+ CrunchyrollIE.ie_key(), video_id)
+
+
+class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:playlist:beta'
+ _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/\w+/(?P<id>[\w\-]+)/?(?:\?|$)'
+ _TESTS = [{
+ 'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
+ 'info_dict': {
+ 'id': 'girl-friend-beta',
+ 'title': 'Girl Friend BETA',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lang, series_id = self._match_valid_url(url).group('lang', 'id')
+ return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}',
+ CrunchyrollShowPlaylistIE.ie_key(), series_id)
diff --git a/hypervideo_dl/extractor/cultureunplugged.py b/hypervideo_dl/extractor/cultureunplugged.py
index bcdf273..9002e4c 100644
--- a/hypervideo_dl/extractor/cultureunplugged.py
+++ b/hypervideo_dl/extractor/cultureunplugged.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
import time
from .common import InfoExtractor
@@ -32,7 +31,7 @@ class CultureUnpluggedIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
diff --git a/hypervideo_dl/extractor/curiositystream.py b/hypervideo_dl/extractor/curiositystream.py
index ae64a07..034a5c9 100644
--- a/hypervideo_dl/extractor/curiositystream.py
+++ b/hypervideo_dl/extractor/curiositystream.py
@@ -145,8 +145,17 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream:collection'
- _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
+ _API_BASE_URL = 'https://api.curiositystream.com/v2/collections/'
_TESTS = [{
+ 'url': 'https://curiositystream.com/collections/86',
+ 'info_dict': {
+ 'id': '86',
+ 'title': 'Staff Picks',
+ 'description': 'Wondering where to start? Here are a few of our favorite series and films... from our couch to yours.',
+ },
+ 'playlist_mincount': 7,
+ }, {
'url': 'https://app.curiositystream.com/collection/2',
'info_dict': {
'id': '2',
@@ -157,18 +166,21 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
}, {
'url': 'https://curiositystream.com/series/2',
'only_matching': True,
+ }, {
+ 'url': 'https://curiositystream.com/collections/36',
+ 'only_matching': True,
}]
def _real_extract(self, url):
collection_id = self._match_id(url)
- collection = self._call_api(
- 'collections/' + collection_id, collection_id)
+ collection = self._call_api(collection_id, collection_id)
entries = []
for media in collection.get('media', []):
media_id = compat_str(media.get('id'))
+ media_type, ie = ('series', CuriosityStreamCollectionIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
entries.append(self.url_result(
- 'https://curiositystream.com/video/' + media_id,
- CuriosityStreamIE.ie_key(), media_id))
+ 'https://curiositystream.com/%s/%s' % (media_type, media_id),
+ ie=ie.ie_key(), video_id=media_id))
return self.playlist_result(
entries, collection_id,
collection.get('title'), collection.get('description'))
diff --git a/hypervideo_dl/extractor/dailymotion.py b/hypervideo_dl/extractor/dailymotion.py
index b852905..e04e10b 100644
--- a/hypervideo_dl/extractor/dailymotion.py
+++ b/hypervideo_dl/extractor/dailymotion.py
@@ -42,7 +42,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
def _real_initialize(self):
cookies = self._get_dailymotion_cookies()
ff = self._get_cookie_value(cookies, 'ff')
- self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit'))
+ self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
@@ -204,17 +204,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
return urls
def _real_extract(self, url):
- video_id, playlist_id = re.match(self._VALID_URL, url).groups()
+ video_id, playlist_id = self._match_valid_url(url).groups()
if playlist_id:
- if not self._downloader.params.get('noplaylist'):
+ if not self.get_param('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
return self.url_result(
'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', playlist_id)
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- password = self._downloader.params.get('videopassword')
+ password = self.get_param('videopassword')
media = self._call_api(
'media', video_id, '''... on Video {
%s
@@ -232,7 +232,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
audienceCount
isOnAir
}''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
- 'password: "%s"' % self._downloader.params.get('videopassword') if password else None)
+ 'password: "%s"' % self.get_param('videopassword') if password else None)
xid = media['xid']
metadata = self._download_json(
diff --git a/hypervideo_dl/extractor/damtomo.py b/hypervideo_dl/extractor/damtomo.py
new file mode 100644
index 0000000..456cd35
--- /dev/null
+++ b/hypervideo_dl/extractor/damtomo.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
+from ..compat import compat_str
+
+
+class DamtomoBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage, handle = self._download_webpage_handle(self._WEBPAGE_URL_TMPL % video_id, video_id, encoding='sjis')
+
+ if handle.url == 'https://www.clubdam.com/sorry/':
+ raise ExtractorError('You are rate-limited. Try again later.', expected=True)
+ if '<h2>予期せぬエラーが発生しました。</h2>' in webpage:
+ raise ExtractorError('There is an error on server-side. Try again later.', expected=True)
+
+ description = self._search_regex(r'(?m)<div id="public_comment">\s*<p>\s*([^<]*?)\s*</p>', webpage, 'description', default=None)
+ uploader_id = self._search_regex(r'<a href="https://www\.clubdam\.com/app/damtomo/member/info/Profile\.do\?damtomoId=([^"]+)"', webpage, 'uploader_id', default=None)
+
+ data_dict = {
+ mobj.group('class'): re.sub(r'\s+', ' ', clean_html(mobj.group('value')))
+ for mobj in re.finditer(r'(?s)<(p|div)\s+class="(?P<class>[^" ]+?)">(?P<value>.+?)</\1>', webpage)}
+
+ # since videos do not have a title, use the song name instead
+ data_dict['user_name'] = re.sub(r'\s*さん\s*$', '', data_dict['user_name'])
+ title = data_dict.get('song_title')
+
+ stream_tree = self._download_xml(
+ self._DKML_XML_URL % video_id, video_id, note='Requesting stream information', encoding='sjis',
+ # stripping the encoding declaration is safe here: the document contains
+ # no non-ASCII characters, and that is unlikely to change
+ transform_source=lambda x: re.sub(r'\s*encoding="[^"]+?"', '', x))
+ m3u8_url = try_get(stream_tree, lambda x: x.find(
+ './/d:streamingUrl', {'d': self._DKML_XML_NS}).text.strip(), compat_str)
+ if not m3u8_url:
+ raise ExtractorError('Failed to obtain m3u8 URL')
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'uploader_id': uploader_id,
+ 'description': description,
+ 'uploader': data_dict.get('user_name'),
+ 'upload_date': unified_strdate(self._search_regex(r'(\d{4}/\d{2}/\d{2})', data_dict.get('date'), 'upload_date', default=None)),
+ 'view_count': int_or_none(self._search_regex(r'(\d+)', data_dict['audience'], 'view_count', default=None)),
+ 'like_count': int_or_none(self._search_regex(r'(\d+)', data_dict['nice'], 'like_count', default=None)),
+ 'track': title,
+ 'artist': data_dict.get('song_artist'),
+ 'formats': formats,
+ }
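
A self-contained sketch of the class-to-text scrape used in _real_extract above (illustrative HTML; the real code also runs clean_html on each value before collapsing whitespace):

    import re

    webpage = ('<p class="user_name">NANA さん</p>'
               '<div class="song_title">Get  Wild</div>')
    data_dict = {
        mobj.group('class'): re.sub(r'\s+', ' ', mobj.group('value'))
        for mobj in re.finditer(
            r'(?s)<(p|div)\s+class="(?P<class>[^" ]+?)">(?P<value>.+?)</\1>', webpage)}
    # data_dict == {'user_name': 'NANA さん', 'song_title': 'Get Wild'}
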
+
+
+class DamtomoVideoIE(DamtomoBaseIE):
+ IE_NAME = 'damtomo:video'
+ _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokeMovie/StreamingDkm\.do\?karaokeMovieId=(?P<id>\d+)'
+ _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=%s'
+ _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML.do?movieSelectFlg=2&karaokeMovieId=%s'
+ _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML'
+ _TESTS = [{
+ 'url': 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=2414316',
+ 'info_dict': {
+ 'id': '2414316',
+ 'title': 'Get Wild',
+ 'uploader': 'Kドロン',
+ 'uploader_id': 'ODk5NTQwMzQ',
+ 'track': 'Get Wild',
+ 'artist': 'TM NETWORK(TMN)',
+ 'upload_date': '20201226',
+ }
+ }]
+
+
+class DamtomoRecordIE(DamtomoBaseIE):
+ IE_NAME = 'damtomo:record'
+ _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokePost/StreamingKrk\.do\?karaokeContributeId=(?P<id>\d+)'
+ _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=%s'
+ _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML.do?karaokeContributeId=%s'
+ _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML'
+ _TESTS = [{
+ 'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27376862',
+ 'info_dict': {
+ 'id': '27376862',
+ 'title': 'イカSUMMER [良音]',
+ 'description': None,
+ 'uploader': 'NANA',
+ 'uploader_id': 'MzAyMDExNTY',
+ 'upload_date': '20210721',
+ 'view_count': 4,
+ 'like_count': 1,
+ 'track': 'イカSUMMER [良音]',
+ 'artist': 'ORANGE RANGE',
+ }
+ }, {
+ 'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27489418',
+ 'info_dict': {
+ 'id': '27489418',
+ 'title': '心みだれて〜say it with flowers〜(生音)',
+ 'uploader_id': 'NjI1MjI2MjU',
+ 'description': 'やっぱりキーを下げて正解だった感じ。リベンジ成功ということで。',
+ 'uploader': '箱の「中の人」',
+ 'upload_date': '20210815',
+ 'view_count': 5,
+ 'like_count': 3,
+ 'track': '心みだれて〜say it with flowers〜(生音)',
+ 'artist': '小林明子',
+ }
+ }]
diff --git a/hypervideo_dl/extractor/daum.py b/hypervideo_dl/extractor/daum.py
index 1370955..8aa2af9 100644
--- a/hypervideo_dl/extractor/daum.py
+++ b/hypervideo_dl/extractor/daum.py
@@ -6,10 +6,9 @@ import itertools
from .common import InfoExtractor
from ..compat import (
- compat_parse_qs,
compat_urllib_parse_unquote,
- compat_urlparse,
)
+from ..utils import parse_qs
class DaumBaseIE(InfoExtractor):
@@ -155,10 +154,10 @@ class DaumListIE(InfoExtractor):
return name, entries
def _check_clip(self, url, list_id):
- query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ query_dict = parse_qs(url)
if 'clipid' in query_dict:
clip_id = query_dict['clipid'][0]
- if self._downloader.params.get('noplaylist'):
+ if self.get_param('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
else:
@@ -256,7 +255,7 @@ class DaumUserIE(DaumListIE):
if clip_result:
return clip_result
- query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ query_dict = parse_qs(url)
if 'playlistid' in query_dict:
playlist_id = query_dict['playlistid'][0]
return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
diff --git a/hypervideo_dl/extractor/dbtv.py b/hypervideo_dl/extractor/dbtv.py
index aaedf2e..8e73176 100644
--- a/hypervideo_dl/extractor/dbtv.py
+++ b/hypervideo_dl/extractor/dbtv.py
@@ -38,7 +38,7 @@ class DBTVIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id, video_id = self._match_valid_url(url).groups()
info = {
'_type': 'url_transparent',
'id': video_id,
diff --git a/hypervideo_dl/extractor/deezer.py b/hypervideo_dl/extractor/deezer.py
index a38b268..7ba02e5 100644
--- a/hypervideo_dl/extractor/deezer.py
+++ b/hypervideo_dl/extractor/deezer.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -11,28 +10,15 @@ from ..utils import (
)
-class DeezerPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.deezer.com/playlist/176747451',
- 'info_dict': {
- 'id': '176747451',
- 'title': 'Best!',
- 'uploader': 'Anonymous',
- 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
- },
- 'playlist_count': 30,
- 'skip': 'Only available in .de',
- }
-
- def _real_extract(self, url):
- if 'test' not in self._downloader.params:
- self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
+class DeezerBaseInfoExtractor(InfoExtractor):
+ def get_data(self, url):
+ if not self.get_param('test'):
+ self.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ mobj = self._match_valid_url(url)
+ data_id = mobj.group('id')
- webpage = self._download_webpage(url, playlist_id)
+ webpage = self._download_webpage(url, data_id)
geoblocking_msg = self._html_search_regex(
r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
default=None)
@@ -45,6 +31,24 @@ class DeezerPlaylistIE(InfoExtractor):
r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
webpage, 'data JSON')
data = json.loads(data_json)
+ return data_id, webpage, data
+
+
+class DeezerPlaylistIE(DeezerBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?playlist/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.deezer.com/playlist/176747451',
+ 'info_dict': {
+ 'id': '176747451',
+ 'title': 'Best!',
+ 'uploader': 'anonymous',
+ 'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
+ },
+ 'playlist_count': 29,
+ }
+
+ def _real_extract(self, url):
+ playlist_id, webpage, data = self.get_data(url)
playlist_title = data.get('DATA', {}).get('TITLE')
playlist_uploader = data.get('DATA', {}).get('PARENT_USERNAME')
@@ -52,31 +56,23 @@ class DeezerPlaylistIE(InfoExtractor):
r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
'playlist thumbnail')
- preview_pattern = self._search_regex(
- r"var SOUND_PREVIEW_GATEWAY\s*=\s*'([^']+)';", webpage,
- 'preview URL pattern', fatal=False)
entries = []
- for s in data['SONGS']['data']:
- puid = s['MD5_ORIGIN']
- preview_video_url = preview_pattern.\
- replace('{0}', puid[0]).\
- replace('{1}', puid).\
- replace('{2}', s['MEDIA_VERSION'])
+ for s in data.get('SONGS', {}).get('data'):
formats = [{
'format_id': 'preview',
- 'url': preview_video_url,
+ 'url': s.get('MEDIA', [{}])[0].get('HREF'),
'preference': -100, # Only the first 30 seconds
'ext': 'mp3',
}]
self._sort_formats(formats)
artists = ', '.join(
- orderedSet(a['ART_NAME'] for a in s['ARTISTS']))
+ orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
entries.append({
- 'id': s['SNG_ID'],
+ 'id': s.get('SNG_ID'),
'duration': int_or_none(s.get('DURATION')),
- 'title': '%s - %s' % (artists, s['SNG_TITLE']),
- 'uploader': s['ART_NAME'],
- 'uploader_id': s['ART_ID'],
+ 'title': '%s - %s' % (artists, s.get('SNG_TITLE')),
+ 'uploader': s.get('ART_NAME'),
+ 'uploader_id': s.get('ART_ID'),
'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
'formats': formats,
})
@@ -89,3 +85,62 @@ class DeezerPlaylistIE(InfoExtractor):
'thumbnail': playlist_thumbnail,
'entries': entries,
}
+
+
+class DeezerAlbumIE(DeezerBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?deezer\.com/(../)?album/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.deezer.com/fr/album/67505622',
+ 'info_dict': {
+ 'id': '67505622',
+ 'title': 'Last Week',
+ 'uploader': 'Home Brew',
+ 'thumbnail': r're:^https?://(e-)?cdns-images\.dzcdn\.net/images/cover/.*\.jpg$',
+ },
+ 'playlist_count': 7,
+ }
+
+ def _real_extract(self, url):
+ album_id, webpage, data = self.get_data(url)
+
+ album_title = data.get('DATA', {}).get('ALB_TITLE')
+ album_uploader = data.get('DATA', {}).get('ART_NAME')
+ album_thumbnail = self._search_regex(
+ r'<img id="naboo_album_image".*?src="([^"]+)"', webpage,
+ 'album thumbnail')
+
+ entries = []
+ for s in data.get('SONGS', {}).get('data'):
+ formats = [{
+ 'format_id': 'preview',
+ 'url': s.get('MEDIA', [{}])[0].get('HREF'),
+ 'preference': -100, # Only the first 30 seconds
+ 'ext': 'mp3',
+ }]
+ self._sort_formats(formats)
+ artists = ', '.join(
+ orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
+ entries.append({
+ 'id': s.get('SNG_ID'),
+ 'duration': int_or_none(s.get('DURATION')),
+ 'title': '%s - %s' % (artists, s.get('SNG_TITLE')),
+ 'uploader': s.get('ART_NAME'),
+ 'uploader_id': s.get('ART_ID'),
+ 'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
+ 'formats': formats,
+ 'track': s.get('SNG_TITLE'),
+ 'track_number': int_or_none(s.get('TRACK_NUMBER')),
+ 'track_id': s.get('SNG_ID'),
+ 'artist': album_uploader,
+ 'album': album_title,
+ 'album_artist': album_uploader,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': album_id,
+ 'title': album_title,
+ 'uploader': album_uploader,
+ 'thumbnail': album_thumbnail,
+ 'entries': entries,
+ }
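
Besides factoring page fetching into DeezerBaseInfoExtractor.get_data, the rewrite reads preview URLs defensively from s.get('MEDIA', [{}])[0].get('HREF') instead of trusting hard keys. A slightly hardened variant of that access chain, using made-up data:

    def preview_url(song):
        # mirrors s.get('MEDIA', [{}])[0].get('HREF') above, but also
        # tolerates an explicit empty MEDIA list instead of raising
        media = song.get('MEDIA') or [{}]
        return media[0].get('HREF')

    assert preview_url({}) is None
    assert preview_url({'MEDIA': [{'HREF': 'https://cdn.example/preview.mp3'}]}) == 'https://cdn.example/preview.mp3'
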
diff --git a/hypervideo_dl/extractor/dfb.py b/hypervideo_dl/extractor/dfb.py
index a4d0448..97f70fc 100644
--- a/hypervideo_dl/extractor/dfb.py
+++ b/hypervideo_dl/extractor/dfb.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -23,7 +22,7 @@ class DFBIE(InfoExtractor):
}
def _real_extract(self, url):
- display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id, video_id = self._match_valid_url(url).groups()
player_info = self._download_xml(
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
diff --git a/hypervideo_dl/extractor/digiteka.py b/hypervideo_dl/extractor/digiteka.py
index 3dfde0d..d632047 100644
--- a/hypervideo_dl/extractor/digiteka.py
+++ b/hypervideo_dl/extractor/digiteka.py
@@ -70,7 +70,7 @@ class DigitekaIE(InfoExtractor):
return mobj.group('url')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
video_type = mobj.group('embed_type') or mobj.group('site_type')
if video_type == 'music':
diff --git a/hypervideo_dl/extractor/discovery.py b/hypervideo_dl/extractor/discovery.py
index e0139cc..fd3ad75 100644
--- a/hypervideo_dl/extractor/discovery.py
+++ b/hypervideo_dl/extractor/discovery.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import random
-import re
import string
from .discoverygo import DiscoveryGoBaseIE
@@ -62,7 +61,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
_API_BASE_URL = 'https://api.discovery.com/v1/'
def _real_extract(self, url):
- site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
+ site, show_slug, display_id = self._match_valid_url(url).groups()
access_token = None
cookies = self._get_cookies(url)
diff --git a/hypervideo_dl/extractor/discoverynetworks.py b/hypervideo_dl/extractor/discoverynetworks.py
index c512b95..f43c871 100644
--- a/hypervideo_dl/extractor/discoverynetworks.py
+++ b/hypervideo_dl/extractor/discoverynetworks.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .dplay import DPlayIE
@@ -35,7 +34,7 @@ class DiscoveryNetworksDeIE(DPlayIE):
}]
def _real_extract(self, url):
- domain, programme, alternate_id = re.match(self._VALID_URL, url).groups()
+ domain, programme, alternate_id = self._match_valid_url(url).groups()
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
return self._get_disco_api_info(
diff --git a/hypervideo_dl/extractor/discoveryplusindia.py b/hypervideo_dl/extractor/discoveryplusindia.py
new file mode 100644
index 0000000..5180140
--- /dev/null
+++ b/hypervideo_dl/extractor/discoveryplusindia.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from ..compat import compat_str
+from ..utils import try_get
+from .common import InfoExtractor
+from .dplay import DPlayIE
+
+
+class DiscoveryPlusIndiaIE(DPlayIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
+ 'info_dict': {
+ 'id': '27104',
+ 'ext': 'mp4',
+ 'display_id': 'how-do-they-do-it/fugu-and-more',
+ 'title': 'Fugu and More',
+ 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
+ 'duration': 1319,
+ 'timestamp': 1582309800,
+ 'upload_date': '20200221',
+ 'series': 'How Do They Do It?',
+ 'season_number': 8,
+ 'episode_number': 2,
+ 'creator': 'Discovery Channel',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'skip': 'Cookies (not necessarily logged in) are needed'
+ }]
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers['x-disco-params'] = 'realm=%s' % realm
+ headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0'
+
+ def _download_video_playback_info(self, disco_base, video_id, headers):
+ return self._download_json(
+ disco_base + 'playback/v3/videoPlaybackInfo',
+ video_id, headers=headers, data=json.dumps({
+ 'deviceInfo': {
+ 'adBlocker': False,
+ },
+ 'videoId': video_id,
+ }).encode('utf-8'))['data']['attributes']['streaming']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._get_disco_api_info(
+ url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in')
+
+
+class DiscoveryPlusIndiaShowIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
+ 'playlist_mincount': 140,
+ 'info_dict': {
+ 'id': 'how-do-they-do-it',
+ },
+ }]
+
+ def _entries(self, show_name):
+ headers = {
+ 'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod',
+ 'x-disco-params': 'realm=dplusindia',
+ 'referer': 'https://www.discoveryplus.in/',
+ }
+ show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name)
+ show_json = self._download_json(show_url,
+ video_id=show_name,
+ headers=headers)['included'][4]['attributes']['component']
+ show_id = show_json['mandatoryParams'].split('=')[-1]
+ season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
+ for season in show_json['filters'][0]['options']:
+ season_id = season['id']
+ total_pages, page_num = 1, 0
+ while page_num < total_pages:
+ season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)),
+ video_id=show_id, headers=headers,
+ note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
+ if page_num == 0:
+ total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
+ episodes_json = season_json['data']
+ for episode in episodes_json:
+ video_id = episode['attributes']['path']
+ yield self.url_result(
+ 'https://discoveryplus.in/videos/%s' % video_id,
+ ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id)
+ page_num += 1
+
+ def _real_extract(self, url):
+ show_name = self._match_valid_url(url).group('show_name')
+ return self.playlist_result(self._entries(show_name), playlist_id=show_name)
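
DiscoveryPlusIndiaShowIE pages through each season with a 1-indexed page number and takes the page count from meta.totalPages of the first response. The same pattern in isolation, with a hypothetical two-page fetcher standing in for _download_json:

    def paged_items(fetch_page):
        # yield items from a 1-indexed paged API whose first response
        # carries meta.totalPages, as the show extractor above does
        total_pages, page_num = 1, 0
        while page_num < total_pages:
            page = fetch_page(page_num + 1)
            if page_num == 0:
                total_pages = page.get('meta', {}).get('totalPages') or 1
            yield from page.get('data', [])
            page_num += 1

    pages = {1: {'meta': {'totalPages': 2}, 'data': ['a', 'b']},
             2: {'data': ['c']}}
    assert list(paged_items(pages.__getitem__)) == ['a', 'b', 'c']
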
diff --git a/hypervideo_dl/extractor/disney.py b/hypervideo_dl/extractor/disney.py
index 0eee82f..f018cbe 100644
--- a/hypervideo_dl/extractor/disney.py
+++ b/hypervideo_dl/extractor/disney.py
@@ -9,7 +9,6 @@ from ..utils import (
unified_strdate,
compat_str,
determine_ext,
- ExtractorError,
update_url_query,
)
@@ -78,7 +77,7 @@ class DisneyIE(InfoExtractor):
}]
def _real_extract(self, url):
- domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
+ domain, video_id, display_id = self._match_valid_url(url).groups()
if not video_id:
webpage = self._download_webpage(url, display_id)
grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
@@ -140,7 +139,7 @@ class DisneyIE(InfoExtractor):
'vcodec': 'none' if (width == 0 and height == 0) else None,
})
if not formats and video_data.get('expired'):
- raise ExtractorError(
+ self.raise_no_formats(
'%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
expected=True)
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/dispeak.py b/hypervideo_dl/extractor/dispeak.py
index 276fd4b..be7ad12 100644
--- a/hypervideo_dl/extractor/dispeak.py
+++ b/hypervideo_dl/extractor/dispeak.py
@@ -94,6 +94,7 @@ class DigitallySpeakingIE(InfoExtractor):
'play_path': remove_end(audio.get('url'), '.flv'),
'ext': 'flv',
'vcodec': 'none',
+ 'quality': 1,
'format_id': audio.get('code'),
})
for video_key, format_id, preference in (
@@ -107,7 +108,6 @@ class DigitallySpeakingIE(InfoExtractor):
'ext': 'flv',
'format_note': '%s video' % video_key,
'quality': preference,
- 'preference': preference,
'format_id': format_id,
})
return formats
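
This hunk drops the duplicated 'preference' key and keeps 'quality'; the drtv and espn hunks further down make the same substitution. The two keys appear to play different roles in this codebase: 'quality' ranks the media itself, while 'preference' overrides overall format ordering. A toy illustration of quality-based ranking (the real _sort_formats weighs many more fields):

    formats = [
        {'format_id': 'audio', 'vcodec': 'none', 'quality': 1},
        {'format_id': '450p', 'quality': 3},
        {'format_id': '300p', 'quality': 2},
    ]
    # sketch: higher quality hint sorts first
    formats.sort(key=lambda f: f.get('quality') or -1, reverse=True)
    assert [f['format_id'] for f in formats] == ['450p', '300p', 'audio']
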
diff --git a/hypervideo_dl/extractor/dlive.py b/hypervideo_dl/extractor/dlive.py
index d95c67a..90462c0 100644
--- a/hypervideo_dl/extractor/dlive.py
+++ b/hypervideo_dl/extractor/dlive.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import int_or_none
@@ -26,7 +25,7 @@ class DLiveVODIE(InfoExtractor):
}]
def _real_extract(self, url):
- uploader_id, vod_id = re.match(self._VALID_URL, url).groups()
+ uploader_id, vod_id = self._match_valid_url(url).groups()
broadcast = self._download_json(
'https://graphigo.prd.dlive.tv/', vod_id,
data=json.dumps({'query': '''query {
diff --git a/hypervideo_dl/extractor/doodstream.py b/hypervideo_dl/extractor/doodstream.py
new file mode 100644
index 0000000..2c9ea68
--- /dev/null
+++ b/hypervideo_dl/extractor/doodstream.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import string
+import random
+import time
+
+from .common import InfoExtractor
+
+
+class DoodStreamIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://dood.to/e/5s1wmbdacezb',
+ 'md5': '4568b83b31e13242b3f1ff96c55f0595',
+ 'info_dict': {
+ 'id': '5s1wmbdacezb',
+ 'ext': 'mp4',
+ 'title': 'Kat Wonders - Monthly May 2020',
+ 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
+ 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
+ }
+ }, {
+ 'url': 'https://dood.to/d/jzrxn12t2s7n',
+ 'md5': '3207e199426eca7c2aa23c2872e6728a',
+ 'info_dict': {
+ 'id': 'jzrxn12t2s7n',
+ 'ext': 'mp4',
+ 'title': 'Stacy Cruz Cute ALLWAYSWELL',
+ 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
+ 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if '/d/' in url:
+ url = "https://dood.to" + self._html_search_regex(
+ r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(['og:title', 'twitter:title'],
+ webpage, default=None)
+ thumb = self._html_search_meta(['og:image', 'twitter:image'],
+ webpage, default=None)
+ token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
+ description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, default=None)
+ auth_url = 'https://dood.to' + self._html_search_regex(
+ r'(/pass_md5.*?)\'', webpage, 'pass_md5')
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
+ 'referer': url
+ }
+
+ webpage = self._download_webpage(auth_url, video_id, headers=headers)
+        final_url = (
+            webpage
+            + ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10))
+            + '?token=' + token + '&expiry=' + str(int(time.time() * 1000)))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': final_url,
+ 'http_headers': headers,
+ 'ext': 'mp4',
+ 'description': description,
+ 'thumbnail': thumb,
+ }
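
The DoodStream flow treats the pass_md5 response body as a URL prefix and appends ten random alphanumerics, the scraped token, and a millisecond expiry. The assembly step in isolation, with build_dood_url and the host a sketch around assumed inputs:

    import random
    import string
    import time

    def build_dood_url(prefix, token):
        # prefix: body returned by the /pass_md5/... endpoint, assumed to
        # end in '/'; mirrors the final_url construction above
        tail = ''.join(random.choices(string.ascii_letters + string.digits, k=10))
        return '%s%s?token=%s&expiry=%d' % (prefix, tail, token, int(time.time() * 1000))

    url = build_dood_url('https://host.example/stream/', 'abc123')
    assert url.startswith('https://host.example/stream/') and '&expiry=' in url
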
diff --git a/hypervideo_dl/extractor/dplay.py b/hypervideo_dl/extractor/dplay.py
index bbb1990..e0e446b 100644
--- a/hypervideo_dl/extractor/dplay.py
+++ b/hypervideo_dl/extractor/dplay.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -287,7 +286,7 @@ class DPlayIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('id')
domain = mobj.group('domain').lstrip('www.')
country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
@@ -296,6 +295,35 @@ class DPlayIE(InfoExtractor):
url, display_id, host, 'dplay' + country, country)
+class HGTVDeIE(DPlayIE):
+ _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
+ 'info_dict': {
+ 'id': '151205',
+ 'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
+ 'ext': 'mp4',
+ 'title': 'Wer braucht schon eine Toilette',
+ 'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
+ 'duration': 1177.024,
+ 'timestamp': 1595705400,
+ 'upload_date': '20200725',
+ 'creator': 'HGTV',
+ 'series': 'Tiny House - klein, aber oho',
+ 'season_number': 3,
+ 'episode_number': 3,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._get_disco_api_info(
+ url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
+
+
class DiscoveryPlusIE(DPlayIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
_TESTS = [{
@@ -317,8 +345,11 @@ class DiscoveryPlusIE(DPlayIE):
'skip': 'Available for Premium users',
}]
+ _PRODUCT = 'dplus_us'
+ _API_URL = 'us1-prod-direct.discoveryplus.com'
+
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
- headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
+ headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'
def _download_video_playback_info(self, disco_base, video_id, headers):
return self._download_json(
@@ -330,40 +361,71 @@ class DiscoveryPlusIE(DPlayIE):
'videoId': video_id,
'wisteriaProperties': {
'platform': 'desktop',
- 'product': 'dplus_us',
+ 'product': self._PRODUCT,
},
}).encode('utf-8'))['data']['attributes']['streaming']
def _real_extract(self, url):
display_id = self._match_id(url)
return self._get_disco_api_info(
- url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
+ url, display_id, self._API_URL, 'go', 'us')
-class HGTVDeIE(DPlayIE):
- _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
+class ScienceChannelIE(DiscoveryPlusIE):
+ _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayIE._PATH_REGEX
_TESTS = [{
- 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
+ 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
'info_dict': {
- 'id': '151205',
- 'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
+ 'id': '2842849',
+ 'display_id': 'strangest-things-science-atve-us/nazi-mystery-machine',
'ext': 'mp4',
- 'title': 'Wer braucht schon eine Toilette',
- 'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
- 'duration': 1177.024,
- 'timestamp': 1595705400,
- 'upload_date': '20200725',
- 'creator': 'HGTV',
- 'series': 'Tiny House - klein, aber oho',
- 'season_number': 3,
- 'episode_number': 3,
+ 'title': 'Nazi Mystery Machine',
+ 'description': 'Experts investigate the secrets of a revolutionary encryption machine.',
+ 'season_number': 1,
+ 'episode_number': 1,
},
- 'params': {
- 'format': 'bestvideo',
+ 'skip': 'Available for Premium users',
+ }]
+
+ _PRODUCT = 'sci'
+ _API_URL = 'us1-prod-direct.sciencechannel.com'
+
+
+class DIYNetworkIE(DiscoveryPlusIE):
+ _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
+ 'info_dict': {
+ 'id': '2309730',
+ 'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
+ 'ext': 'mp4',
+ 'title': 'Bringing Beach Life to Texas',
+ 'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
+ 'season_number': 10,
+ 'episode_number': 2,
},
+ 'skip': 'Available for Premium users',
}]
- def _real_extract(self, url):
- display_id = self._match_id(url)
- return self._get_disco_api_info(
- url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
+ _PRODUCT = 'diy'
+ _API_URL = 'us1-prod-direct.watch.diynetwork.com'
+
+
+class AnimalPlanetIE(DiscoveryPlusIE):
+ _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
+ 'info_dict': {
+ 'id': '3338923',
+ 'display_id': 'north-woods-law-animal-planet/squirrel-showdown',
+ 'ext': 'mp4',
+ 'title': 'Squirrel Showdown',
+ 'description': 'A woman is suspected of being in possession of flying squirrel kits.',
+ 'season_number': 16,
+ 'episode_number': 11,
+ },
+ 'skip': 'Available for Premium users',
+ }]
+
+ _PRODUCT = 'apl'
+ _API_URL = 'us1-prod-direct.animalplanet.com'
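
After this refactor, ScienceChannelIE, DIYNetworkIE and AnimalPlanetIE are just DiscoveryPlusIE with two class constants overridden. A toy illustration of that parameterize-by-class-attribute pattern (the Sketch classes here are hypothetical, not the real extractors):

    class DiscoveryPlusSketch:
        # base class reads per-site settings from class attributes
        _PRODUCT = 'dplus_us'
        _API_URL = 'us1-prod-direct.discoveryplus.com'

        def client_header(self):
            return 'WEB:UNKNOWN:%s:25.2.6' % self._PRODUCT

    class ScienceChannelSketch(DiscoveryPlusSketch):
        # a new site variant is just two overridden constants
        _PRODUCT = 'sci'
        _API_URL = 'us1-prod-direct.sciencechannel.com'

    assert ScienceChannelSketch().client_header() == 'WEB:UNKNOWN:sci:25.2.6'
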
diff --git a/hypervideo_dl/extractor/drbonanza.py b/hypervideo_dl/extractor/drbonanza.py
index 164e97c..ea0f06d 100644
--- a/hypervideo_dl/extractor/drbonanza.py
+++ b/hypervideo_dl/extractor/drbonanza.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -26,7 +25,7 @@ class DRBonanzaIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/dropbox.py b/hypervideo_dl/extractor/dropbox.py
index 2bedcc1..6a7d050 100644
--- a/hypervideo_dl/extractor/dropbox.py
+++ b/hypervideo_dl/extractor/dropbox.py
@@ -17,7 +17,7 @@ class DropboxIE(InfoExtractor):
'info_dict': {
'id': 'nelirfsxnmcfbfh',
'ext': 'mp4',
- 'title': 'hypervideo test video \'ä"BaW_jenozKc'
+ 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
}
}, {
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
@@ -26,7 +26,7 @@ class DropboxIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
fn = compat_urllib_parse_unquote(url_basename(url))
title = os.path.splitext(fn)[0]
diff --git a/hypervideo_dl/extractor/drtuber.py b/hypervideo_dl/extractor/drtuber.py
index 2baea58..540b86a 100644
--- a/hypervideo_dl/extractor/drtuber.py
+++ b/hypervideo_dl/extractor/drtuber.py
@@ -42,7 +42,7 @@ class DrTuberIE(InfoExtractor):
webpage)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
diff --git a/hypervideo_dl/extractor/drtv.py b/hypervideo_dl/extractor/drtv.py
index c0036ad..7bb15f8 100644
--- a/hypervideo_dl/extractor/drtv.py
+++ b/hypervideo_dl/extractor/drtv.py
@@ -242,7 +242,7 @@ class DRTVIE(InfoExtractor):
elif target == 'HLS':
formats.extend(self._extract_m3u8_formats(
uri, video_id, 'mp4', entry_protocol='m3u8_native',
- preference=preference, m3u8_id=format_id,
+ quality=preference, m3u8_id=format_id,
fatal=False))
else:
bitrate = link.get('Bitrate')
@@ -254,7 +254,7 @@ class DRTVIE(InfoExtractor):
'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
'vcodec': 'none' if kind == 'AudioResource' else None,
- 'preference': preference,
+ 'quality': preference,
})
subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
if isinstance(subtitles_list, list):
diff --git a/hypervideo_dl/extractor/dtube.py b/hypervideo_dl/extractor/dtube.py
index 114d2db..ad247b7 100644
--- a/hypervideo_dl/extractor/dtube.py
+++ b/hypervideo_dl/extractor/dtube.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from socket import timeout
from .common import InfoExtractor
@@ -32,7 +31,7 @@ class DTubeIE(InfoExtractor):
}
def _real_extract(self, url):
- uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+ uploader_id, video_id = self._match_valid_url(url).groups()
result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
'jsonrpc': '2.0',
'method': 'get_content',
diff --git a/hypervideo_dl/extractor/duboku.py b/hypervideo_dl/extractor/duboku.py
new file mode 100644
index 0000000..a875978
--- /dev/null
+++ b/hypervideo_dl/extractor/duboku.py
@@ -0,0 +1,242 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ ExtractorError,
+ get_elements_by_class,
+ int_or_none,
+ js_to_json,
+ smuggle_url,
+ unescapeHTML,
+)
+
+
+def _get_elements_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
+ """Return the content of the tag with the specified attribute in the passed HTML document"""
+
+ if tag is None:
+ tag = '[a-zA-Z0-9:._-]+'
+ if attribute is None:
+ attribute = ''
+ else:
+ attribute = r'\s+(?P<attribute>%s)' % re.escape(attribute)
+ if value is None:
+ value = ''
+ else:
+ value = re.escape(value) if escape_value else value
+ value = '=[\'"]?(?P<value>%s)[\'"]?' % value
+
+ retlist = []
+ for m in re.finditer(r'''(?xs)
+ <(?P<tag>%s)
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ %s%s
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s*>
+ (?P<content>.*?)
+ </\1>
+ ''' % (tag, attribute, value), html):
+ retlist.append(m)
+
+ return retlist
+
+
+def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, escape_value=True):
+ retval = _get_elements_by_tag_and_attrib(html, tag, attribute, value, escape_value)
+ return retval[0] if retval else None
+
+
+class DubokuIE(InfoExtractor):
+ IE_NAME = 'duboku'
+ IE_DESC = 'www.duboku.co'
+
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
+ _TESTS = [{
+ 'url': 'https://www.duboku.co/vodplay/1575-1-1.html',
+ 'info_dict': {
+ 'id': '1575-1-1',
+ 'ext': 'ts',
+ 'series': '白色月光',
+ 'title': 'contains:白色月光',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ }, {
+ 'url': 'https://www.duboku.co/vodplay/1588-1-1.html',
+ 'info_dict': {
+ 'id': '1588-1-1',
+ 'ext': 'ts',
+ 'series': '亲爱的自己',
+ 'title': 'contains:预告片',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ }]
+
+ _PLAYER_DATA_PATTERN = r'player_data\s*=\s*(\{\s*(.*)})\s*;?\s*</script'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ temp = video_id.split('-')
+ series_id = temp[0]
+ season_id = temp[1]
+ episode_id = temp[2]
+
+ webpage_url = 'https://www.duboku.co/vodplay/%s.html' % video_id
+ webpage_html = self._download_webpage(webpage_url, video_id)
+
+ # extract video url
+
+ player_data = self._search_regex(
+ self._PLAYER_DATA_PATTERN, webpage_html, 'player_data')
+ player_data = self._parse_json(player_data, video_id, js_to_json)
+
+ # extract title
+
+ temp = get_elements_by_class('title', webpage_html)
+ series_title = None
+ title = None
+ for html in temp:
+ mobj = re.search(r'<a\s+.*>(.*)</a>', html)
+ if mobj:
+ href = extract_attributes(mobj.group(0)).get('href')
+ if href:
+ mobj1 = re.search(r'/(\d+)\.html', href)
+ if mobj1 and mobj1.group(1) == series_id:
+ series_title = clean_html(mobj.group(0))
+ series_title = re.sub(r'[\s\r\n\t]+', ' ', series_title)
+ title = clean_html(html)
+ title = re.sub(r'[\s\r\n\t]+', ' ', title)
+ break
+
+ data_url = player_data.get('url')
+ if not data_url:
+ raise ExtractorError('Cannot find url in player_data')
+ data_from = player_data.get('from')
+
+ # if it is an embedded iframe, maybe it's an external source
+ if data_from == 'iframe':
+ # use _type url_transparent to retain the meaningful details
+ # of the video.
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(data_url, {'http_headers': {'Referer': webpage_url}}),
+ 'id': video_id,
+ 'title': title,
+ 'series': series_title,
+ 'season_number': int_or_none(season_id),
+ 'season_id': season_id,
+ 'episode_number': int_or_none(episode_id),
+ 'episode_id': episode_id,
+ }
+
+ formats = self._extract_m3u8_formats(data_url, video_id, 'mp4')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'series': series_title,
+ 'season_number': int_or_none(season_id),
+ 'season_id': season_id,
+ 'episode_number': int_or_none(episode_id),
+ 'episode_id': episode_id,
+ 'formats': formats,
+ 'http_headers': {'Referer': 'https://www.duboku.co/static/player/videojs.html'}
+ }
+
+
+class DubokuPlaylistIE(InfoExtractor):
+ IE_NAME = 'duboku:list'
+ IE_DESC = 'www.duboku.co entire series'
+
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/voddetail/)(?P<id>[0-9]+)\.html.*'
+ _TESTS = [{
+ 'url': 'https://www.duboku.co/voddetail/1575.html',
+ 'info_dict': {
+ 'id': 'startswith:1575',
+ 'title': '白色月光',
+ },
+ 'playlist_count': 12,
+ }, {
+ 'url': 'https://www.duboku.co/voddetail/1554.html',
+ 'info_dict': {
+ 'id': 'startswith:1554',
+ 'title': '以家人之名',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'https://www.duboku.co/voddetail/1554.html#playlist2',
+ 'info_dict': {
+ 'id': '1554#playlist2',
+ 'title': '以家人之名',
+ },
+ 'playlist_mincount': 27,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ series_id = mobj.group('id')
+ fragment = compat_urlparse.urlparse(url).fragment
+
+ webpage_url = 'https://www.duboku.co/voddetail/%s.html' % series_id
+ webpage_html = self._download_webpage(webpage_url, series_id)
+
+ # extract title
+
+ title = _get_element_by_tag_and_attrib(webpage_html, 'h1', 'class', 'title')
+ title = unescapeHTML(title.group('content')) if title else None
+ if not title:
+ title = self._html_search_meta('keywords', webpage_html)
+ if not title:
+ title = _get_element_by_tag_and_attrib(webpage_html, 'title')
+ title = unescapeHTML(title.group('content')) if title else None
+
+ # extract playlists
+
+ playlists = {}
+ for div in _get_elements_by_tag_and_attrib(
+ webpage_html, attribute='id', value='playlist\\d+', escape_value=False):
+ playlist_id = div.group('value')
+ playlist = []
+ for a in _get_elements_by_tag_and_attrib(
+ div.group('content'), 'a', 'href', value='[^\'"]+?', escape_value=False):
+ playlist.append({
+ 'href': unescapeHTML(a.group('value')),
+ 'title': unescapeHTML(a.group('content'))
+ })
+ playlists[playlist_id] = playlist
+
+ # select the specified playlist if url fragment exists
+ playlist = None
+ playlist_id = None
+ if fragment:
+ playlist = playlists.get(fragment)
+ playlist_id = fragment
+ else:
+ first = next(iter(playlists.items()), None)
+ if first:
+ (playlist_id, playlist) = first
+ if not playlist:
+ raise ExtractorError(
+ 'Cannot find %s' % fragment if fragment else 'Cannot extract playlist')
+
+ # return url results
+ return self.playlist_result([
+ self.url_result(
+ compat_urlparse.urljoin('https://www.duboku.co', x['href']),
+ ie=DubokuIE.ie_key(), video_title=x.get('title'))
+ for x in playlist], series_id + '#' + playlist_id, title)
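
duboku.py's _get_elements_by_tag_and_attrib returns re match objects whose named groups tag, attribute, value and content can be read directly, and with escape_value=False the value argument is itself a regex. A small usage check, assuming the helper defined in the hunk above is in scope:

    html = '<div id="playlist1"><a href="/vodplay/1575-1-1.html">EP1</a></div>'
    div = _get_elements_by_tag_and_attrib(
        html, attribute='id', value=r'playlist\d+', escape_value=False)[0]
    assert div.group('value') == 'playlist1'
    link = _get_elements_by_tag_and_attrib(
        div.group('content'), 'a', 'href', value='[^\'"]+?', escape_value=False)[0]
    assert link.group('content') == 'EP1'
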
diff --git a/hypervideo_dl/extractor/dw.py b/hypervideo_dl/extractor/dw.py
index d740652..6eaee07 100644
--- a/hypervideo_dl/extractor/dw.py
+++ b/hypervideo_dl/extractor/dw.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
+ url_or_none,
)
from ..compat import compat_urlparse
@@ -15,13 +16,13 @@ class DWIE(InfoExtractor):
_TESTS = [{
# video
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
- 'md5': '7372046e1815c5a534b43f3c3c36e6e9',
+ 'md5': 'fb9dfd9520811d3ece80f04befd73428',
'info_dict': {
'id': '19112290',
'ext': 'mp4',
'title': 'Intelligent light',
'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
- 'upload_date': '20160311',
+ 'upload_date': '20160605',
}
}, {
# audio
@@ -55,15 +56,16 @@ class DWIE(InfoExtractor):
title = hidden_inputs['media_title']
media_id = hidden_inputs.get('media_id') or media_id
- if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
+ direct_url = url_or_none(hidden_inputs.get('file_name'))
+ if direct_url:
+ formats = [{'url': hidden_inputs['file_name']}]
+ else:
formats = self._extract_smil_formats(
'http://www.dw.com/smil/v-%s' % media_id, media_id,
transform_source=lambda s: s.replace(
'rtmp://tv-od.dw.de/flash/',
'http://tv-download.dw.de/dwtv_video/flv/'))
- self._sort_formats(formats)
- else:
- formats = [{'url': hidden_inputs['file_name']}]
+ self._sort_formats(formats)
upload_date = hidden_inputs.get('display_date')
if not upload_date:
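
dw.py now prefers a direct file whenever hidden_inputs['file_name'] validates as a URL, falling back to the SMIL manifest otherwise. A simplified sketch of the url_or_none gate (the real utils helper accepts more schemes, e.g. rtmp/mms/ftp variants, via a wider regex):

    import re

    def url_or_none(url):
        # return the string only when it looks like a URL, else None
        if not url or not isinstance(url, str):
            return None
        url = url.strip()
        return url if re.match(r'^(?:https?:)?//', url) else None

    assert url_or_none('https://tv-download.dw.de/video.mp4')
    assert url_or_none('1') is None
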
diff --git a/hypervideo_dl/extractor/eagleplatform.py b/hypervideo_dl/extractor/eagleplatform.py
index 36fef07..f86731a 100644
--- a/hypervideo_dl/extractor/eagleplatform.py
+++ b/hypervideo_dl/extractor/eagleplatform.py
@@ -123,7 +123,7 @@ class EaglePlatformIE(InfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
headers = {}
diff --git a/hypervideo_dl/extractor/egghead.py b/hypervideo_dl/extractor/egghead.py
index aff9b88..f6b50e7 100644
--- a/hypervideo_dl/extractor/egghead.py
+++ b/hypervideo_dl/extractor/egghead.py
@@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
class EggheadCourseIE(EggheadBaseIE):
IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course'
- _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
- _TEST = {
+ _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
- 'id': '72',
+ 'id': '432655',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
- }
+ }, {
+ 'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
class EggheadLessonIE(EggheadBaseIE):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
- _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
@@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
+ }, {
+ 'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -107,8 +113,7 @@ class EggheadLessonIE(EggheadBaseIE):
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
- format_url, lesson_id, 'mp4', entry_protocol='m3u8',
- m3u8_id='hls', fatal=False))
+ format_url, lesson_id, 'mp4', m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, lesson_id, mpd_id='dash', fatal=False))
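
The widened egghead patterns now accept the app. subdomain and both the /courses/ and /playlists/ paths. A quick self-check of the course regex, runnable as-is:

    import re

    _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
    for url in ('https://egghead.io/courses/foo',
                'https://app.egghead.io/playlists/foo'):
        assert re.match(_VALID_URL, url).group('id') == 'foo'
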
diff --git a/hypervideo_dl/extractor/eighttracks.py b/hypervideo_dl/extractor/eighttracks.py
index 9b1e1ce..9a44f89 100644
--- a/hypervideo_dl/extractor/eighttracks.py
+++ b/hypervideo_dl/extractor/eighttracks.py
@@ -21,9 +21,9 @@ class EightTracksIE(InfoExtractor):
'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a',
'info_dict': {
'id': '1336550',
- 'display_id': 'hypervideo-test-tracks-a',
+ 'display_id': 'youtube-dl-test-tracks-a',
'description': "test chars: \"'/\\ä↭",
- 'title': "hypervideo test tracks \"'/\\ä↭<>",
+ 'title': "youtube-dl test tracks \"'/\\ä↭<>",
},
'playlist': [
{
@@ -31,7 +31,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885610',
'ext': 'm4a',
- 'title': "youtue-dl project<>\"' - hypervideo test track 1 \"'/\\\u00e4\u21ad",
+ 'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
},
@@ -40,7 +40,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885608',
'ext': 'm4a',
- 'title': "hypervideo project - hypervideo test track 2 \"'/\\\u00e4\u21ad",
+ 'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
},
@@ -49,7 +49,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885679',
'ext': 'm4a',
- 'title': "hypervideo project as well - hypervideo test track 3 \"'/\\\u00e4\u21ad",
+ 'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
},
@@ -58,7 +58,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885680',
'ext': 'm4a',
- 'title': "hypervideo project as well - hypervideo test track 4 \"'/\\\u00e4\u21ad",
+ 'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
},
@@ -67,7 +67,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885682',
'ext': 'm4a',
- 'title': "PH - hypervideo test track 5 \"'/\\\u00e4\u21ad",
+ 'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
},
@@ -76,7 +76,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885683',
'ext': 'm4a',
- 'title': "PH - hypervideo test track 6 \"'/\\\u00e4\u21ad",
+ 'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
},
@@ -85,7 +85,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885684',
'ext': 'm4a',
- 'title': "phihag - hypervideo test track 7 \"'/\\\u00e4\u21ad",
+ 'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
},
@@ -94,7 +94,7 @@ class EightTracksIE(InfoExtractor):
'info_dict': {
'id': '11885685',
'ext': 'm4a',
- 'title': "phihag - hypervideo test track 8 \"'/\\\u00e4\u21ad",
+ 'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
'uploader_id': 'ytdl'
}
}
diff --git a/hypervideo_dl/extractor/einthusan.py b/hypervideo_dl/extractor/einthusan.py
index 4e0f8bc..7af279a 100644
--- a/hypervideo_dl/extractor/einthusan.py
+++ b/hypervideo_dl/extractor/einthusan.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import (
@@ -48,7 +47,7 @@ class EinthusanIE(InfoExtractor):
)).decode('utf-8'), video_id)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host')
video_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/elonet.py b/hypervideo_dl/extractor/elonet.py
new file mode 100644
index 0000000..eefba4e
--- /dev/null
+++ b/hypervideo_dl/extractor/elonet.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ base_url,
+ ExtractorError,
+ try_get,
+)
+from ..compat import compat_str
+
+
+class ElonetIE(InfoExtractor):
+ _VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
+ _TESTS = [{
+ # m3u8 with subtitles
+ 'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
+ 'md5': '8efc954b96c543711707f87de757caea',
+ 'info_dict': {
+ 'id': '107867',
+ 'ext': 'mp4',
+ 'title': 'Valkoinen peura',
+ 'description': 'Valkoinen peura (1952) on Erik Blombergin ohjaama ja yhdessä Mirjami Kuosmasen kanssa käsikirjoittama tarunomainen kertomus valkoisen peuran hahmossa lii...',
+ 'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_107867&index=0&size=large',
+ },
+ }, {
+ # DASH with subtitles
+ 'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_116539',
+ 'info_dict': {
+ 'id': '116539',
+ 'ext': 'mp4',
+ 'title': 'Minulla on tiikeri',
+ 'description': 'Pienellä pojalla, joka asuu kerrostalossa, on kotieläimenä tiikeri. Se on kuitenkin salaisuus. Kerrostalon räpätäti on Kotilaisen täti, joka on aina vali...',
+ 'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_116539&index=0&size=large&source=Solr',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<meta .*property="og&#x3A;title" .*content="(.+?)"', webpage, 'title')
+ description = self._html_search_regex(
+ r'<meta .*property="og&#x3A;description" .*content="(.+?)"', webpage, 'description')
+ thumbnail = self._html_search_regex(
+ r'<meta .*property="og&#x3A;image" .*content="(.+?)"', webpage, 'thumbnail')
+
+ json_s = self._html_search_regex(
+ r'data-video-sources="(.+?)"', webpage, 'json')
+ src = try_get(
+ self._parse_json(json_s, video_id),
+ lambda x: x[0]["src"], compat_str)
+ formats = []
+ subtitles = {}
+ if re.search(r'\.m3u8\??', src):
+ res = self._download_webpage_handle(
+ # elonet servers have certificate problems
+ src.replace('https:', 'http:'), video_id,
+ note='Downloading m3u8 information',
+ errnote='Failed to download m3u8 information')
+ if res:
+ doc, urlh = res
+ url = urlh.geturl()
+ formats, subtitles = self._parse_m3u8_formats_and_subtitles(doc, url)
+ for f in formats:
+ f['ext'] = 'mp4'
+ elif re.search(r'\.mpd\??', src):
+ res = self._download_xml_handle(
+ src, video_id,
+ note='Downloading MPD manifest',
+ errnote='Failed to download MPD manifest')
+ if res:
+ doc, urlh = res
+ url = base_url(urlh.geturl())
+ formats, subtitles = self._parse_mpd_formats_and_subtitles(doc, mpd_base_url=url)
+ else:
+ raise ExtractorError("Unknown streaming format")
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
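
ElonetIE dispatches on the manifest extension: .m3u8 sources go through the HLS parser, .mpd through the DASH parser, anything else is an error. The dispatch step in isolation, with made-up URLs:

    import re

    def manifest_kind(src):
        # mirrors the branching in ElonetIE._real_extract above
        if re.search(r'\.m3u8\??', src):
            return 'hls'
        if re.search(r'\.mpd\??', src):
            return 'dash'
        raise ValueError('Unknown streaming format')

    assert manifest_kind('https://cdn.example/master.m3u8?token=x') == 'hls'
    assert manifest_kind('https://cdn.example/manifest.mpd') == 'dash'
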
diff --git a/hypervideo_dl/extractor/epicon.py b/hypervideo_dl/extractor/epicon.py
new file mode 100644
index 0000000..b4e544d
--- /dev/null
+++ b/hypervideo_dl/extractor/epicon.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class EpiconIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar',
+ 'info_dict': {
+ 'id': 'air-battle-of-srinagar',
+ 'ext': 'mp4',
+ 'title': 'Air Battle of Srinagar',
+ 'description': 'md5:c4de2013af9bc05ae4392e4115d518d7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.epicon.in/movies/krit',
+ 'info_dict': {
+ 'id': 'krit',
+ 'ext': 'mp4',
+ 'title': 'Krit',
+ 'description': 'md5:c12b35dad915d48ccff7f013c79bab4a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/paapnaashini-ganga/season-1/vardaan',
+ 'info_dict': {
+ 'id': 'vardaan',
+ 'ext': 'mp4',
+ 'title': 'Paapnaashini Ganga - Season 1 - Ep 1 - VARDAAN',
+ 'description': 'md5:f517058c3d0402398eefa6242f4dd6ae',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.epicon.in/movies/jayadev',
+ 'info_dict': {
+ 'id': 'jayadev',
+ 'ext': 'mp4',
+ 'title': 'Jayadev',
+ 'description': 'md5:09e349eecd8e585a3b6466904f19df6c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ cid = self._search_regex(r'class=\"mylist-icon\ iconclick\"\ id=\"(\d+)', webpage, 'cid')
+ headers = {'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}
+ data = f'cid={cid}&action=st&type=video'.encode()
+ data_json = self._parse_json(self._download_json('https://www.epicon.in/ajaxplayer/', id, headers=headers, data=data), id)
+
+ if not data_json['success']:
+ raise ExtractorError(data_json['message'], expected=True)
+
+ title = self._search_regex(r'setplaytitle=\"([^\"]+)', webpage, 'title')
+ description = self._og_search_description(webpage) or None
+ thumbnail = self._og_search_thumbnail(webpage) or None
+ formats = self._extract_m3u8_formats(data_json['url']['video_url'], id)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for subtitle in data_json.get('subtitles', []):
+ sub_url = subtitle.get('file')
+ if not sub_url:
+ continue
+ subtitles.setdefault(subtitle.get('lang', 'English'), []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+
+ return {
+ 'id': id,
+ 'formats': formats,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'subtitles': subtitles,
+ }
+
+
+class EpiconSeriesIE(InfoExtractor):
+ _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.epicon.in/tv-shows/1-of-something',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': '1-of-something',
+ },
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/eco-india-english',
+ 'playlist_mincount': 76,
+ 'info_dict': {
+ 'id': 'eco-india-english',
+ },
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/s/',
+ 'playlist_mincount': 25,
+ 'info_dict': {
+ 'id': 's',
+ },
+ }, {
+ 'url': 'https://www.epicon.in/tv-shows/ekaant',
+ 'playlist_mincount': 38,
+ 'info_dict': {
+ 'id': 'ekaant',
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ episodes = re.findall(r'ct-tray-url=\"(tv-shows/%s/[^\"]+)' % id, webpage)
+ entries = [self.url_result('https://www.epicon.in/%s' % episode, ie=EpiconIE.ie_key()) for episode in episodes]
+ return self.playlist_result(entries, playlist_id=id)
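
The subtitles mapping EpiconIE builds is the shape the framework expects: language name to a list of {'url': ...} dicts, accumulated with setdefault. A sketch with hypothetical cdn.example data and proto_relative as a simplified stand-in for _proto_relative_url:

    def proto_relative(url, scheme='https:'):
        # prefix a scheme only for protocol-relative ('//...') URLs
        return scheme + url if url.startswith('//') else url

    subtitles = {}
    for subtitle in [{'lang': 'Hindi', 'file': '//cdn.example/hi.vtt'}, {'file': ''}]:
        sub_url = subtitle.get('file')
        if not sub_url:
            continue
        subtitles.setdefault(subtitle.get('lang', 'English'), []).append(
            {'url': proto_relative(sub_url)})
    assert subtitles == {'Hindi': [{'url': 'https://cdn.example/hi.vtt'}]}
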
diff --git a/hypervideo_dl/extractor/eporner.py b/hypervideo_dl/extractor/eporner.py
index bfecd3a..25a0d97 100644
--- a/hypervideo_dl/extractor/eporner.py
+++ b/hypervideo_dl/extractor/eporner.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -51,7 +50,7 @@ class EpornerIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
diff --git a/hypervideo_dl/extractor/eroprofile.py b/hypervideo_dl/extractor/eroprofile.py
index c460dc7..a8396f1 100644
--- a/hypervideo_dl/extractor/eroprofile.py
+++ b/hypervideo_dl/extractor/eroprofile.py
@@ -90,3 +90,42 @@ class EroProfileIE(InfoExtractor):
'title': title,
'age_limit': 18,
})
+
+
+class EroProfileAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
+ IE_NAME = 'EroProfile:album'
+
+ _TESTS = [{
+ 'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
+ 'info_dict': {
+ 'id': 'BBW-2-893',
+ 'title': 'BBW 2'
+ },
+ 'playlist_mincount': 486,
+ },
+ ]
+
+ def _extract_from_page(self, page):
+ for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
+ yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
+
+ def _entries(self, playlist_id, first_page):
+ yield from self._extract_from_page(first_page)
+
+ page_urls = re.findall(rf'href=".*?(/m/videos/album/{playlist_id}\?pnum=(\d+))"', first_page)
+ max_page = max(int(n) for _, n in page_urls)
+
+ for n in range(2, max_page + 1):
+ url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
+ yield from self._extract_from_page(
+ self._download_webpage(url, playlist_id,
+ note=f'Downloading playlist page {int(n) - 1}'))
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
+ playlist_title = self._search_regex(
+ r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
+
+ return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)
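
EroProfileAlbumIE finds the album's last page by collecting every ?pnum=N link on the first page and taking the largest N. That step in isolation, against a made-up snippet (it assumes at least one such link exists, as the extractor does):

    import re

    def max_page_number(page_html, playlist_id):
        page_urls = re.findall(
            rf'href=".*?(/m/videos/album/{playlist_id}\?pnum=(\d+))"', page_html)
        return max(int(n) for _, n in page_urls)

    html = ('<a href="/m/videos/album/BBW-2-893?pnum=2">2</a> '
            '<a href="/m/videos/album/BBW-2-893?pnum=7">7</a>')
    assert max_page_number(html, 'BBW-2-893') == 7
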
diff --git a/hypervideo_dl/extractor/espn.py b/hypervideo_dl/extractor/espn.py
index 6cf05e6..d4a66c2 100644
--- a/hypervideo_dl/extractor/espn.py
+++ b/hypervideo_dl/extractor/espn.py
@@ -154,7 +154,7 @@ class ESPNIE(OnceIE):
'tbr': int(mobj.group(3)),
})
if source_id == 'mezzanine':
- f['preference'] = 1
+ f['quality'] = 1
formats.append(f)
links = clip.get('links', {})
diff --git a/hypervideo_dl/extractor/europa.py b/hypervideo_dl/extractor/europa.py
index 2c1c747..60ab2ce 100644
--- a/hypervideo_dl/extractor/europa.py
+++ b/hypervideo_dl/extractor/europa.py
@@ -2,11 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urlparse
from ..utils import (
int_or_none,
orderedSet,
parse_duration,
+ parse_qs,
qualities,
unified_strdate,
xpath_text
@@ -53,7 +53,7 @@ class EuropaIE(InfoExtractor):
if items.get(p):
return items[p]
- query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ query = parse_qs(url)
preferred_lang = query.get('sitelang', ('en', ))[0]
preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
diff --git a/hypervideo_dl/extractor/euscreen.py b/hypervideo_dl/extractor/euscreen.py
new file mode 100644
index 0000000..3980c23
--- /dev/null
+++ b/hypervideo_dl/extractor/euscreen.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ parse_duration,
+ js_to_json,
+)
+
+
+class EUScreenIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
+
+ _TESTS = [{
+ 'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C',
+ 'info_dict': {
+ 'id': 'EUS_0EBCBF356BFC4E12A014023BA41BD98C',
+ 'ext': 'mp4',
+ 'title': "L'effondrement du stade du Heysel",
+ 'alt_title': 'Collapse of the Heysel Stadium',
+ 'duration': 318.0,
+ 'description': 'md5:f0ffffdfce6821139357a1b8359d6152',
+ 'series': 'JA2 DERNIERE',
+ 'episode': '-',
+ 'uploader': 'INA / France',
+ 'thumbnail': 'http://images3.noterik.com/domain/euscreenxl/user/eu_ina/video/EUS_0EBCBF356BFC4E12A014023BA41BD98C/image.jpg'
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ _payload = b'<fsxml><screen><properties><screenId>-1</screenId></properties><capabilities id="1"><properties><platform>Win32</platform><appcodename>Mozilla</appcodename><appname>Netscape</appname><appversion>5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36</appversion><useragent>Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36</useragent><cookiesenabled>true</cookiesenabled><screenwidth>784</screenwidth><screenheight>758</screenheight><orientation>undefined</orientation><smt_browserid>Sat, 07 Oct 2021 08:56:50 GMT</smt_browserid><smt_sessionid>1633769810758</smt_sessionid></properties></capabilities></screen></fsxml>'
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ args_for_js_request = self._download_webpage(
+ 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem',
+ id, data=self._payload, query={'actionlist': 'itempage', 'id': id})
+ info_js = self._download_webpage(
+ 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem',
+ id, data=args_for_js_request.replace('screenid', 'screenId').encode())
+ video_json = self._parse_json(
+ self._search_regex(r'setVideo\(({.+})\)\(\$end\$\)put', info_js, 'Video JSON'),
+ id, transform_source=js_to_json)
+ meta_json = self._parse_json(
+ self._search_regex(r'setData\(({.+})\)\(\$end\$\)', info_js, 'Metadata JSON'),
+ id, transform_source=js_to_json)
+ formats = [{
+ 'url': source['src'],
+ } for source in video_json.get('sources', [])]
+ self._sort_formats(formats)
+
+ return {
+ 'id': id,
+ 'title': meta_json.get('originalTitle'),
+ 'alt_title': meta_json.get('title'),
+ 'duration': parse_duration(meta_json.get('duration')),
+ 'description': '%s\n%s' % (meta_json.get('summaryOriginal', ''), meta_json.get('summaryEnglish', '')),
+ 'series': meta_json.get('series') or meta_json.get('seriesEnglish'),
+ 'episode': meta_json.get('episodeNumber'),
+ 'uploader': meta_json.get('provider'),
+ 'thumbnail': meta_json.get('screenshot') or video_json.get('screenshot'),
+ 'formats': formats,
+ }
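
EUScreenIE digs its metadata out of generated JavaScript, locating the setVideo(...) payload by its ($end$) delimiter and converting it with js_to_json. The extraction step with a toy js_to_json_min (the real utils.js_to_json handles far more syntax):

    import json
    import re

    info_js = "setVideo({sources: [{src: 'https://cdn.example/item.mp4'}]})($end$)put"

    def js_to_json_min(code):
        # quote single-quoted strings, then bare object keys
        code = re.sub(r"'([^']*)'", r'"\1"', code)
        return re.sub(r'([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:', r'\1"\2":', code)

    raw = re.search(r'setVideo\(({.+})\)\(\$end\$\)put', info_js).group(1)
    video = json.loads(js_to_json_min(raw))
    assert video['sources'][0]['src'] == 'https://cdn.example/item.mp4'
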
diff --git a/hypervideo_dl/extractor/everyonesmixtape.py b/hypervideo_dl/extractor/everyonesmixtape.py
new file mode 100644
index 0000000..80cb032
--- /dev/null
+++ b/hypervideo_dl/extractor/everyonesmixtape.py
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ sanitized_Request,
+)
+
+
+class EveryonesMixtapeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
+
+ _TESTS = [{
+ 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
+ 'info_dict': {
+ 'id': '5bfseWNmlds',
+ 'ext': 'mp4',
+ 'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
+ 'uploader': 'FKR.TV',
+ 'uploader_id': 'frenchkissrecords',
+ 'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
+ 'upload_date': '20081015'
+ },
+ 'params': {
+ 'skip_download': True, # This is simply YouTube
+ }
+ }, {
+ 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
+ 'info_dict': {
+ 'id': 'm7m0jJAbMQi',
+ 'title': 'Driving',
+ },
+ 'playlist_count': 24
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ playlist_id = mobj.group('id')
+
+ pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
+ pllist_req = sanitized_Request(pllist_url)
+ pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
+
+ playlist_list = self._download_json(
+ pllist_req, playlist_id, note='Downloading playlist metadata')
+ try:
+ playlist_no = next(playlist['id']
+ for playlist in playlist_list
+ if playlist['code'] == playlist_id)
+ except StopIteration:
+ raise ExtractorError('Playlist id not found')
+
+ pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
+ pl_req = sanitized_Request(pl_url)
+ pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
+ playlist = self._download_json(
+ pl_req, playlist_id, note='Downloading playlist info')
+
+ entries = [{
+ '_type': 'url',
+ 'url': t['url'],
+ 'title': t['title'],
+ } for t in playlist['tracks']]
+
+ if mobj.group('songnr'):
+ songnr = int(mobj.group('songnr')) - 1
+ return entries[songnr]
+
+ playlist_title = playlist['mixData']['name']
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': entries,
+ }
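
The mixtape API above appears to answer only requests flagged as AJAX, hence the X-Requested-With header on both sanitized_Request calls. A plain-stdlib equivalent of that setup (no network access needed for the check):

    from urllib.request import Request

    req = Request(
        'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=m7m0jJAbMQi&explore=')
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    assert req.get_header('X-requested-with') == 'XMLHttpRequest'
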
diff --git a/hypervideo_dl/extractor/extractors.py b/hypervideo_dl/extractor/extractors.py
index 402e542..f4f817f 100644
--- a/hypervideo_dl/extractor/extractors.py
+++ b/hypervideo_dl/extractor/extractors.py
@@ -41,7 +41,15 @@ from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
+from .alura import (
+ AluraIE,
+ AluraCourseIE
+)
from .amcnetworks import AMCNetworksIE
+from .animelab import (
+ AnimeLabIE,
+ AnimeLabShowsIE,
+)
from .americastestkitchen import (
AmericasTestKitchenIE,
AmericasTestKitchenSeasonIE,
@@ -59,7 +67,10 @@ from .appletrailers import (
AppleTrailersSectionIE,
)
from .applepodcasts import ApplePodcastsIE
-from .archiveorg import ArchiveOrgIE
+from .archiveorg import (
+ ArchiveOrgIE,
+ YoutubeWebArchiveIE,
+)
from .arcpublishing import ArcPublishingIE
from .arkena import ArkenaIE
from .ard import (
@@ -83,6 +94,12 @@ from .atvat import ATVAtIE
from .audimedia import AudiMediaIE
from .audioboom import AudioBoomIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .audius import (
+ AudiusIE,
+ AudiusTrackIE,
+ AudiusPlaylistIE,
+ AudiusProfileIE,
+)
from .awaan import (
AWAANIE,
AWAANVideoIE,
@@ -92,7 +109,13 @@ from .awaan import (
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
from .bandaichannel import BandaiChannelIE
-from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
+from .bandcamp import (
+ BandcampIE,
+ BandcampAlbumIE,
+ BandcampWeeklyIE,
+ BandcampMusicIE,
+)
+from .bannedvideo import BannedVideoIE
from .bbc import (
BBCCoUkIE,
BBCCoUkArticleIE,
@@ -117,17 +140,27 @@ from .bigflix import BigflixIE
from .bild import BildIE
from .bilibili import (
BiliBiliIE,
+ BiliBiliSearchIE,
+ BilibiliCategoryIE,
BiliBiliBangumiIE,
BilibiliAudioIE,
BilibiliAudioAlbumIE,
BiliBiliPlayerIE,
+ BilibiliChannelIE,
+ BiliIntlIE,
+ BiliIntlSeriesIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
BitChuteIE,
BitChuteChannelIE,
)
+from .bitwave import (
+ BitwaveReplayIE,
+ BitwaveStreamIE,
+)
from .biqle import BIQLEIE
+from .blackboardcollaborate import BlackboardCollaborateIE
from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
@@ -152,12 +185,12 @@ from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
+from .cam4 import CAM4IE
from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
from .cammodels import CamModelsIE
-from .camtube import CamTubeIE
from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
@@ -175,9 +208,9 @@ from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
CBCPlayerIE,
- CBCWatchVideoIE,
- CBCWatchIE,
- CBCOlympicsIE,
+ CBCGemIE,
+ CBCGemPlaylistIE,
+ CBCGemLiveIE,
)
from .cbs import CBSIE
from .cbslocal import (
@@ -206,10 +239,15 @@ from .ceskatelevize import (
CeskaTelevizeIE,
CeskaTelevizePoradyIE,
)
+from .cgtn import CGTNIE
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
from .chilloutzone import ChilloutzoneIE
+from .chingari import (
+ ChingariIE,
+ ChingariUserIE,
+)
from .chirbit import (
ChirbitIE,
ChirbitProfileIE,
@@ -220,6 +258,7 @@ from .ciscolive import (
CiscoLiveSessionIE,
CiscoLiveSearchIE,
)
+from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
@@ -249,6 +288,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import (
MmsIE,
RtmpIE,
+ ViewSourceIE,
)
from .condenast import CondeNastIE
from .contv import CONtvIE
@@ -258,7 +298,9 @@ from .crackle import CrackleIE
from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import (
CrunchyrollIE,
- CrunchyrollShowPlaylistIE
+ CrunchyrollShowPlaylistIE,
+ CrunchyrollBetaIE,
+ CrunchyrollBetaShowIE,
)
from .cspan import CSpanIE
from .ctsnews import CtsNewsIE
@@ -276,6 +318,10 @@ from .dailymotion import (
DailymotionPlaylistIE,
DailymotionUserIE,
)
+from .damtomo import (
+ DamtomoRecordIE,
+ DamtomoVideoIE,
+)
from .daum import (
DaumIE,
DaumClipIE,
@@ -284,11 +330,18 @@ from .daum import (
)
from .dbtv import DBTVIE
from .dctp import DctpTvIE
-from .deezer import DeezerPlaylistIE
+from .deezer import (
+ DeezerPlaylistIE,
+ DeezerAlbumIE,
+)
from .democracynow import DemocracynowIE
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
+from .discoveryplusindia import (
+ DiscoveryPlusIndiaIE,
+ DiscoveryPlusIndiaShowIE,
+)
from .dotsub import DotsubIE
from .douyutv import (
DouyuShowIE,
@@ -298,6 +351,9 @@ from .dplay import (
DPlayIE,
DiscoveryPlusIE,
HGTVDeIE,
+ ScienceChannelIE,
+ DIYNetworkIE,
+ AnimalPlanetIE
)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
@@ -308,6 +364,10 @@ from .drtv import (
)
from .dtube import DTubeIE
from .dvtv import DVTVIE
+from .duboku import (
+ DubokuIE,
+ DubokuPlaylistIE
+)
from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
@@ -319,6 +379,7 @@ from .discoverynetworks import DiscoveryNetworksDeIE
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
+from .doodstream import DoodStreamIE
from .dropbox import DropboxIE
from .dw import (
DWIE,
@@ -340,11 +401,19 @@ from .ellentube import (
EllenTubeVideoIE,
EllenTubePlaylistIE,
)
+from .elonet import ElonetIE
from .elpais import ElPaisIE
from .embedly import EmbedlyIE
from .engadget import EngadgetIE
+from .epicon import (
+ EpiconIE,
+ EpiconSeriesIE,
+)
from .eporner import EpornerIE
-from .eroprofile import EroProfileIE
+from .eroprofile import (
+ EroProfileIE,
+ EroProfileAlbumIE,
+)
from .escapist import EscapistIE
from .espn import (
ESPNIE,
@@ -353,6 +422,7 @@ from .espn import (
)
from .esri import EsriVideoIE
from .europa import EuropaIE
+from .euscreen import EUScreenIE
from .expotv import ExpoTVIE
from .expressen import ExpressenIE
from .extremetube import ExtremeTubeIE
@@ -361,12 +431,18 @@ from .facebook import (
FacebookIE,
FacebookPluginsVideoIE,
)
+from .fancode import (
+ FancodeVodIE,
+ FancodeLiveIE
+)
+
from .faz import FazIE
from .fc2 import (
FC2IE,
FC2EmbedIE,
)
from .fczenit import FczenitIE
+from .filmmodu import FilmmoduIE
from .filmon import (
FilmOnIE,
FilmOnChannelIE,
@@ -401,12 +477,7 @@ from .franceinter import FranceInterIE
from .francetv import (
FranceTVIE,
FranceTVSiteIE,
- FranceTVEmbedIE,
FranceTVInfoIE,
- FranceTVInfoSportIE,
- FranceTVJeunesseIE,
- GenerationWhatIE,
- CultureboxIE,
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
@@ -417,9 +488,14 @@ from .frontendmasters import (
FrontendMastersCourseIE
)
from .fujitv import FujiTVFODPlus7IE
-from .funimation import FunimationIE
+from .funimation import (
+ FunimationIE,
+ FunimationPageIE,
+ FunimationShowIE,
+)
from .funk import FunkIE
from .fusion import FusionIE
+from .gab import GabTVIE
from .gaia import GaiaIE
from .gameinformer import GameInformerIE
from .gamespot import GameSpotIE
@@ -429,6 +505,7 @@ from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .gedidigital import GediDigitalIE
from .generic import GenericIE
+from .gettr import GettrIE
from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
@@ -446,8 +523,11 @@ from .googlepodcasts import (
GooglePodcastsFeedIE,
)
from .googlesearch import GoogleSearchIE
+from .gopro import GoProIE
from .goshgay import GoshgayIE
+from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE
+from .gronkh import GronkhIE
from .groupon import GrouponIE
from .hbo import HBOIE
from .hearthisat import HearThisAtIE
@@ -466,9 +546,11 @@ from .hotnewhiphop import HotNewHipHopIE
from .hotstar import (
HotStarIE,
HotStarPlaylistIE,
+ HotStarSeriesIE,
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
+from .hrfensehen import HRFernsehenIE
from .hrti import (
HRTiIE,
HRTiPlaylistIE,
@@ -478,8 +560,13 @@ from .huffpost import HuffPostIE
from .hungama import (
HungamaIE,
HungamaSongIE,
+ HungamaAlbumPlaylistIE,
)
from .hypem import HypemIE
+from .ichinanalive import (
+ IchinanaLiveIE,
+ IchinanaLiveClipIE,
+)
from .ign import (
IGNIE,
IGNVideoIE,
@@ -546,6 +633,7 @@ from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE
+from .koo import KooIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kusi import KUSIIE
@@ -557,7 +645,11 @@ from .kuwo import (
KuwoCategoryIE,
KuwoMvIE,
)
-from .la7 import LA7IE
+from .la7 import (
+ LA7IE,
+ LA7PodcastEpisodeIE,
+ LA7PodcastIE,
+)
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
@@ -610,10 +702,6 @@ from .linkedin import (
from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE
from .livejournal import LiveJournalIE
-from .liveleak import (
- LiveLeakIE,
- LiveLeakEmbedIE,
-)
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
@@ -628,6 +716,7 @@ from .lynda import (
LyndaCourseIE
)
from .m6 import M6IE
+from .magentamusik360 import MagentaMusik360IE
from .mailru import (
MailRuIE,
MailRuMusicIE,
@@ -638,6 +727,11 @@ from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
)
+from .manoto import (
+ ManotoTVIE,
+ ManotoTVShowIE,
+ ManotoTVLiveIE,
+)
from .manyvids import ManyVidsIE
from .maoritv import MaoriTVIE
from .markiza import (
@@ -648,6 +742,8 @@ from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .medaltv import MedalTVIE
+from .mediaite import MediaiteIE
+from .mediaklikk import MediaKlikkIE
from .mediaset import MediasetIE
from .mediasite import (
MediasiteIE,
@@ -668,6 +764,11 @@ from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE,
)
+from .mildom import (
+ MildomIE,
+ MildomVodIE,
+ MildomUserVodIE,
+)
from .minds import (
MindsIE,
MindsChannelIE,
@@ -676,6 +777,10 @@ from .minds import (
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
+from .mirrativ import (
+ MirrativIE,
+ MirrativUserIE,
+)
from .mit import TechTVMITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import (
@@ -710,9 +815,16 @@ from .mtv import (
MTVServicesEmbeddedIE,
MTVDEIE,
MTVJapanIE,
+ MTVItaliaIE,
+ MTVItaliaProgrammaIE,
)
from .muenchentv import MuenchenTVIE
+from .musescore import MuseScoreIE
from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mxplayer import (
+ MxplayerIE,
+ MxplayerShowIE,
+)
from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
@@ -720,12 +832,17 @@ from .myvi import (
MyviIE,
MyviEmbedIE,
)
+from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
+from .n1 import N1InfoIIE, N1InfoAssetIE
from .nationalgeographic import (
NationalGeographicVideoIE,
NationalGeographicTVIE,
)
-from .naver import NaverIE
+from .naver import (
+ NaverIE,
+ NaverLiveIE,
+)
from .nba import (
NBAWatchEmbedIE,
NBAWatchIE,
@@ -751,8 +868,9 @@ from .ndr import (
NJoyEmbedIE,
)
from .ndtv import NDTVIE
-from .netzkino import NetzkinoIE
+from .nebula import NebulaIE
from .nerdcubed import NerdCubedFeedIE
+from .netzkino import NetzkinoIE
from .neteasemusic import (
NetEaseMusicIE,
NetEaseMusicAlbumIE,
@@ -765,6 +883,7 @@ from .neteasemusic import (
from .newgrounds import (
NewgroundsIE,
NewgroundsPlaylistIE,
+ NewgroundsUserIE,
)
from .newstube import NewstubeIE
from .nextmedia import (
@@ -777,6 +896,7 @@ from .nexx import (
NexxIE,
NexxEmbedIE,
)
+from .nfhsnetwork import NFHSNetworkIE
from .nfl import (
NFLIE,
NFLArticleIE,
@@ -793,11 +913,20 @@ from .nick import (
NickNightIE,
NickRuIE,
)
-from .niconico import NiconicoIE, NiconicoPlaylistIE
+
+from .niconico import (
+ NiconicoIE,
+ NiconicoPlaylistIE,
+ NiconicoUserIE,
+ NicovideoSearchDateIE,
+ NicovideoSearchIE,
+ NicovideoSearchURLIE,
+)
from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
+from .nitter import NitterIE
from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .nonktube import NonkTubeIE
@@ -808,6 +937,7 @@ from .nova import (
NovaEmbedIE,
NovaIE,
)
+from .novaplay import NovaPlayIE
from .nowness import (
NownessIE,
NownessPlaylistIE,
@@ -848,10 +978,13 @@ from .nytimes import (
NYTimesCookingIE,
)
from .nuvid import NuvidIE
+from .nzherald import NZHeraldIE
from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
+from .olympics import OlympicsReplayIE
+from .on24 import On24IE
from .ondemandkorea import OnDemandKoreaIE
from .onet import (
OnetIE,
@@ -864,6 +997,10 @@ from .ooyala import (
OoyalaIE,
OoyalaExternalIE,
)
+from .openrec import (
+ OpenRecIE,
+ OpenRecCaptureIE,
+)
from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
@@ -893,11 +1030,26 @@ from .palcomp3 import (
PalcoMP3VideoIE,
)
from .pandoratv import PandoraTVIE
+from .paramountplus import (
+ ParamountPlusIE,
+ ParamountPlusSeriesIE,
+)
from .parliamentliveuk import ParliamentLiveUKIE
-from .patreon import PatreonIE
+from .parlview import ParlviewIE
+from .patreon import (
+ PatreonIE,
+ PatreonUserIE
+)
from .pbs import PBSIE
from .pearvideo import PearVideoIE
-from .peertube import PeerTubeIE
+from .peertube import (
+ PeerTubeIE,
+ PeerTubePlaylistIE,
+)
+from .peloton import (
+ PelotonIE,
+ PelotonLiveIE
+)
from .people import PeopleIE
from .performgroup import PerformGroupIE
from .periscope import (
@@ -929,12 +1081,16 @@ from .playstuff import PlayStuffIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
+from .plutotv import PlutoTVIE
from .pluralsight import (
PluralsightIE,
PluralsightCourseIE,
)
from .podomatic import PodomaticIE
-from .pokemon import PokemonIE
+from .pokemon import (
+ PokemonIE,
+ PokemonWatchIE,
+)
from .polskieradio import (
PolskieRadioIE,
PolskieRadioCategoryIE,
@@ -943,10 +1099,12 @@ from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .porncom import PornComIE
+from .pornflip import PornFlipIE
from .pornhd import PornHdIE
from .pornhub import (
PornHubIE,
PornHubUserIE,
+ PornHubPlaylistIE,
PornHubPagedVideoListIE,
PornHubUserVideosUploadIE,
)
@@ -958,6 +1116,7 @@ from .puhutv import (
PuhuTVSerieIE,
)
from .presstv import PressTVIE
+from .projectveritas import ProjectVeritasIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
@@ -972,6 +1131,7 @@ from .r7 import (
R7IE,
R7ArticleIE,
)
+from .radiko import RadikoIE, RadikoRadioIE
from .radiocanada import (
RadioCanadaIE,
RadioCanadaAudioVideoIE,
@@ -980,6 +1140,11 @@ from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
+from .radlive import (
+ RadLiveIE,
+ RadLiveChannelIE,
+ RadLiveSeasonIE,
+)
from .rai import (
RaiPlayIE,
RaiPlayLiveIE,
@@ -991,6 +1156,16 @@ from .raywenderlich import (
RayWenderlichCourseIE,
)
from .rbmaradio import RBMARadioIE
+from .rcs import (
+ RCSIE,
+ RCSEmbedsIE,
+ RCSVariousIE,
+)
+from .rcti import (
+ RCTIPlusIE,
+ RCTIPlusSeriesIE,
+ RCTIPlusTVIE,
+)
from .rds import RDSIE
from .redbulltv import (
RedBullTVIE,
@@ -1033,7 +1208,10 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
from .rtvnh import RTVNHIE
from .rtvs import RTVSIE
from .ruhd import RUHDIE
-from .rumble import RumbleEmbedIE
+from .rumble import (
+ RumbleEmbedIE,
+ RumbleChannelIE,
+)
from .rutube import (
RutubeIE,
RutubeChannelIE,
@@ -1050,6 +1228,7 @@ from .safari import (
SafariApiIE,
SafariCourseIE,
)
+from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
@@ -1082,6 +1261,7 @@ from .shared import (
SharedIE,
VivoIE,
)
+from .shemaroome import ShemarooMeIE
from .showroomlive import ShowRoomLiveIE
from .simplecast import (
SimplecastIE,
@@ -1105,6 +1285,7 @@ from .skynewsarabia import (
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
)
+from .skynewsau import SkyNewsAUIE
from .sky import (
SkyNewsIE,
SkySportsIE,
@@ -1115,7 +1296,10 @@ from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .snotr import SnotrIE
from .sohu import SohuIE
-from .sonyliv import SonyLIVIE
+from .sonyliv import (
+ SonyLIVIE,
+ SonyLIVSeriesIE,
+)
from .soundcloud import (
SoundcloudEmbedIE,
SoundcloudIE,
@@ -1136,6 +1320,10 @@ from .southpark import (
SouthParkEsIE,
SouthParkNlIE
)
+from .sovietscloset import (
+ SovietsClosetIE,
+ SovietsClosetPlaylistIE
+)
from .spankbang import (
SpankBangIE,
SpankBangPlaylistIE,
@@ -1171,6 +1359,7 @@ from .srgssr import (
)
from .srmediathek import SRMediathekIE
from .stanfordoc import StanfordOpenClassroomIE
+from .startv import StarTVIE
from .steam import SteamIE
from .storyfire import (
StoryFireIE,
@@ -1178,6 +1367,7 @@ from .storyfire import (
StoryFireSeriesIE,
)
from .streamable import StreamableIE
+from .streamanity import StreamanityIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
@@ -1223,6 +1413,7 @@ from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telemb import TeleMBIE
+from .telemundo import TelemundoIE
from .telequebec import (
TeleQuebecIE,
TeleQuebecSquatIE,
@@ -1245,6 +1436,10 @@ from .theplatform import (
from .thescene import TheSceneIE
from .thestar import TheStarIE
from .thesun import TheSunIE
+from .theta import (
+ ThetaVideoIE,
+ ThetaStreamIE,
+)
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
@@ -1253,12 +1448,10 @@ from .threeqsdn import ThreeQSDNIE
from .tiktok import (
TikTokIE,
TikTokUserIE,
+ DouyinIE,
)
from .tinypic import TinyPicIE
-from .tmz import (
- TMZIE,
- TMZArticleIE,
-)
+from .tmz import TMZIE
from .tnaflix import (
TNAFlixNetworkEmbedIE,
TNAFlixIE,
@@ -1269,6 +1462,10 @@ from .toggle import (
ToggleIE,
MeWatchIE,
)
+from .tokentube import (
+ TokentubeIE,
+ TokentubeChannelIE
+)
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE
@@ -1278,11 +1475,16 @@ from .trilulilu import TriluliluIE
from .trovo import (
TrovoIE,
TrovoVodIE,
+ TrovoChannelVodIE,
+ TrovoChannelClipIE,
)
from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
-from .tubitv import TubiTvIE
+from .tubitv import (
+ TubiTvIE,
+ TubiTvShowIE,
+)
from .tumblr import TumblrIE
from .tunein import (
TuneInClipIE,
@@ -1303,7 +1505,10 @@ from .tv2dk import (
TV2DKIE,
TV2DKBornholmPlayIE,
)
-from .tv2hu import TV2HuIE
+from .tv2hu import (
+ TV2HuIE,
+ TV2HuSeriesIE,
+)
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tv5unis import (
@@ -1330,6 +1535,7 @@ from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
+ TVNowFilmIE,
TVNowNewIE,
TVNowSeasonIE,
TVNowAnnualIE,
@@ -1350,7 +1556,11 @@ from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE
-from .twitcasting import TwitCastingIE
+from .twitcasting import (
+ TwitCastingIE,
+ TwitCastingLiveIE,
+ TwitCastingUserIE,
+)
from .twitch import (
TwitchVodIE,
TwitchCollectionIE,
@@ -1365,6 +1575,7 @@ from .twitter import (
TwitterIE,
TwitterAmplifyIE,
TwitterBroadcastIE,
+ TwitterShortenerIE,
)
from .udemy import (
UdemyIE,
@@ -1375,6 +1586,7 @@ from .ufctv import (
UFCTVIE,
UFCArabiaIE,
)
+from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .dlive import (
@@ -1398,9 +1610,11 @@ from .ustudio import (
UstudioIE,
UstudioEmbedIE,
)
+from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
+from .veo import VeoIE
from .veoh import VeohIE
from .vesti import VestiIE
from .vevo import (
@@ -1429,13 +1643,12 @@ from .videomore import (
VideomoreSeasonIE,
)
from .videopress import VideoPressIE
-from .vidio import VidioIE
-from .vidlii import VidLiiIE
-from .vidme import (
- VidmeIE,
- VidmeUserIE,
- VidmeUserLikesIE,
+from .vidio import (
+ VidioIE,
+ VidioPremierIE,
+ VidioLiveIE
)
+from .vidlii import VidLiiIE
from .vier import VierIE, VierVideosIE
from .viewlift import (
ViewLiftIE,
@@ -1483,7 +1696,14 @@ from .vodlocker import VodlockerIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
-from .voot import VootIE
+from .voicy import (
+ VoicyIE,
+ VoicyChannelIE,
+)
+from .voot import (
+ VootIE,
+ VootSeriesIE,
+)
from .voxmedia import (
VoxMediaVolumeIE,
VoxMediaIE,
@@ -1499,6 +1719,7 @@ from .vtm import VTMIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE
+from .vupload import VuploadIE
from .vvvvid import (
VVVVIDIE,
VVVVIDShowIE,
@@ -1533,6 +1754,8 @@ from .weibo import (
WeiboMobileIE
)
from .weiqitv import WeiqiTVIE
+from .wimtv import WimTVIE
+from .whowatch import WhoWatchIE
from .wistia import (
WistiaIE,
WistiaPlaylistIE,
@@ -1583,7 +1806,11 @@ from .yandexmusic import (
YandexMusicArtistTracksIE,
YandexMusicArtistAlbumsIE,
)
-from .yandexvideo import YandexVideoIE
+from .yandexvideo import (
+ YandexVideoIE,
+ ZenYandexIE,
+ ZenYandexChannelIE,
+)
from .yapfiles import YapFilesIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
@@ -1603,6 +1830,7 @@ from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .youtube import (
YoutubeIE,
+ YoutubeClipIE,
YoutubeFavouritesIE,
YoutubeHistoryIE,
YoutubeTabIE,
@@ -1610,7 +1838,7 @@ from .youtube import (
YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
- #YoutubeSearchURLIE,
+ YoutubeSearchURLIE,
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
@@ -1639,6 +1867,10 @@ from .zattoo import (
ZattooLiveIE,
)
from .zdf import ZDFIE, ZDFChannelIE
+from .zee5 import (
+ Zee5IE,
+ Zee5SeriesIE,
+)
from .zhihu import ZhihuIE
from .zingmp3 import (
ZingMp3IE,
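
Note on the import sweep above: these imports are not mere re-exports — the order of this file decides which extractor claims a URL first, since the dispatcher walks the classes in import order and GenericIE acts as the catch-all. A rough sketch of the lookup, assuming the public helpers keep their usual signatures:

    from hypervideo_dl.extractor import gen_extractors

    def find_extractor(url):
        # extractors are tried in the order they are imported in
        # extractors.py, so specific IEs must precede GenericIE
        for ie in gen_extractors():
            if ie.suitable(url):
                return ie.IE_NAME
        return None
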
diff --git a/hypervideo_dl/extractor/facebook.py b/hypervideo_dl/extractor/facebook.py
index 04650af..f32700f 100644
--- a/hypervideo_dl/extractor/facebook.py
+++ b/hypervideo_dl/extractor/facebook.py
@@ -3,14 +3,11 @@ from __future__ import unicode_literals
import json
import re
-import socket
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
- compat_http_client,
compat_str,
- compat_urllib_error,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
)
@@ -23,6 +20,8 @@ from ..utils import (
int_or_none,
js_to_json,
limit_length,
+ merge_dicts,
+ network_exceptions,
parse_count,
qualities,
sanitized_Request,
@@ -36,7 +35,7 @@ class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://
- (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
+ (?:[\w-]+\.)?(?:facebook\.com|facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd\.onion)/
(?:[^#]*?\#!/)?
(?:
(?:
@@ -82,7 +81,8 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '274175099429670',
'ext': 'mp4',
- 'title': 're:^Asif Nawab Butt posted a video',
+ 'title': 'Asif Nawab Butt',
+ 'description': 'Asif Nawab Butt',
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
@@ -137,15 +137,17 @@ class FacebookIE(InfoExtractor):
'upload_date': '20160223',
'uploader': 'Barack Obama',
},
+ 'skip': 'Gif on giphy.com gone',
}, {
# have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
- 'md5': '9571fae53d4165bbbadb17a94651dcdc',
+ 'md5': '3f3798adb2b73423263e59376f1f5eb7',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
- 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
+ 'title': 'Holocaust survivor becomes US citizen',
+ 'description': 'She survived the holocaust — and years later, she’s getting her citizenship so she can vote for Hillary Clinton http://cnn.it/2eERh5f',
'timestamp': 1477818095,
'upload_date': '20161030',
'uploader': 'CNN',
@@ -159,15 +161,18 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '1417995061575415',
'ext': 'mp4',
- 'title': 'md5:1db063d6a8c13faa8da727817339c857',
- 'timestamp': 1486648217,
+ 'title': 'Yaroslav Korpan - Довгоочікуване відео',
+ 'description': 'Довгоочікуване відео',
+ 'timestamp': 1486648771,
'upload_date': '20170209',
'uploader': 'Yaroslav Korpan',
+ 'uploader_id': '100000948048708',
},
'params': {
'skip_download': True,
},
}, {
+ # FIXME
'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
'info_dict': {
'id': '1072691702860471',
@@ -185,12 +190,14 @@ class FacebookIE(InfoExtractor):
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
'info_dict': {
- 'id': '1396382447100162',
+ 'id': '202882990186699',
'ext': 'mp4',
- 'title': 'md5:19a428bbde91364e3de815383b54a235',
- 'timestamp': 1486035494,
+ 'title': 'Elisabeth Ahtn - Hello? Yes your uber ride is here\n* Jukin...',
+ 'description': 'Hello? Yes your uber ride is here\n* Jukin Media Verified *\nFind this video and others like it by visiting...',
+ 'timestamp': 1486035513,
'upload_date': '20170202',
'uploader': 'Elisabeth Ahtn',
+ 'uploader_id': '100013949973717',
},
'params': {
'skip_download': True,
@@ -219,7 +226,7 @@ class FacebookIE(InfoExtractor):
'only_matching': True,
}, {
# data.video
- 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
+ 'url': 'https://www.facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd.onion/video.php?v=274175099429670',
'only_matching': True,
}, {
# no title
@@ -231,8 +238,12 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '359649331226507',
'ext': 'mp4',
- 'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+ 'title': 'Fnatic vs. EG - Group A - Opening Match - ESL One Birmingham Day 1',
+ 'description': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+ 'timestamp': 1527084179,
+ 'upload_date': '20180523',
'uploader': 'ESL One Dota 2',
+ 'uploader_id': '234218833769558',
},
'params': {
'skip_download': True,
@@ -249,6 +260,7 @@ class FacebookIE(InfoExtractor):
'url': 'https://www.facebook.com/watch/?v=647537299265662',
'only_matching': True,
}, {
+ # FIXME: https://github.com/hypervideo/hypervideo/issues/542
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
'info_dict': {
@@ -279,6 +291,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20161122',
'timestamp': 1479793574,
},
+ 'skip': 'No video',
}, {
# data.video.creation_story.attachments[].media
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
@@ -348,7 +361,7 @@ class FacebookIE(InfoExtractor):
login_results, 'login error', default=None, group='error')
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
- self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
+ self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
return
fb_dtsg = self._search_regex(
@@ -369,9 +382,9 @@ class FacebookIE(InfoExtractor):
check_response = self._download_webpage(check_req, None,
note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
- self._downloader.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
+ self.report_warning('Unable to confirm login, you have to log in via your browser and authorize the login.')
+ except network_exceptions as err:
+ self.report_warning('unable to log in: %s' % error_to_compat_str(err))
return
def _real_initialize(self):
@@ -381,6 +394,56 @@ class FacebookIE(InfoExtractor):
webpage = self._download_webpage(
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
+ def extract_metadata(webpage):
+ video_title = self._html_search_regex(
+ r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
+ 'title', default=None)
+ if not video_title:
+ video_title = self._html_search_regex(
+ r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
+ webpage, 'alternative title', default=None)
+ if not video_title:
+ video_title = self._html_search_meta(
+ ['og:title', 'twitter:title', 'description'],
+ webpage, 'title', default=None)
+ if video_title:
+ video_title = limit_length(video_title, 80)
+ else:
+ video_title = 'Facebook video #%s' % video_id
+ description = self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'],
+ webpage, 'description', default=None)
+ uploader = clean_html(get_element_by_id(
+ 'fbPhotoPageAuthorName', webpage)) or self._search_regex(
+ r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
+ default=None) or self._og_search_title(webpage, fatal=False)
+ timestamp = int_or_none(self._search_regex(
+ r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
+ 'timestamp', default=None))
+ thumbnail = self._html_search_meta(
+ ['og:image', 'twitter:image'], webpage, 'thumbnail', default=None)
+ # some webpages contain unretrievable thumbnail urls
+ # like https://lookaside.fbsbx.com/lookaside/crawler/media/?media_id=10155168902769113&get_thumbnail=1
+ # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
+ if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
+ thumbnail = None
+ view_count = parse_count(self._search_regex(
+ r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
+ default=None))
+ info_dict = {
+ 'title': video_title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ }
+ info_json_ld = self._search_json_ld(webpage, video_id, default={})
+ if info_json_ld.get('title'):
+ info_json_ld['title'] = limit_length(
+ re.sub(r'\s*\|\s*Facebook$', '', info_json_ld['title']), 80)
+ return merge_dicts(info_json_ld, info_dict)
+
video_data = None
def extract_video_data(instances):
@@ -416,7 +479,7 @@ class FacebookIE(InfoExtractor):
for f in formats:
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
- self._sort_formats(formats)
+ self._sort_formats(formats, ('res', 'quality'))
def extract_relay_data(_filter):
return self._parse_json(self._search_regex(
@@ -513,7 +576,15 @@ class FacebookIE(InfoExtractor):
if not entries:
parse_graphql_video(video)
- return self.playlist_result(entries, video_id)
+ if len(entries) > 1:
+ return self.playlist_result(entries, video_id)
+
+ video_info = entries[0]
+ webpage_info = extract_metadata(webpage)
+ # honor precise duration in video info
+ if video_info.get('duration'):
+ webpage_info['duration'] = video_info['duration']
+ return merge_dicts(webpage_info, video_info)
if not video_data:
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
@@ -616,60 +687,28 @@ class FacebookIE(InfoExtractor):
for src_type in ('src', 'src_no_ratelimit'):
src = f[0].get('%s_%s' % (quality, src_type))
if src:
- preference = -10 if format_id == 'progressive' else 0
+ preference = -10 if format_id == 'progressive' else -1
if quality == 'hd':
preference += 5
formats.append({
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
'url': src,
- 'preference': preference,
+ 'quality': preference,
+ 'height': 720 if quality == 'hd' else None
})
extract_dash_manifest(f[0], formats)
subtitles_src = f[0].get('subtitles_src')
if subtitles_src:
subtitles.setdefault('en', []).append({'url': subtitles_src})
- if not formats:
- raise ExtractorError('Cannot find video formats')
process_formats(formats)
- video_title = self._html_search_regex(
- r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
- 'title', default=None)
- if not video_title:
- video_title = self._html_search_regex(
- r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
- webpage, 'alternative title', default=None)
- if not video_title:
- video_title = self._html_search_meta(
- 'description', webpage, 'title', default=None)
- if video_title:
- video_title = limit_length(video_title, 80)
- else:
- video_title = 'Facebook video #%s' % video_id
- uploader = clean_html(get_element_by_id(
- 'fbPhotoPageAuthorName', webpage)) or self._search_regex(
- r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
- default=None) or self._og_search_title(webpage, fatal=False)
- timestamp = int_or_none(self._search_regex(
- r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
- 'timestamp', default=None))
- thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
-
- view_count = parse_count(self._search_regex(
- r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
- default=None))
-
info_dict = {
'id': video_id,
- 'title': video_title,
'formats': formats,
- 'uploader': uploader,
- 'timestamp': timestamp,
- 'thumbnail': thumbnail,
- 'view_count': view_count,
'subtitles': subtitles,
}
+ info_dict.update(extract_metadata(webpage))
return info_dict
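
Note on the metadata merge above: merge_dicts (from ..utils) is first-wins — earlier dicts take precedence, and later dicts only fill keys that are missing or empty — which is why the precise duration is copied into webpage_info before merging. A simplified sketch of those semantics (the real utility handles a few more edge cases):

    def merge_dicts(*dicts):
        # first non-empty value wins; later dicts only fill gaps
        merged = {}
        for d in dicts:
            for k, v in d.items():
                if v is None:
                    continue
                if k not in merged or (isinstance(v, str) and v and not merged.get(k)):
                    merged[k] = v
        return merged

    info = merge_dicts({'title': 'From webpage', 'duration': None},
                       {'title': 'From GraphQL', 'duration': 42})
    assert info == {'title': 'From webpage', 'duration': 42}
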
diff --git a/hypervideo_dl/extractor/fancode.py b/hypervideo_dl/extractor/fancode.py
new file mode 100644
index 0000000..912feb7
--- /dev/null
+++ b/hypervideo_dl/extractor/fancode.py
@@ -0,0 +1,187 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..compat import compat_str
+from ..utils import (
+ parse_iso8601,
+ ExtractorError,
+ try_get,
+ mimetype2ext
+)
+
+
+class FancodeVodIE(InfoExtractor):
+ IE_NAME = 'fancode:vod'
+
+ _VALID_URL = r'https?://(?:www\.)?fancode\.com/video/(?P<id>[0-9]+)\b'
+
+ _TESTS = [{
+ 'url': 'https://fancode.com/video/15043/match-preview-pbks-vs-mi',
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bestvideo'
+ },
+ 'info_dict': {
+ 'id': '6249806281001',
+ 'ext': 'mp4',
+ 'title': 'Match Preview: PBKS vs MI',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ "timestamp": 1619081590,
+ 'view_count': int,
+ 'like_count': int,
+ 'upload_date': '20210422',
+ 'uploader_id': '6008340455001'
+ }
+ }, {
+ 'url': 'https://fancode.com/video/15043',
+ 'only_matching': True,
+ }]
+
+ _ACCESS_TOKEN = None
+ _NETRC_MACHINE = 'fancode'
+
+ _LOGIN_HINT = 'Use "--user refresh --password <refresh_token>" to login using a refresh token'
+
+ headers = {
+ 'content-type': 'application/json',
+ 'origin': 'https://fancode.com',
+ 'referer': 'https://fancode.com',
+ }
+
+ def _login(self):
+ # Access tokens are short-lived, so get them using the refresh token.
+ username, password = self._get_login_info()
+ if username == 'refresh' and password is not None:
+ self.report_login()
+ data = '''{
+ "query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}",
+ "variables":{
+ "refreshToken":"%s"
+ },
+ "operationName":"RefreshToken"
+ }''' % password
+
+ token_json = self.download_gql('refresh token', data, "Getting the Access token")
+ self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken'])
+ if self._ACCESS_TOKEN is None:
+ self.report_warning('Failed to get Access token')
+ else:
+ self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN})
+ elif username is not None:
+ self.report_warning(f'Login using username and password is not currently supported. {self._LOGIN_HINT}')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _check_login_required(self, is_available, is_premium):
+ msg = None
+ if is_premium and self._ACCESS_TOKEN is None:
+ msg = f'This video is only available for registered users. {self._LOGIN_HINT}'
+ elif not is_available and self._ACCESS_TOKEN is not None:
+ msg = 'This video isn\'t available to the currently logged-in account'
+ if msg:
+ self.raise_login_required(msg, metadata_available=True, method=None)
+
+ def download_gql(self, variable, data, note, fatal=False, headers=headers):
+ return self._download_json(
+ 'https://www.fancode.com/graphql', variable,
+ data=data.encode(), note=note,
+ headers=headers, fatal=fatal)
+
+ def _real_extract(self, url):
+
+ BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+ video_id = self._match_id(url)
+
+ brightcove_user_id = '6008340455001'
+ data = '''{
+ "query":"query Video($id: Int\\u0021, $filter: SegmentFilter) { media(id: $id, filter: $filter) { id contentId title contentId publishedTime totalViews totalUpvotes provider thumbnail { src } mediaSource {brightcove } duration isPremium isUserEntitled tags duration }}",
+ "variables":{
+ "id":%s,
+ "filter":{
+ "contentDataType":"DEFAULT"
+ }
+ },
+ "operationName":"Video"
+ }''' % video_id
+
+ metadata_json = self.download_gql(video_id, data, note='Downloading metadata')
+
+ media = try_get(metadata_json, lambda x: x['data']['media'], dict) or {}
+ brightcove_video_id = try_get(media, lambda x: x['mediaSource']['brightcove'], compat_str)
+
+ if brightcove_video_id is None:
+ raise ExtractorError('Unable to extract brightcove Video ID')
+
+ is_premium = media.get('isPremium')
+
+ self._check_login_required(media.get('isUserEntitled'), is_premium)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': BRIGHTCOVE_URL_TEMPLATE % (brightcove_user_id, brightcove_video_id),
+ 'ie_key': 'BrightcoveNew',
+ 'id': video_id,
+ 'title': media['title'],
+ 'like_count': media.get('totalUpvotes'),
+ 'view_count': media.get('totalViews'),
+ 'tags': media.get('tags'),
+ 'release_timestamp': parse_iso8601(media.get('publishedTime')),
+ 'availability': self._availability(needs_premium=is_premium),
+ }
+
+
+class FancodeLiveIE(FancodeVodIE):
+ IE_NAME = 'fancode:live'
+
+ _VALID_URL = r'https?://(?:www\.)?fancode\.com/match/(?P<id>[0-9]+).+'
+
+ _TESTS = [{
+ 'url': 'https://fancode.com/match/35328/cricket-fancode-ecs-hungary-2021-bub-vs-blb?slug=commentary',
+ 'info_dict': {
+ 'id': '35328',
+ 'ext': 'mp4',
+ 'title': 'BUB vs BLB',
+ "timestamp": 1624863600,
+ 'is_live': True,
+ 'upload_date': '20210628',
+ },
+ 'skip': 'Ended'
+ }, {
+ 'url': 'https://fancode.com/match/35328/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://fancode.com/match/35567?slug=scorecard',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+
+ id = self._match_id(url)
+ data = '''{
+ "query":"query MatchResponse($id: Int\\u0021, $isLoggedIn: Boolean\\u0021) { match: matchWithScores(id: $id) { id matchDesc mediaId videoStreamId videoStreamUrl { ...VideoSource } liveStreams { videoStreamId videoStreamUrl { ...VideoSource } contentId } name startTime streamingStatus isPremium isUserEntitled @include(if: $isLoggedIn) status metaTags bgImage { src } sport { name slug } tour { id name } squads { name shortName } liveStreams { contentId } mediaId }}fragment VideoSource on VideoSource { title description posterUrl url deliveryType playerType}",
+ "variables":{
+ "id":%s,
+ "isLoggedIn":true
+ },
+ "operationName":"MatchResponse"
+ }''' % id
+
+ info_json = self.download_gql(id, data, 'Downloading match info')
+
+ match_info = try_get(info_json, lambda x: x['data']['match'])
+
+ if match_info.get('streamingStatus') != "STARTED":
+ raise ExtractorError('The stream can\'t be accessed', expected=True)
+ self._check_login_required(match_info.get('isUserEntitled'), True) # all live streams are premium only
+
+ return {
+ 'id': id,
+ 'title': match_info.get('name'),
+ 'formats': self._extract_akamai_formats(try_get(match_info, lambda x: x['videoStreamUrl']['url']), id),
+ 'ext': mimetype2ext(try_get(match_info, lambda x: x['videoStreamUrl']['deliveryType'])),
+ 'is_live': True,
+ 'release_timestamp': parse_iso8601(match_info.get('startTime'))
+ }
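
Note on the GraphQL calls above: the payloads are built by %-interpolating into a hand-written JSON string, which is why '!' appears as the \u0021 escape. A sketch of the equivalent payload built with json.dumps, which handles the quoting automatically — shown only as an alternative, not what the extractor does:

    import json

    def build_refresh_payload(refresh_token):
        # json.dumps handles quoting, so '!' needs no \u0021 escape
        return json.dumps({
            'query': 'mutation RefreshToken($refreshToken: String!) '
                     '{ refreshToken(refreshToken: $refreshToken) { accessToken }}',
            'variables': {'refreshToken': refresh_token},
            'operationName': 'RefreshToken',
        })
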
diff --git a/hypervideo_dl/extractor/fc2.py b/hypervideo_dl/extractor/fc2.py
index 4355611..4d85e62 100644
--- a/hypervideo_dl/extractor/fc2.py
+++ b/hypervideo_dl/extractor/fc2.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import hashlib
-import re
from .common import InfoExtractor
from ..compat import (
@@ -138,7 +137,7 @@ class FC2EmbedIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
query = compat_parse_qs(mobj.group('query'))
video_id = query['i'][-1]
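
Note on the hunk above: this is the first of several identical changes in this patch (filmweb, fivetv, fourtube, foxnews, francetv, frontendmasters below) replacing module-level re.match(self._VALID_URL, url) calls with the base-class helper. A plausible sketch of that helper, assuming it simply caches the compiled pattern per class:

    import re

    class InfoExtractor:
        _VALID_URL = None
        _VALID_URL_RE = None  # compiled lazily, once per class

        @classmethod
        def _match_valid_url(cls, url):
            # compile _VALID_URL on first use and reuse it afterwards
            if cls._VALID_URL_RE is None:
                cls._VALID_URL_RE = re.compile(cls._VALID_URL)
            return cls._VALID_URL_RE.match(url)
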
diff --git a/hypervideo_dl/extractor/filmmodu.py b/hypervideo_dl/extractor/filmmodu.py
new file mode 100644
index 0000000..2746876
--- /dev/null
+++ b/hypervideo_dl/extractor/filmmodu.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class FilmmoduIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?filmmodu\.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
+ _TESTS = [{
+ 'url': 'https://www.filmmodu.org/f9-altyazili-izle',
+ 'md5': 'aeefd955c2a508a5bdaa3bcec8eeb0d4',
+ 'info_dict': {
+ 'id': '10804',
+ 'ext': 'mp4',
+ 'title': 'F9',
+ 'description': 'md5:2713f584a4d65afa2611e2948d0b953c',
+ 'subtitles': {
+ 'tr': [{
+ 'ext': 'vtt',
+ }],
+ },
+ 'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/10804/xXHZeb1yhJvnSHPzZDqee0zfMb6.jpg',
+ },
+ }, {
+ 'url': 'https://www.filmmodu.org/the-godfather-turkce-dublaj-izle',
+ 'md5': '109f2fcb9c941330eed133971c035c00',
+ 'info_dict': {
+ 'id': '3646',
+ 'ext': 'mp4',
+ 'title': 'Baba',
+ 'description': 'md5:d43fd651937cd75cc650883ebd8d8461',
+ 'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/3646/6xKCYgH16UuwEGAyroLU6p8HLIn.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage, fatal=True)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ real_video_id = self._search_regex(r'var\s*videoId\s*=\s*\'([0-9]+)\'', webpage, 'video_id')
+ video_type = self._search_regex(r'var\s*videoType\s*=\s*\'([a-z]+)\'', webpage, 'video_type')
+ data = self._download_json('https://www.filmmodu.org/get-source', real_video_id, query={
+ 'movie_id': real_video_id,
+ 'type': video_type,
+ })
+ formats = [{
+ 'url': source['src'],
+ 'ext': 'mp4',
+ 'format_id': source['label'],
+ 'height': int_or_none(source.get('res')),
+ 'protocol': 'm3u8_native',
+ } for source in data['sources']]
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+
+ if data.get('subtitle'):
+ subtitles['tr'] = [{
+ 'url': data['subtitle'],
+ }]
+
+ return {
+ 'id': real_video_id,
+ 'display_id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': thumbnail,
+ }
diff --git a/hypervideo_dl/extractor/filmweb.py b/hypervideo_dl/extractor/filmweb.py
index 56000bc..5e323b4 100644
--- a/hypervideo_dl/extractor/filmweb.py
+++ b/hypervideo_dl/extractor/filmweb.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -22,7 +21,7 @@ class FilmwebIE(InfoExtractor):
}
def _real_extract(self, url):
- article_type, article_id = re.match(self._VALID_URL, url).groups()
+ article_type, article_id = self._match_valid_url(url).groups()
if article_type == 'filmnytt':
webpage = self._download_webpage(url, article_id)
article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id')
diff --git a/hypervideo_dl/extractor/firsttv.py b/hypervideo_dl/extractor/firsttv.py
index 28617d8..ccad173 100644
--- a/hypervideo_dl/extractor/firsttv.py
+++ b/hypervideo_dl/extractor/firsttv.py
@@ -104,7 +104,7 @@ class FirstTVIE(InfoExtractor):
'tbr': tbr,
'source_preference': quality(f.get('name')),
# quality metadata of http formats may be incorrect
- 'preference': -1,
+ 'preference': -10,
})
# m3u8 URL format is reverse engineered from [1] (search for
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
diff --git a/hypervideo_dl/extractor/fivetv.py b/hypervideo_dl/extractor/fivetv.py
index c4c0f1b..be81fcc 100644
--- a/hypervideo_dl/extractor/fivetv.py
+++ b/hypervideo_dl/extractor/fivetv.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import int_or_none
@@ -66,7 +65,7 @@ class FiveTVIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('path')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/flickr.py b/hypervideo_dl/extractor/flickr.py
index 9f166ef..6c82fae 100644
--- a/hypervideo_dl/extractor/flickr.py
+++ b/hypervideo_dl/extractor/flickr.py
@@ -88,7 +88,7 @@ class FlickrIE(InfoExtractor):
formats.append({
'format_id': stream_type,
'url': stream['_content'],
- 'preference': preference(stream_type),
+ 'quality': preference(stream_type),
})
self._sort_formats(formats)
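
Note on the format hunks above (facebook, firsttv, flickr): the patch moves per-format ranking from the hard 'preference' override to the softer 'quality' hint. A toy illustration of the assumed distinction — 'preference' trumps everything, while 'quality' is just one signal weighed alongside resolution:

    # toy sort key; the real _sort_formats weighs many more fields
    def sort_key(f):
        return (
            f.get('preference') or 0,  # absolute override
            f.get('height') or 0,      # resolution
            f.get('quality') or 0,     # soft, source-level hint
        )

    formats = [
        {'format_id': 'sd', 'height': 360, 'quality': 0},
        {'format_id': 'hd', 'height': 720, 'quality': 5},
        # a negative preference sinks a format regardless of quality
        {'format_id': 'http-bad', 'height': 1080, 'preference': -10},
    ]
    assert max(formats, key=sort_key)['format_id'] == 'hd'
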
diff --git a/hypervideo_dl/extractor/fourtube.py b/hypervideo_dl/extractor/fourtube.py
index be4e813..d4d955b 100644
--- a/hypervideo_dl/extractor/fourtube.py
+++ b/hypervideo_dl/extractor/fourtube.py
@@ -41,7 +41,7 @@ class FourTubeBaseIE(InfoExtractor):
return formats
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
if kind == 'm' or not display_id:
@@ -228,7 +228,7 @@ class PornTubeIE(FourTubeBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/foxnews.py b/hypervideo_dl/extractor/foxnews.py
index 63613cb..18fa0a5 100644
--- a/hypervideo_dl/extractor/foxnews.py
+++ b/hypervideo_dl/extractor/foxnews.py
@@ -67,7 +67,7 @@ class FoxNewsIE(AMPIE):
webpage)]
def _real_extract(self, url):
- host, video_id = re.match(self._VALID_URL, url).groups()
+ host, video_id = self._match_valid_url(url).groups()
info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
diff --git a/hypervideo_dl/extractor/francetv.py b/hypervideo_dl/extractor/francetv.py
index e4ec2e2..3bbab69 100644
--- a/hypervideo_dl/extractor/francetv.py
+++ b/hypervideo_dl/extractor/francetv.py
@@ -2,22 +2,14 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
from ..utils import (
- clean_html,
determine_ext,
ExtractorError,
- int_or_none,
- parse_duration,
- try_get,
- url_or_none,
- urljoin,
+ format_field,
+ parse_iso8601,
+ parse_qs,
)
from .dailymotion import DailymotionIE
@@ -90,94 +82,81 @@ class FranceTVIE(InfoExtractor):
# Videos are identified by idDiffusion so catalogue part is optional.
# However when provided, some extra formats may be returned so we pass
# it if available.
- info = self._download_json(
- 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
- video_id, 'Downloading video JSON', query={
- 'idDiffusion': video_id,
- 'catalogue': catalogue or '',
- })
-
- if info.get('status') == 'NOK':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, info['message']),
- expected=True)
- allowed_countries = info['videos'][0].get('geoblocage')
- if allowed_countries:
- georestricted = True
- geo_info = self._download_json(
- 'http://geo.francetv.fr/ws/edgescape.json', video_id,
- 'Downloading geo restriction info')
- country = geo_info['reponse']['geo_info']['country_code']
- if country not in allowed_countries:
- raise ExtractorError(
- 'The video is not available from your location',
- expected=True)
- else:
- georestricted = False
-
- def sign(manifest_url, manifest_id):
- for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
- signed_url = url_or_none(self._download_webpage(
- 'https://%s/esi/TA' % host, video_id,
- 'Downloading signed %s manifest URL' % manifest_id,
- fatal=False, query={
- 'url': manifest_url,
- }))
- if signed_url:
- return signed_url
- return manifest_url
-
is_live = None
-
videos = []
-
- for video in (info.get('videos') or []):
- if video.get('statut') != 'ONLINE':
+ title = None
+ subtitle = None
+ image = None
+ duration = None
+ timestamp = None
+ spritesheets = None
+
+ for device_type in ('desktop', 'mobile'):
+ dinfo = self._download_json(
+ 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
+ video_id, 'Downloading %s video JSON' % device_type, query={
+ 'device_type': device_type,
+ 'browser': 'chrome',
+ }, fatal=False)
+
+ if not dinfo:
continue
- if not video.get('url'):
- continue
- videos.append(video)
-
- if not videos:
- for device_type in ['desktop', 'mobile']:
- fallback_info = self._download_json(
- 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
- video_id, 'Downloading fallback %s video JSON' % device_type, query={
- 'device_type': device_type,
- 'browser': 'chrome',
- }, fatal=False)
- if fallback_info and fallback_info.get('video'):
- videos.append(fallback_info['video'])
+ video = dinfo.get('video')
+ if video:
+ videos.append(video)
+ if duration is None:
+ duration = video.get('duration')
+ if is_live is None:
+ is_live = video.get('is_live')
+ if spritesheets is None:
+ spritesheets = video.get('spritesheets')
+
+ meta = dinfo.get('meta')
+ if meta:
+ if title is None:
+ title = meta.get('title')
+ # XXX: what is meta['pre_title']?
+ if subtitle is None:
+ subtitle = meta.get('additional_title')
+ if image is None:
+ image = meta.get('image_url')
+ if timestamp is None:
+ timestamp = parse_iso8601(meta.get('broadcasted_at'))
formats = []
+ subtitles = {}
for video in videos:
- video_url = video.get('url')
- if not video_url:
- continue
- if is_live is None:
- is_live = (try_get(
- video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
- or video.get('is_live') is True
- or '/live.francetv.fr/' in video_url)
format_id = video.get('format')
+
+ video_url = None
+ if video.get('workflow') == 'token-akamai':
+ token_url = video.get('token')
+ if token_url:
+ token_json = self._download_json(
+ token_url, video_id,
+ 'Downloading signed %s manifest URL' % format_id)
+ if token_json:
+ video_url = token_json.get('url')
+ if not video_url:
+ video_url = video.get('url')
+
ext = determine_ext(video_url)
if ext == 'f4m':
- if georestricted:
- # See https://github.com/ytdl-org/youtube-dl/issues/3963
- # m3u8 urls work fine
- continue
formats.extend(self._extract_f4m_formats(
- sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
- video_id, f4m_id=format_id, fatal=False))
+ video_url, video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- sign(video_url, format_id), video_id, 'mp4',
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ video_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id,
- fatal=False))
+ fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ video_url, video_id, mpd_id=format_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
@@ -191,41 +170,55 @@ class FranceTVIE(InfoExtractor):
'format_id': format_id,
})
+ # XXX: what is video['captions']?
+
+ for f in formats:
+ if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
+ f['language_preference'] = -10
+ f['format_note'] = 'audio description%s' % format_field(f, 'format_note', ', %s')
+
+ if spritesheets:
+ formats.append({
+ 'format_id': 'spritesheets',
+ 'format_note': 'storyboard',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'ext': 'mhtml',
+ 'protocol': 'mhtml',
+ 'url': 'about:dummy',
+ 'fragments': [{
+ 'path': sheet,
+ # XXX: not entirely accurate; each spritesheet seems to be
+ # a 10×10 grid of thumbnails corresponding to approximately
+ # 2 seconds of the video; the last spritesheet may be shorter
+ 'duration': 200,
+ } for sheet in spritesheets]
+ })
+
self._sort_formats(formats)
- title = info['titre']
- subtitle = info.get('sous_titre')
if subtitle:
title += ' - %s' % subtitle
title = title.strip()
- subtitles = {}
- subtitles_list = [{
- 'url': subformat['url'],
- 'ext': subformat.get('format'),
- } for subformat in info.get('subtitles', []) if subformat.get('url')]
- if subtitles_list:
- subtitles['fr'] = subtitles_list
-
return {
'id': video_id,
'title': self._live_title(title) if is_live else title,
- 'description': clean_html(info.get('synopsis')),
- 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
- 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
- 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
+ 'thumbnail': image,
+ 'duration': duration,
+ 'timestamp': timestamp,
'is_live': is_live,
'formats': formats,
'subtitles': subtitles,
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
catalog = mobj.group('catalog')
if not video_id:
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
video_id = qs.get('idDiffusion', [None])[0]
catalog = qs.get('catalogue', [None])[0]
if not video_id:
@@ -307,47 +300,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
return self._make_url_result(video_id, catalogue)
-class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
-
- _TESTS = [{
- 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
- 'info_dict': {
- 'id': 'NI_983319',
- 'ext': 'mp4',
- 'title': 'Le Pen Reims',
- 'upload_date': '20170505',
- 'timestamp': 1493981780,
- 'duration': 16,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
- video_id)
-
- return self._make_url_result(video['video_id'], video.get('catalog'))
-
-
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
_TESTS = [{
- 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+ 'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
'info_dict': {
- 'id': '84981923',
+ 'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
'ext': 'mp4',
'title': 'Soir 3',
- 'upload_date': '20130826',
- 'timestamp': 1377548400,
+ 'upload_date': '20190822',
+ 'timestamp': 1566510900,
+ 'description': 'md5:72d167097237701d6e8452ff03b83c00',
'subtitles': {
'fr': 'mincount:2',
},
@@ -357,6 +322,22 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
},
'add_ie': [FranceTVIE.ie_key()],
}, {
+ 'note': 'Only an image exists in initial webpage instead of the video',
+ 'url': 'https://www.francetvinfo.fr/sante/maladie/coronavirus/covid-19-en-inde-une-situation-catastrophique-a-new-dehli_4381095.html',
+ 'info_dict': {
+ 'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
+ 'ext': 'mp4',
+ 'title': 'Covid-19 : une situation catastrophique à New Dehli',
+ 'thumbnail': str,
+ 'duration': 76,
+ 'timestamp': 1619028518,
+ 'upload_date': '20210421',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
'only_matching': True,
}, {
@@ -408,139 +389,3 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
webpage, 'video id')
return self._make_url_result(video_id)
-
-
-class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
- IE_NAME = 'sport.francetvinfo.fr'
- _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
- 'info_dict': {
- 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
- 'ext': 'mp4',
- 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
- 'timestamp': 1523639962,
- 'upload_date': '20180413',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
- return self._make_url_result(video_id, 'Sport-web')
-
-
-class GenerationWhatIE(InfoExtractor):
- IE_NAME = 'france2.fr:generation-what'
- _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
- 'info_dict': {
- 'id': 'wtvKYUG45iw',
- 'ext': 'mp4',
- 'title': 'Generation What - Garde à vous - FRA',
- 'uploader': 'Generation What',
- 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
- 'upload_date': '20160411',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Youtube'],
- }, {
- 'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- youtube_id = self._search_regex(
- r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
- webpage, 'youtube id')
-
- return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
-
-
-class CultureboxIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
- 'info_dict': {
- 'id': 'EV_134885',
- 'ext': 'mp4',
- 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
- 'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
- 'upload_date': '20180206',
- 'timestamp': 1517945220,
- 'duration': 5981,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- if ">Ce live n'est plus disponible en replay<" in webpage:
- raise ExtractorError(
- 'Video %s is not available' % display_id, expected=True)
-
- video_id, catalogue = self._search_regex(
- r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
- webpage, 'video id').split('@')
-
- return self._make_url_result(video_id, catalogue)
-
-
-class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
-
- _TESTS = [{
- 'url': 'https://www.zouzous.fr/heros/simon',
- 'info_dict': {
- 'id': 'simon',
- },
- 'playlist_count': 9,
- }, {
- 'url': 'https://www.ludo.fr/heros/ninjago',
- 'info_dict': {
- 'id': 'ninjago',
- },
- 'playlist_count': 10,
- }, {
- 'url': 'https://www.zouzous.fr/heros/simon?abc',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
-
- playlist = self._download_json(
- '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
-
- if not playlist.get('count'):
- raise ExtractorError(
- '%s is not available' % playlist_id, expected=True)
-
- entries = []
- for item in playlist['items']:
- identity = item.get('identity')
- if identity and isinstance(identity, compat_str):
- entries.append(self._make_url_result(identity))
-
- return self.playlist_result(entries, playlist_id)
diff --git a/hypervideo_dl/extractor/frontendmasters.py b/hypervideo_dl/extractor/frontendmasters.py
index f1db33f..40b8cb0 100644
--- a/hypervideo_dl/extractor/frontendmasters.py
+++ b/hypervideo_dl/extractor/frontendmasters.py
@@ -207,7 +207,7 @@ class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
course_name, lesson_name = mobj.group('course_name', 'lesson_name')
course = self._download_course(course_name, url)
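This hunk and many of the later ones make the same refactor: re.match(self._VALID_URL, url) becomes self._match_valid_url(url). A minimal sketch of what such a helper can look like, an assumed condensed re-implementation rather than the actual InfoExtractor code; the practical win is that the compiled pattern is cached on the class instead of being re-compiled on every extraction:

import re


class InfoExtractorSketch:
    _VALID_URL = r'https?://example\.com/(?P<id>\d+)'  # placeholder pattern
    _VALID_URL_RE = None

    @classmethod
    def _match_valid_url(cls, url):
        # Compile the class pattern once, cache it on the class, and reuse
        # it for every subsequent URL match.
        if cls._VALID_URL_RE is None:
            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
        return cls._VALID_URL_RE.match(url)


mobj = InfoExtractorSketch._match_valid_url('https://example.com/42')
assert mobj.group('id') == '42'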
diff --git a/hypervideo_dl/extractor/funimation.py b/hypervideo_dl/extractor/funimation.py
index d8f1e16..382cbe1 100644
--- a/hypervideo_dl/extractor/funimation.py
+++ b/hypervideo_dl/extractor/funimation.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import random
+import re
import string
from .common import InfoExtractor
@@ -10,52 +11,29 @@ from ..utils import (
determine_ext,
int_or_none,
js_to_json,
+ orderedSet,
+ qualities,
+ str_or_none,
+ traverse_obj,
+ try_get,
+ urlencode_postdata,
ExtractorError,
- urlencode_postdata
)
-class FunimationIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
-
+class FunimationBaseIE(InfoExtractor):
_NETRC_MACHINE = 'funimation'
+ _REGION = None
_TOKEN = None
- _TESTS = [{
- 'url': 'https://www.funimation.com/shows/hacksign/role-play/',
- 'info_dict': {
- 'id': '91144',
- 'display_id': 'role-play',
- 'ext': 'mp4',
- 'title': '.hack//SIGN - Role Play',
- 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
- 'thumbnail': r're:https?://.*\.jpg',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
- 'info_dict': {
- 'id': '210051',
- 'display_id': 'broadcast-dub-preview',
- 'ext': 'mp4',
- 'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
- 'thumbnail': r're:https?://.*\.(?:jpg|png)',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
- 'only_matching': True,
- }, {
- # with lang code
- 'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
- 'only_matching': True,
- }]
+ def _get_region(self):
+ region_cookie = self._get_cookies('https://www.funimation.com').get('region')
+ region = region_cookie.value if region_cookie else self.get_param('geo_bypass_country')
+ return region or traverse_obj(
+ self._download_json(
+ 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False,
+ note='Checking geo-location', errnote='Unable to fetch geo-location information'),
+ 'region') or 'US'
def _login(self):
username, password = self._get_login_info()
@@ -68,91 +46,307 @@ class FunimationIE(InfoExtractor):
'username': username,
'password': password,
}))
- self._TOKEN = data['token']
+ return data['token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)['error']
raise ExtractorError(error, expected=True)
raise
+
+class FunimationPageIE(FunimationBaseIE):
+ IE_NAME = 'funimation:page'
+ _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:(?P<lang>[^/]+)/)?(?:shows|v)/(?P<show>[^/]+)/(?P<episode>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
+ 'info_dict': {
+ 'id': '210050',
+ 'ext': 'mp4',
+ 'title': 'Broadcast Dub Preview',
+ # Other metadata is tested in FunimationIE
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'add_ie': ['Funimation'],
+ }, {
+ # Not available in US
+ 'url': 'https://www.funimation.com/shows/hacksign/role-play/',
+ 'only_matching': True,
+ }, {
+ # with lang code
+ 'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.funimation.com/v/a-certain-scientific-railgun/super-powered-level-5',
+ 'only_matching': True,
+ }]
+
def _real_initialize(self):
- self._login()
+ if not self._REGION:
+ FunimationBaseIE._REGION = self._get_region()
+ if not self._TOKEN:
+ FunimationBaseIE._TOKEN = self._login()
def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- def _search_kane(name):
- return self._search_regex(
- r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
- webpage, name, default=None)
-
- title_data = self._parse_json(self._search_regex(
- r'TITLE_DATA\s*=\s*({[^}]+})',
- webpage, 'title data', default=''),
- display_id, js_to_json, fatal=False) or {}
-
- video_id = title_data.get('id') or self._search_regex([
- r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
- r'<iframe[^>]+src="/player/(\d+)',
- ], webpage, 'video_id', default=None)
- if not video_id:
- player_url = self._html_search_meta([
- 'al:web:url',
- 'og:video:url',
- 'og:video:secure_url',
- ], webpage, fatal=True)
- video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
-
- title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
- series = _search_kane('showName')
- if series:
- title = '%s - %s' % (series, title)
- description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
+ locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode')
+
+ video_id = traverse_obj(self._download_json(
+ f'https://title-api.prd.funimationsvc.com/v1/shows/{show}/episodes/{episode}',
+ f'{show}_{episode}', query={
+ 'deviceType': 'web',
+ 'region': self._REGION,
+ 'locale': locale or 'en'
+ }), ('videoList', ..., 'id'), get_all=False)
+
+ return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id)
+
+
+class FunimationIE(FunimationBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.funimation.com/player/210051',
+ 'info_dict': {
+ 'id': '210050',
+ 'display_id': 'broadcast-dub-preview',
+ 'ext': 'mp4',
+ 'title': 'Broadcast Dub Preview',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'episode': 'Broadcast Dub Preview',
+ 'episode_id': '210050',
+ 'season': 'Extras',
+ 'season_id': '166038',
+ 'season_number': 99,
+ 'series': 'Attack on Titan: Junior High',
+ 'description': '',
+ 'duration': 155,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+        'note': 'player_id should be extracted with the relevant compat-opt',
+ 'url': 'https://www.funimation.com/player/210051',
+ 'info_dict': {
+ 'id': '210051',
+ 'display_id': 'broadcast-dub-preview',
+ 'ext': 'mp4',
+ 'title': 'Broadcast Dub Preview',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'episode': 'Broadcast Dub Preview',
+ 'episode_id': '210050',
+ 'season': 'Extras',
+ 'season_id': '166038',
+ 'season_number': 99,
+ 'series': 'Attack on Titan: Junior High',
+ 'description': '',
+ 'duration': 155,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ 'compat_opts': ['seperate-video-versions'],
+ },
+ }]
+
+ def _real_initialize(self):
+ if not self._TOKEN:
+ FunimationBaseIE._TOKEN = self._login()
+
+ @staticmethod
+ def _get_experiences(episode):
+ for lang, lang_data in episode.get('languages', {}).items():
+ for video_data in lang_data.values():
+ for version, f in video_data.items():
+ yield lang, version.title(), f
+
+ def _get_episode(self, webpage, experience_id=None, episode_id=None, fatal=True):
+        ''' Extract the episode, season and show objects given either the episode or experience id '''
+ show = self._parse_json(
+ self._search_regex(
+ r'show\s*=\s*({.+?})\s*;', webpage, 'show data', fatal=fatal),
+ experience_id, transform_source=js_to_json, fatal=fatal) or []
+ for season in show.get('seasons', []):
+ for episode in season.get('episodes', []):
+ if episode_id is not None:
+ if str(episode.get('episodePk')) == episode_id:
+ return episode, season, show
+ continue
+ for _, _, f in self._get_experiences(episode):
+ if f.get('experienceId') == experience_id:
+ return episode, season, show
+ if fatal:
+ raise ExtractorError('Unable to find episode information')
+ else:
+ self.report_warning('Unable to find episode information')
+ return {}, {}, {}
+
+ def _real_extract(self, url):
+ initial_experience_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, initial_experience_id, note=f'Downloading player webpage for {initial_experience_id}')
+ episode, season, show = self._get_episode(webpage, experience_id=int(initial_experience_id))
+ episode_id = str(episode['episodePk'])
+ display_id = episode.get('slug') or episode_id
+
+ formats, subtitles, thumbnails, duration = [], {}, [], 0
+ requested_languages, requested_versions = self._configuration_arg('language'), self._configuration_arg('version')
+ language_preference = qualities((requested_languages or [''])[::-1])
+ source_preference = qualities((requested_versions or ['uncut', 'simulcast'])[::-1])
+ only_initial_experience = 'seperate-video-versions' in self.get_param('compat_opts', [])
+
+ for lang, version, fmt in self._get_experiences(episode):
+ experience_id = str(fmt['experienceId'])
+ if (only_initial_experience and experience_id != initial_experience_id
+ or requested_languages and lang.lower() not in requested_languages
+ or requested_versions and version.lower() not in requested_versions):
+ continue
+ thumbnails.append({'url': fmt.get('poster')})
+ duration = max(duration, fmt.get('duration', 0))
+ format_name = '%s %s (%s)' % (version, lang, experience_id)
+ self.extract_subtitles(
+ subtitles, experience_id, display_id=display_id, format_name=format_name,
+ episode=episode if experience_id == initial_experience_id else episode_id)
- try:
headers = {}
if self._TOKEN:
headers['Authorization'] = 'Token %s' % self._TOKEN
- sources = self._download_json(
- 'https://www.funimation.com/api/showexperience/%s/' % video_id,
- video_id, headers=headers, query={
+ page = self._download_json(
+ 'https://www.funimation.com/api/showexperience/%s/' % experience_id,
+ display_id, headers=headers, expected_status=403, query={
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
- })['items']
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- error = self._parse_json(e.cause.read(), video_id)['errors'][0]
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
- raise
+ }, note=f'Downloading {format_name} JSON')
+ sources = page.get('items') or []
+ if not sources:
+ error = try_get(page, lambda x: x['errors'][0], dict)
+ if error:
+ self.report_warning('%s said: Error %s - %s' % (
+ self.IE_NAME, error.get('code'), error.get('detail') or error.get('title')))
+ else:
+ self.report_warning('No sources found for format')
- formats = []
- for source in sources:
- source_url = source.get('src')
- if not source_url:
- continue
- source_type = source.get('videoType') or determine_ext(source_url)
- if source_type == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, video_id, 'mp4',
- m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'format_id': source_type,
- 'url': source_url,
- })
- self._sort_formats(formats)
+ current_formats = []
+ for source in sources:
+ source_url = source.get('src')
+ source_type = source.get('videoType') or determine_ext(source_url)
+ if source_type == 'm3u8':
+ current_formats.extend(self._extract_m3u8_formats(
+ source_url, display_id, 'mp4', m3u8_id='%s-%s' % (experience_id, 'hls'), fatal=False,
+ note=f'Downloading {format_name} m3u8 information'))
+ else:
+ current_formats.append({
+ 'format_id': '%s-%s' % (experience_id, source_type),
+ 'url': source_url,
+ })
+ for f in current_formats:
+ # TODO: Convert language to code
+ f.update({
+ 'language': lang,
+ 'format_note': version,
+ 'source_preference': source_preference(version.lower()),
+ 'language_preference': language_preference(lang.lower()),
+ })
+ formats.extend(current_formats)
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats, ('lang', 'source'))
return {
- 'id': video_id,
+ 'id': initial_experience_id if only_initial_experience else episode_id,
'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'series': series,
- 'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
- 'episode_number': int_or_none(title_data.get('episodeNum')),
- 'episode': episode,
- 'season_id': title_data.get('seriesId'),
+ 'duration': duration,
+ 'title': episode['episodeTitle'],
+ 'description': episode.get('episodeSummary'),
+ 'episode': episode.get('episodeTitle'),
+ 'episode_number': int_or_none(episode.get('episodeId')),
+ 'episode_id': episode_id,
+ 'season': season.get('seasonTitle'),
+ 'season_number': int_or_none(season.get('seasonId')),
+ 'season_id': str_or_none(season.get('seasonPk')),
+ 'series': show.get('showTitle'),
'formats': formats,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ }
+
+ def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
+ if isinstance(episode, str):
+ webpage = self._download_webpage(
+ f'https://www.funimation.com/player/{experience_id}', display_id,
+ fatal=False, note=f'Downloading player webpage for {format_name}')
+ episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False)
+
+ for _, version, f in self._get_experiences(episode):
+        for source in f.get('sources') or []:
+            for text_track in source.get('textTracks') or []:
+ if not text_track.get('src'):
+ continue
+ sub_type = text_track.get('type').upper()
+ sub_type = sub_type if sub_type != 'FULL' else None
+ current_sub = {
+ 'url': text_track['src'],
+ 'name': ' '.join(filter(None, (version, text_track.get('label'), sub_type)))
+ }
+ lang = '_'.join(filter(None, (
+ text_track.get('language', 'und'), version if version != 'Simulcast' else None, sub_type)))
+ if current_sub not in subtitles.get(lang, []):
+ subtitles.setdefault(lang, []).append(current_sub)
+ return subtitles
+
+
+class FunimationShowIE(FunimationBaseIE):
+ IE_NAME = 'funimation:show'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P<locale>[^/]+)?/?shows/(?P<id>[^/?#&]+))/?(?:[?#]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
+ 'info_dict': {
+ 'id': 1315000,
+ 'title': 'SK8 the Infinity'
+ },
+ 'playlist_count': 13,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # without lang code
+ 'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
+ 'info_dict': {
+ 'id': 39643,
+ 'title': 'Ouran High School Host Club'
+ },
+ 'playlist_count': 26,
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_initialize(self):
+ if not self._REGION:
+ FunimationBaseIE._REGION = self._get_region()
+
+ def _real_extract(self, url):
+ base_url, locale, display_id = self._match_valid_url(url).groups()
+
+ show_info = self._download_json(
+ 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=%s&deviceType=web&locale=%s'
+ % (display_id, self._REGION, locale or 'en'), display_id)
+ items_info = self._download_json(
+ 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
+ % show_info.get('id'), display_id)
+
+ vod_items = traverse_obj(items_info, ('items', ..., re.compile('(?i)mostRecent[AS]vod').match, 'item'))
+
+ return {
+ '_type': 'playlist',
+ 'id': show_info['id'],
+ 'title': show_info['name'],
+ 'entries': orderedSet(
+ self.url_result(
+ '%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(),
+ vod_item.get('episodeId'), vod_item.get('episodeName'))
+ for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder', -1))),
}
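FunimationIE above ranks the available audio languages and cut versions with the qualities utility. A minimal sketch of that helper's behavior, condensed from hypervideo_dl.utils, showing why the requested lists are reversed before being passed in (later entries rank higher):

def qualities(quality_ids):
    # Rank an item by its position in quality_ids; unknown items rank lowest.
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q


# ['uncut', 'simulcast'][::-1] puts 'uncut' last, i.e. ranked highest
source_preference = qualities(['uncut', 'simulcast'][::-1])
assert source_preference('uncut') > source_preference('simulcast')
assert source_preference('extras') == -1  # unrequested versions sort last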
diff --git a/hypervideo_dl/extractor/funk.py b/hypervideo_dl/extractor/funk.py
index 81d1949..e5e3260 100644
--- a/hypervideo_dl/extractor/funk.py
+++ b/hypervideo_dl/extractor/funk.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from .nexx import NexxIE
@@ -31,7 +30,7 @@ class FunkIE(InfoExtractor):
}]
def _real_extract(self, url):
- display_id, nexx_id = re.match(self._VALID_URL, url).groups()
+ display_id, nexx_id = self._match_valid_url(url).groups()
video = self._download_json(
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
return {
diff --git a/hypervideo_dl/extractor/fxnetworks.py b/hypervideo_dl/extractor/fxnetworks.py
new file mode 100644
index 0000000..00e6742
--- /dev/null
+++ b/hypervideo_dl/extractor/fxnetworks.py
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .adobepass import AdobePassIE
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ parse_age_limit,
+ smuggle_url,
+ update_url_query,
+)
+
+
+class FXNetworksIE(AdobePassIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.fxnetworks.com/video/1032565827847',
+ 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
+ 'info_dict': {
+ 'id': 'dRzwHC_MMqIv',
+ 'ext': 'mp4',
+ 'title': 'First Look: Better Things - Season 2',
+ 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
+ 'age_limit': 14,
+ 'uploader': 'NEWA-FNG-FX',
+ 'upload_date': '20170825',
+ 'timestamp': 1503686274,
+ 'episode_number': 0,
+ 'season_number': 2,
+ 'series': 'Better Things',
+ },
+ 'add_ie': ['ThePlatform'],
+ }, {
+ 'url': 'http://www.simpsonsworld.com/video/716094019682',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ if 'The content you are trying to access is not available in your region.' in webpage:
+ self.raise_geo_restricted()
+ video_data = extract_attributes(self._search_regex(
+ r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
+ player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
+ release_url = video_data['rel']
+ title = video_data['data-title']
+ rating = video_data.get('data-rating')
+ query = {
+ 'mbr': 'true',
+ }
+ if player_type == 'movies':
+ query.update({
+ 'manifest': 'm3u',
+ })
+ else:
+ query.update({
+ 'switch': 'http',
+ })
+ if video_data.get('data-req-auth') == '1':
+ resource = self._get_mvpd_resource(
+ video_data['data-channel'], title,
+ video_data.get('data-guid'), rating)
+ query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
+ 'series': video_data.get('data-show-title'),
+ 'episode_number': int_or_none(video_data.get('data-episode')),
+ 'season_number': int_or_none(video_data.get('data-season')),
+ 'thumbnail': video_data.get('data-large-thumb'),
+ 'age_limit': parse_age_limit(rating),
+ 'ie_key': 'ThePlatform',
+ }
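The url_transparent result above smuggles {'force_smil_url': True} into the ThePlatform URL. A small sketch of that round trip, assuming the helpers pack the extra data into the URL fragment the way hypervideo_dl.utils does:

import json
from urllib.parse import quote, unquote


def smuggle_url(url, data):
    # Pack extra data into the fragment so the next extractor can read it.
    return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))


def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
    return url, json.loads(unquote(payload))


url = smuggle_url('https://link.theplatform.com/s/x/y', {'force_smil_url': True})
plain, data = unsmuggle_url(url)
assert plain == 'https://link.theplatform.com/s/x/y'
assert data == {'force_smil_url': True}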
diff --git a/hypervideo_dl/extractor/gab.py b/hypervideo_dl/extractor/gab.py
new file mode 100644
index 0000000..25b5cb0
--- /dev/null
+++ b/hypervideo_dl/extractor/gab.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ str_to_int,
+)
+
+
+class GabTVIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488',
+ 'info_dict': {
+ 'id': '61217eacea5665de450d0488',
+ 'ext': 'mp4',
+ 'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY',
+ 'description': None,
+ 'uploader': 'Wurzelroot',
+ 'uploader_id': '608fb0a85738fd1974984f7d',
+ 'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488',
+ }
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url).split('-')[-1]
+ webpage = self._download_webpage(url, id)
+ channel_id = self._search_regex(r'data-channel-id=\"(?P<channel_id>[^\"]+)', webpage, 'channel_id')
+        channel_name = self._search_regex(r'data-channel-name=\"(?P<channel_name>[^\"]+)', webpage, 'channel_name')
+        title = self._search_regex(r'data-episode-title=\"(?P<title>[^\"]+)', webpage, 'title')
+        view_key = self._search_regex(r'data-view-key=\"(?P<view_key>[^\"]+)', webpage, 'view_key')
+ description = clean_html(self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content')) or None
+ available_resolutions = re.findall(r'<a\ data-episode-id=\"%s\"\ data-resolution=\"(?P<resolution>[^\"]+)' % id, webpage)
+
+ formats = []
+ for resolution in available_resolutions:
+ frmt = {
+ 'url': f'https://tv.gab.com/media/{id}?viewKey={view_key}&r={resolution}',
+ 'format_id': resolution,
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'ext': 'mp4'
+ }
+ if 'audio-' in resolution:
+ frmt['abr'] = str_to_int(resolution.replace('audio-', ''))
+ frmt['height'] = 144
+ frmt['quality'] = -10
+ else:
+ frmt['height'] = str_to_int(resolution.replace('p', ''))
+ formats.append(frmt)
+ self._sort_formats(formats)
+
+ return {
+ 'id': id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'uploader': channel_name,
+ 'uploader_id': channel_id,
+ 'thumbnail': f'https://tv.gab.com/image/{id}',
+ }
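The loop above turns each advertised resolution string into one format dict: audio-<bitrate> entries become deprioritized audio-only formats, and anything else is treated as <height>p video. A compact sketch of that mapping, with assumed sample values:

def map_resolution(resolution):
    # 'audio-128' -> audio-only entry; '720p' -> video entry of that height
    if resolution.startswith('audio-'):
        return {'abr': int(resolution[len('audio-'):]),
                'height': 144, 'quality': -10}
    return {'height': int(resolution.rstrip('p'))}


assert map_resolution('audio-128') == {'abr': 128, 'height': 144, 'quality': -10}
assert map_resolution('720p') == {'height': 720}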
diff --git a/hypervideo_dl/extractor/gaia.py b/hypervideo_dl/extractor/gaia.py
index e952775..7821fb7 100644
--- a/hypervideo_dl/extractor/gaia.py
+++ b/hypervideo_dl/extractor/gaia.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import (
@@ -76,7 +75,7 @@ class GaiaIE(InfoExtractor):
self._jwt = auth.get('jwt')
def _real_extract(self, url):
- display_id, vtype = re.search(self._VALID_URL, url).groups()
+ display_id, vtype = self._match_valid_url(url).groups()
node_id = self._download_json(
'https://brooklyn.gaia.com/pathinfo', display_id, query={
'path': 'video/' + display_id,
diff --git a/hypervideo_dl/extractor/gamestar.py b/hypervideo_dl/extractor/gamestar.py
index f00dab2..e882fa6 100644
--- a/hypervideo_dl/extractor/gamestar.py
+++ b/hypervideo_dl/extractor/gamestar.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -34,7 +33,7 @@ class GameStarIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
site = mobj.group('site')
video_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/gaskrank.py b/hypervideo_dl/extractor/gaskrank.py
index 1726a67..03acd2a 100644
--- a/hypervideo_dl/extractor/gaskrank.py
+++ b/hypervideo_dl/extractor/gaskrank.py
@@ -51,7 +51,7 @@ class GaskrankIE(InfoExtractor):
webpage, default=None) or self._html_search_meta(
'title', webpage, fatal=True)
- categories = [re.match(self._VALID_URL, url).group('categories')]
+ categories = [self._match_valid_url(url).group('categories')]
mobj = re.search(
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
diff --git a/hypervideo_dl/extractor/gazeta.py b/hypervideo_dl/extractor/gazeta.py
index 57c67a4..3671870 100644
--- a/hypervideo_dl/extractor/gazeta.py
+++ b/hypervideo_dl/extractor/gazeta.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -34,7 +33,7 @@ class GazetaIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('id')
embed_url = '%s?p=embed' % mobj.group('url')
diff --git a/hypervideo_dl/extractor/gdcvault.py b/hypervideo_dl/extractor/gdcvault.py
index acc6478..c3ad6b4 100644
--- a/hypervideo_dl/extractor/gdcvault.py
+++ b/hypervideo_dl/extractor/gdcvault.py
@@ -149,7 +149,7 @@ class GDCVaultIE(InfoExtractor):
return start_page
def _real_extract(self, url):
- video_id, name = re.match(self._VALID_URL, url).groups()
+ video_id, name = self._match_valid_url(url).groups()
display_id = name or video_id
webpage_url = 'http://www.gdcvault.com/play/' + video_id
diff --git a/hypervideo_dl/extractor/gedidigital.py b/hypervideo_dl/extractor/gedidigital.py
index 6c4153b..ec386c2 100644
--- a/hypervideo_dl/extractor/gedidigital.py
+++ b/hypervideo_dl/extractor/gedidigital.py
@@ -5,18 +5,22 @@ import re
from .common import InfoExtractor
from ..utils import (
+ base_url,
determine_ext,
int_or_none,
+ url_basename,
+ urljoin,
)
class GediDigitalIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://video\.
+ _VALID_URL = r'''(?x)(?P<url>(?:https?:)//video\.
(?:
(?:
(?:espresso\.)?repubblica
|lastampa
|ilsecoloxix
+ |huffingtonpost
)|
(?:
iltirreno
@@ -32,12 +36,12 @@ class GediDigitalIE(InfoExtractor):
|corrierealpi
|lasentinella
)\.gelocal
- )\.it(?:/[^/]+){2,3}?/(?P<id>\d+)(?:[/?&#]|$)'''
+ )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*)'''
_TESTS = [{
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
'md5': '84658d7fb9e55a6e57ecc77b73137494',
'info_dict': {
- 'id': '121559',
+ 'id': '121683',
'ext': 'mp4',
'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
@@ -45,6 +49,9 @@ class GediDigitalIE(InfoExtractor):
'duration': 125,
},
}, {
+ 'url': 'https://video.huffingtonpost.it/embed/politica/cotticelli-non-so-cosa-mi-sia-successo-sto-cercando-di-capire-se-ho-avuto-un-malore/29312/29276?responsive=true&el=video971040871621586700',
+ 'only_matching': True,
+ }, {
'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
'only_matching': True,
}, {
@@ -94,9 +101,49 @@ class GediDigitalIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def _sanitize_urls(urls):
+ # add protocol if missing
+ for i, e in enumerate(urls):
+ if e.startswith('//'):
+ urls[i] = 'https:%s' % e
+ # clean iframes urls
+ for i, e in enumerate(urls):
+ urls[i] = urljoin(base_url(e), url_basename(e))
+ return urls
+
+ @staticmethod
+ def _extract_urls(webpage):
+ entries = [
+ mobj.group('eurl')
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])(?P<eurl>%s)\1''' % GediDigitalIE._VALID_URL, webpage)]
+ return GediDigitalIE._sanitize_urls(entries)
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = GediDigitalIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _clean_formats(formats):
+ format_urls = set()
+ clean_formats = []
+ for f in formats:
+ if f['url'] not in format_urls:
+ if f.get('audio_ext') != 'none' and not f.get('acodec'):
+ continue
+ format_urls.add(f['url'])
+ clean_formats.append(f)
+ formats[:] = clean_formats
+
def _real_extract(self, url):
video_id = self._match_id(url)
-
+ url = self._match_valid_url(url).group('url')
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
['twitter:title', 'og:title'], webpage, fatal=True)
@@ -129,6 +176,7 @@ class GediDigitalIE(InfoExtractor):
f.update({
'abr': abr,
'tbr': abr,
+ 'acodec': ext,
'vcodec': 'none'
})
else:
@@ -148,6 +196,7 @@ class GediDigitalIE(InfoExtractor):
elif n == 'videoDuration':
duration = int_or_none(v)
+ self._clean_formats(formats)
self._sort_formats(formats)
return {
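_sanitize_urls above makes two passes: prepend a protocol to protocol-relative embeds, then rebuild each URL from its base plus last path component so iframe query junk is dropped. A standalone sketch of the same idea; the URL is a made-up example, and base_url/url_basename are approximated inline:

import re
from urllib.parse import urljoin, urlparse


def sanitize_url(e):
    if e.startswith('//'):
        e = 'https:%s' % e  # add protocol if missing
    base = re.match(r'https?://[^?#]+/', e).group(0)   # roughly base_url()
    basename = urlparse(e).path.rpartition('/')[2]     # roughly url_basename()
    return urljoin(base, basename)                     # query junk is gone


assert sanitize_url('//video.lastampa.it/embed/x/121683?el=video123') == \
    'https://video.lastampa.it/embed/x/121683'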
diff --git a/hypervideo_dl/extractor/generic.py b/hypervideo_dl/extractor/generic.py
index 7da038a..8387646 100644
--- a/hypervideo_dl/extractor/generic.py
+++ b/hypervideo_dl/extractor/generic.py
@@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .arkena import ArkenaIE
from .instagram import InstagramIE
-from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .kaltura import KalturaIE
@@ -128,9 +127,14 @@ from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE
from .vk import VKIE
from .kinja import KinjaEmbedIE
+from .gedidigital import GediDigitalIE
+from .rcs import RCSEmbedsIE
+from .bitchute import BitChuteIE
+from .rumble import RumbleEmbedIE
from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
from .simplecast import SimplecastIE
+from .wimtv import WimTVIE
class GenericIE(InfoExtractor):
@@ -216,12 +220,10 @@ class GenericIE(InfoExtractor):
'playlist': [{
'info_dict': {
'ext': 'mov',
- 'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
- 'title': 're:MSNBC Rachel Maddow',
+ 'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
+ 'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
'description': 're:.*her unique approach to storytelling.*',
- 'timestamp': int,
- 'upload_date': compat_str,
- 'duration': float,
+ 'upload_date': '20201204',
},
}],
},
@@ -1213,14 +1215,13 @@ class GenericIE(InfoExtractor):
},
{
# JWPlatform iframe
- 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
- 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+ 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
'info_dict': {
- 'id': 'O0c5JcKT',
+ 'id': 'AG26UQXM',
'ext': 'mp4',
- 'upload_date': '20171122',
- 'timestamp': 1511366290,
- 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+ 'upload_date': '20160719',
+ 'timestamp': 468923808,
+ 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
},
'add_ie': [JWPlatformIE.ie_key()],
},
@@ -1629,31 +1630,6 @@ class GenericIE(InfoExtractor):
'upload_date': '20160409',
},
},
- # LiveLeak embed
- {
- 'url': 'http://www.wykop.pl/link/3088787/',
- 'md5': '7619da8c820e835bef21a1efa2a0fc71',
- 'info_dict': {
- 'id': '874_1459135191',
- 'ext': 'mp4',
- 'title': 'Man shows poor quality of new apartment building',
- 'description': 'The wall is like a sand pile.',
- 'uploader': 'Lake8737',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
- # Another LiveLeak embed pattern (#13336)
- {
- 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
- 'info_dict': {
- 'id': '2eb_1496309988',
- 'ext': 'mp4',
- 'title': 'Thief robs place where everyone was armed',
- 'description': 'md5:694d73ee79e535953cf2488562288eee',
- 'uploader': 'brazilwtf',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
# Duplicated embedded video URLs
{
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
@@ -2253,6 +2229,95 @@ class GenericIE(InfoExtractor):
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
'only_matching': True,
+ }, {
+ # WimTv embed player
+ 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
+ 'info_dict': {
+ 'id': 'wearefmi-pt-2-2021',
+ 'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
+ },
+ 'playlist_count': 1,
+ }, {
+ # KVS Player
+ 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
+ 'info_dict': {
+ 'id': '105',
+ 'display_id': 'kelis-4th-of-july',
+ 'ext': 'mp4',
+ 'title': 'Kelis - 4th Of July',
+ 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # KVS Player
+ 'url': 'https://www.kvs-demo.com/embed/105/',
+ 'info_dict': {
+ 'id': '105',
+ 'display_id': 'kelis-4th-of-july',
+ 'ext': 'mp4',
+ 'title': 'Kelis - 4th Of July / Embed Player',
+ 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # KVS Player
+ 'url': 'https://thisvid.com/videos/french-boy-pantsed/',
+ 'md5': '3397979512c682f6b85b3b04989df224',
+ 'info_dict': {
+ 'id': '2400174',
+ 'display_id': 'french-boy-pantsed',
+ 'ext': 'mp4',
+ 'title': 'French Boy Pantsed - ThisVid.com',
+ 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://thisvid.com/embed/2400174/',
+ 'md5': '3397979512c682f6b85b3b04989df224',
+ 'info_dict': {
+ 'id': '2400174',
+ 'display_id': 'french-boy-pantsed',
+ 'ext': 'mp4',
+ 'title': 'French Boy Pantsed - ThisVid.com',
+ 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://youix.com/video/leningrad-zoj/',
+ 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+ 'info_dict': {
+ 'id': '18485',
+ 'display_id': 'leningrad-zoj',
+ 'ext': 'mp4',
+ 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
+ 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://youix.com/embed/18485',
+ 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+ 'info_dict': {
+ 'id': '18485',
+ 'display_id': 'leningrad-zoj',
+ 'ext': 'mp4',
+ 'title': 'Ленинград - ЗОЖ',
+ 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
+ 'md5': '94166bdb26b4cb1fb9214319a629fc51',
+ 'info_dict': {
+ 'id': '21217',
+ 'display_id': '40-nochey-40-nights-2016',
+ 'ext': 'mp4',
+ 'title': '40 ночей (2016) - BogMedia.org',
+ 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
+ }
},
]
@@ -2358,19 +2423,57 @@ class GenericIE(InfoExtractor):
'title': title,
}
+ def _kvs_getrealurl(self, video_url, license_code):
+ if not video_url.startswith('function/0/'):
+ return video_url # not obfuscated
+
+ url_path, _, url_query = video_url.partition('?')
+ urlparts = url_path.split('/')[2:]
+ license = self._kvs_getlicensetoken(license_code)
+ newmagic = urlparts[5][:32]
+
+ for o in range(len(newmagic) - 1, -1, -1):
+ new = ''
+ l = (o + sum([int(n) for n in license[o:]])) % 32
+
+ for i in range(0, len(newmagic)):
+ if i == o:
+ new += newmagic[l]
+ elif i == l:
+ new += newmagic[o]
+ else:
+ new += newmagic[i]
+ newmagic = new
+
+ urlparts[5] = newmagic + urlparts[5][32:]
+ return '/'.join(urlparts) + '?' + url_query
+
+ def _kvs_getlicensetoken(self, license):
+ modlicense = license.replace('$', '').replace('0', '1')
+        center = len(modlicense) // 2
+ fronthalf = int(modlicense[:center + 1])
+ backhalf = int(modlicense[center:])
+
+ modlicense = str(4 * abs(fronthalf - backhalf))
+ retval = ''
+ for o in range(0, center + 1):
+ for i in range(1, 5):
+ retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
+ return retval
+
def _real_extract(self, url):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
- default_search = self._downloader.params.get('default_search')
+ default_search = self.get_param('default_search')
if default_search is None:
default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'):
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
- self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+ self.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
elif default_search != 'fixup_error':
if default_search == 'auto_warning':
@@ -2379,7 +2482,7 @@ class GenericIE(InfoExtractor):
'Invalid URL: %r . Call hypervideo like this: hypervideo -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
expected=True)
else:
- self._downloader.report_warning(
+ self.report_warning(
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
return self.url_result('ytsearch:' + url)
@@ -2438,8 +2541,9 @@ class GenericIE(InfoExtractor):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
format_id = compat_str(m.group('format_id'))
+ subtitles = {}
if format_id.endswith('mpegurl'):
- formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
elif format_id == 'f4m':
formats = self._extract_f4m_formats(url, video_id)
else:
@@ -2451,11 +2555,12 @@ class GenericIE(InfoExtractor):
info_dict['direct'] = True
self._sort_formats(formats)
info_dict['formats'] = formats
+ info_dict['subtitles'] = subtitles
return info_dict
- if not self._downloader.params.get('test', False) and not is_intentional:
- force = self._downloader.params.get('force_generic_extractor', False)
- self._downloader.report_warning(
+ if not self.get_param('test', False) and not is_intentional:
+ force = self.get_param('force_generic_extractor', False)
+ self.report_warning(
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response:
@@ -2475,14 +2580,14 @@ class GenericIE(InfoExtractor):
# Is it an M3U playlist?
if first_bytes.startswith(b'#EXTM3U'):
- info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+ info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
self._sort_formats(info_dict['formats'])
return info_dict
# Maybe it's a direct link to a video?
# Be careful not to download the whole thing!
if not is_html(first_bytes):
- self._downloader.report_warning(
+ self.report_warning(
'URL could be a direct video link, returning it as such.')
info_dict.update({
'direct': True,
@@ -2500,11 +2605,14 @@ class GenericIE(InfoExtractor):
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
try:
- doc = compat_etree_fromstring(webpage.encode('utf-8'))
+ try:
+ doc = compat_etree_fromstring(webpage)
+ except compat_xml_parse_error:
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
elif doc.tag == 'SmoothStreamingMedia':
- info_dict['formats'] = self._parse_ism_formats(doc, url)
+ info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
@@ -2518,7 +2626,7 @@ class GenericIE(InfoExtractor):
xspf_base_url=full_response.geturl()),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
- info_dict['formats'] = self._parse_mpd_formats(
+ info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc,
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
@@ -2647,11 +2755,14 @@ class GenericIE(InfoExtractor):
if vhx_url:
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
- vid_me_embed_url = self._search_regex(
- r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
- webpage, 'vid.me embed', default=None)
- if vid_me_embed_url is not None:
- return self.url_result(vid_me_embed_url, 'Vidme')
+ # Invidious Instances
+ # https://github.com/hypervideo/hypervideo/issues/195
+ # https://github.com/iv-org/invidious/pull/1730
+ youtube_url = self._search_regex(
+ r'<link rel="alternate" href="(https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
+ webpage, 'youtube link', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
# Look for YouTube embeds
youtube_urls = YoutubeIE._extract_urls(webpage)
@@ -3179,11 +3290,6 @@ class GenericIE(InfoExtractor):
return self.url_result(
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
- # Look for LiveLeak embeds
- liveleak_urls = LiveLeakIE._extract_urls(webpage)
- if liveleak_urls:
- return self.playlist_from_matches(liveleak_urls, video_id, video_title)
-
# Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
if threeqsdn_url:
@@ -3348,6 +3454,34 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
+ gedi_urls = GediDigitalIE._extract_urls(webpage)
+ if gedi_urls:
+ return self.playlist_from_matches(
+ gedi_urls, video_id, video_title, ie=GediDigitalIE.ie_key())
+
+ # Look for RCS media group embeds
+ rcs_urls = RCSEmbedsIE._extract_urls(webpage)
+ if rcs_urls:
+ return self.playlist_from_matches(
+ rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key())
+
+ wimtv_urls = WimTVIE._extract_urls(webpage)
+ if wimtv_urls:
+ return self.playlist_from_matches(
+ wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key())
+
+ bitchute_urls = BitChuteIE._extract_urls(webpage)
+ if bitchute_urls:
+ return self.playlist_from_matches(
+ bitchute_urls, video_id, video_title, ie=BitChuteIE.ie_key())
+
+ rumble_urls = RumbleEmbedIE._extract_urls(webpage)
+ if len(rumble_urls) == 1:
+ return self.url_result(rumble_urls[0], RumbleEmbedIE.ie_key())
+ if rumble_urls:
+ return self.playlist_from_matches(
+ rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
+
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
@@ -3388,6 +3522,7 @@ class GenericIE(InfoExtractor):
if not isinstance(sources, list):
sources = [sources]
formats = []
+ subtitles = {}
for source in sources:
src = source.get('src')
if not src or not isinstance(src, compat_str):
@@ -3400,12 +3535,16 @@ class GenericIE(InfoExtractor):
if src_type == 'video/youtube':
return self.url_result(src, YoutubeIE.ie_key())
if src_type == 'application/dash+xml' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id='dash', fatal=False))
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ src, video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
src, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
+ m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'url': src,
@@ -3415,9 +3554,10 @@ class GenericIE(InfoExtractor):
'Referer': full_response.geturl(),
},
})
- if formats:
+ if formats or subtitles:
self._sort_formats(formats)
info_dict['formats'] = formats
+ info_dict['subtitles'] = subtitles
return info_dict
# Looking for http://schema.org/VideoObject
@@ -3451,6 +3591,52 @@ class GenericIE(InfoExtractor):
.*?
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
if not found:
+ # Look for generic KVS player
+ found = re.search(r'<script [^>]*?src="https://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
+ if found:
+ if found.group('maj_ver') not in ['4', '5']:
+                self.report_warning('Untested major version (%s) in player engine; download may fail.' % found.group('ver'))
+ flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
+ flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
+
+ # extract the part after the last / as the display_id from the
+ # canonical URL.
+ display_id = self._search_regex(
+ r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
+ r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
+ webpage, 'display_id', fatal=False
+ )
+ title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
+
+ thumbnail = flashvars['preview_url']
+ if thumbnail.startswith('//'):
+ protocol, _, _ = url.partition('/')
+ thumbnail = protocol + thumbnail
+
+ formats = []
+ for key in ('video_url', 'video_alt_url', 'video_alt_url2'):
+ if key in flashvars and '/get_file/' in flashvars[key]:
+ next_format = {
+ 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
+ 'format_id': flashvars.get(key + '_text', key),
+ 'ext': 'mp4',
+ }
+ height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key])
+ if height:
+ next_format['height'] = int(height.group(1))
+ else:
+ next_format['quality'] = 1
+ formats.append(next_format)
+ self._sort_formats(formats)
+
+ return {
+ 'id': flashvars['video_id'],
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+ if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
if not found:
@@ -3552,13 +3738,13 @@ class GenericIE(InfoExtractor):
ext = determine_ext(video_url)
if ext == 'smil':
- entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
+ entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'm3u8':
- entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+ entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
elif ext == 'mpd':
- entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+ entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
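The KVS branch above first repairs obfuscated function/0/... URLs: _kvs_getlicensetoken turns the license code into a digit string, which _kvs_getrealurl then uses to re-shuffle the 32-character magic path segment, while everything else (host, video id, quality suffix, query) passes through unchanged. A standalone sketch of the token derivation, with a made-up license code:

def kvs_getlicensetoken(license_code):
    # Same arithmetic as GenericIE._kvs_getlicensetoken above.
    modlicense = license_code.replace('$', '').replace('0', '1')
    center = len(modlicense) // 2
    fronthalf = int(modlicense[:center + 1])
    backhalf = int(modlicense[center:])
    modlicense = str(4 * abs(fronthalf - backhalf))
    retval = ''
    for o in range(center + 1):
        for i in range(1, 5):
            retval += str((int(license_code[o + i]) + int(modlicense[o])) % 10)
    return retval


token = kvs_getlicensetoken('$495462900')  # placeholder license code
assert len(token) == 20 and token.isdigit()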
diff --git a/hypervideo_dl/extractor/gettr.py b/hypervideo_dl/extractor/gettr.py
new file mode 100644
index 0000000..aa50b2f
--- /dev/null
+++ b/hypervideo_dl/extractor/gettr.py
@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ dict_get,
+ float_or_none,
+ int_or_none,
+ remove_end,
+ str_or_none,
+ try_get,
+ url_or_none,
+ urljoin,
+)
+
+
+class GettrIE(InfoExtractor):
+ _VALID_URL = r'https?://(www\.)?gettr\.com/post/(?P<id>[a-z0-9]+)'
+ _MEDIA_BASE_URL = 'https://media.gettr.com/'
+
+ _TESTS = [{
+ 'url': 'https://www.gettr.com/post/pcf6uv838f',
+ 'info_dict': {
+ 'id': 'pcf6uv838f',
+ 'title': 'md5:9086a646bbd06c41c4fe8e52b3c93454',
+ 'description': 'md5:be0577f1e4caadc06de4a002da2bf287',
+ 'ext': 'mp4',
+ 'uploader': 'EpochTV',
+ 'uploader_id': 'epochtv',
+ 'thumbnail': r're:^https?://.+/out\.jpg',
+ 'timestamp': 1632782451058,
+ 'duration': 58.5585,
+ }
+ }, {
+ 'url': 'https://gettr.com/post/p4iahp',
+ 'info_dict': {
+ 'id': 'p4iahp',
+ 'title': 'md5:b03c07883db6fbc1aab88877a6c3b149',
+ 'description': 'md5:741b7419d991c403196ed2ea7749a39d',
+ 'ext': 'mp4',
+ 'uploader': 'Neues Forum Freiheit',
+ 'uploader_id': 'nf_freiheit',
+ 'thumbnail': r're:^https?://.+/out\.jpg',
+ 'timestamp': 1626594455017,
+ 'duration': 23,
+ }
+ }]
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+ webpage = self._download_webpage(url, post_id)
+
+ api_data = self._download_json(
+ 'https://api.gettr.com/u/post/%s?incl="poststats|userinfo"' % post_id, post_id)
+
+ post_data = try_get(api_data, lambda x: x['result']['data'])
+ user_data = try_get(api_data, lambda x: x['result']['aux']['uinf'][post_data['uid']]) or {}
+
+ if post_data.get('nfound'):
+ raise ExtractorError(post_data.get('txt'), expected=True)
+
+ title = description = str_or_none(
+ post_data.get('txt') or self._og_search_description(webpage))
+
+ uploader = str_or_none(
+ user_data.get('nickname')
+ or remove_end(self._og_search_title(webpage), ' on GETTR'))
+ if uploader:
+ title = '%s - %s' % (uploader, title)
+
+ if not dict_get(post_data, ['vid', 'ovid']):
+ raise ExtractorError('There\'s no video in this post.')
+
+ vid = post_data.get('vid')
+ ovid = post_data.get('ovid')
+
+ formats = self._extract_m3u8_formats(
+ urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls') if vid else []
+
+ if ovid:
+ formats.append({
+ 'url': urljoin(self._MEDIA_BASE_URL, ovid),
+ 'format_id': 'ovid',
+ 'ext': 'mp4',
+ 'width': int_or_none(post_data.get('vid_wid')),
+ 'height': int_or_none(post_data.get('vid_hgt')),
+ 'source_preference': 1,
+ 'quality': 1,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': post_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': url_or_none(
+ urljoin(self._MEDIA_BASE_URL, post_data.get('main'))
+ or self._og_search_thumbnail(webpage)),
+ 'timestamp': int_or_none(post_data.get('cdate')),
+ 'uploader_id': str_or_none(
+ dict_get(user_data, ['_id', 'username'])
+ or post_data.get('uid')),
+ 'uploader': uploader,
+ 'formats': formats,
+ 'duration': float_or_none(post_data.get('vid_dur')),
+ 'tags': post_data.get('htgs'),
+ }
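In the format list above, the ovid entry is the original uploaded file; giving it quality=1 and source_preference=1 makes _sort_formats rank it above the HLS renditions. A toy sketch of that ordering effect, with field values assumed rather than taken from the site:

formats = [
    {'format_id': 'hls-720', 'quality': 0, 'source_preference': 0},
    {'format_id': 'ovid', 'quality': 1, 'source_preference': 1},
]
formats.sort(key=lambda f: (f['quality'], f['source_preference']))
assert formats[-1]['format_id'] == 'ovid'  # best format sorts last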
diff --git a/hypervideo_dl/extractor/giantbomb.py b/hypervideo_dl/extractor/giantbomb.py
index c647795..1920923 100644
--- a/hypervideo_dl/extractor/giantbomb.py
+++ b/hypervideo_dl/extractor/giantbomb.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
import json
from .common import InfoExtractor
@@ -32,7 +31,7 @@ class GiantBombIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/globo.py b/hypervideo_dl/extractor/globo.py
index 60d842d..a3f0241 100644
--- a/hypervideo_dl/extractor/globo.py
+++ b/hypervideo_dl/extractor/globo.py
@@ -9,15 +9,14 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
float_or_none,
- int_or_none,
orderedSet,
str_or_none,
+ try_get,
)
@@ -26,18 +25,19 @@ class GloboIE(InfoExtractor):
_NETRC_MACHINE = 'globo'
_TESTS = [{
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
- 'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
'info_dict': {
'id': '3607726',
'ext': 'mp4',
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
'duration': 103.204,
- 'uploader': 'Globo.com',
- 'uploader_id': '265',
+ 'uploader': 'G1',
+ 'uploader_id': '2015',
+ },
+ 'params': {
+ 'skip_download': True,
},
}, {
'url': 'http://globoplay.globo.com/v/4581987/',
- 'md5': 'f36a1ecd6a50da1577eee6dd17f67eff',
'info_dict': {
'id': '4581987',
'ext': 'mp4',
@@ -46,6 +46,9 @@ class GloboIE(InfoExtractor):
'uploader': 'Rede Globo',
'uploader_id': '196',
},
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
'only_matching': True,
@@ -66,109 +69,79 @@ class GloboIE(InfoExtractor):
'only_matching': True,
}]
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
-
- try:
- glb_id = (self._download_json(
- 'https://login.globo.com/api/authentication', None, data=json.dumps({
- 'payload': {
- 'email': email,
- 'password': password,
- 'serviceId': 4654,
- },
- }).encode(), headers={
- 'Content-Type': 'application/json; charset=utf-8',
- }) or {}).get('glbId')
- if glb_id:
- self._set_cookie('.globo.com', 'GLBID', glb_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- resp = self._parse_json(e.cause.read(), None)
- raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
- raise
-
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'http://api.globovideos.com/videos/%s/playlist' % video_id,
video_id)['videos'][0]
- if video.get('encrypted') is True:
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
+ self.report_drm(video_id)
title = video['title']
formats = []
+ security = self._download_json(
+ 'https://playback.video.globo.com/v1/video-session', video_id, 'Downloading security hash for %s' % video_id,
+ headers={'content-type': 'application/json'}, data=json.dumps({
+ "player_type": "desktop",
+ "video_id": video_id,
+ "quality": "max",
+ "content_protection": "widevine",
+ "vsid": "581b986b-4c40-71f0-5a58-803e579d5fa2",
+ "tz": "-3.0:00"
+ }).encode())
+
+ security_hash = security['source']['token']
+ if not security_hash:
+ message = security.get('message')
+ if message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, message), expected=True)
+
+ hash_code = security_hash[:2]
+ padding = '%010d' % random.randint(1, 10000000000)
+ if hash_code in ('04', '14'):
+ received_time = security_hash[3:13]
+ received_md5 = security_hash[24:]
+ hash_prefix = security_hash[:23]
+ elif hash_code in ('02', '12', '03', '13'):
+ received_time = security_hash[2:12]
+ received_md5 = security_hash[22:]
+ padding += '1'
+ hash_prefix = '05' + security_hash[:22]
+
+ padded_sign_time = compat_str(int(received_time) + 86400) + padding
+ md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
+ signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
+ signed_hash = hash_prefix + padded_sign_time + signed_md5
+ source = security['source']['url_parts']
+ resource_url = source['scheme'] + '://' + source['domain'] + source['path']
+ signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
+
+ formats.extend(self._extract_m3u8_formats(
+ signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
subtitles = {}
for resource in video['resources']:
- resource_id = resource.get('_id')
- resource_url = resource.get('url')
- resource_type = resource.get('type')
- if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
- continue
-
- if resource_type == 'subtitle':
+ if resource.get('type') == 'subtitle':
subtitles.setdefault(resource.get('language') or 'por', []).append({
- 'url': resource_url,
+ 'url': resource.get('url'),
})
- continue
-
- security = self._download_json(
- 'http://security.video.globo.com/videos/%s/hash' % video_id,
- video_id, 'Downloading security hash for %s' % resource_id, query={
- 'player': 'desktop',
- 'version': '5.19.1',
- 'resource_id': resource_id,
+ subs = try_get(security, lambda x: x['source']['subtitles'], expected_type=dict) or {}
+ for sub_lang, sub_url in subs.items():
+ if sub_url:
+ subtitles.setdefault(sub_lang or 'por', []).append({
+ 'url': sub_url,
})
-
- security_hash = security.get('hash')
- if not security_hash:
- message = security.get('message')
- if message:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, message), expected=True)
- continue
-
- hash_code = security_hash[:2]
- padding = '%010d' % random.randint(1, 10000000000)
- if hash_code in ('04', '14'):
- received_time = security_hash[3:13]
- received_md5 = security_hash[24:]
- hash_prefix = security_hash[:23]
- elif hash_code in ('02', '12', '03', '13'):
- received_time = security_hash[2:12]
- received_md5 = security_hash[22:]
- padding += '1'
- hash_prefix = '05' + security_hash[:22]
-
- padded_sign_time = compat_str(int(received_time) + 86400) + padding
- md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
- signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
- signed_hash = hash_prefix + padded_sign_time + signed_md5
- signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
-
- if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(
- signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'):
- formats.extend(self._extract_mpd_formats(
- signed_url, resource_id, mpd_id='dash', fatal=False))
- elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'):
- formats.extend(self._extract_ism_formats(
- signed_url, resource_id, ism_id='mss', fatal=False))
- else:
- formats.append({
- 'url': signed_url,
- 'format_id': 'http-%s' % resource_id,
- 'height': int_or_none(resource.get('height')),
+ subs = try_get(security, lambda x: x['source']['subtitles_webvtt'], expected_type=dict) or {}
+ for sub_lang, sub_url in subs.items():
+ if sub_url:
+ subtitles.setdefault(sub_lang or 'por', []).append({
+ 'url': sub_url,
})
- self._sort_formats(formats)
-
duration = float_or_none(video.get('duration'), 1000)
uploader = video.get('channel')
uploader_id = str_or_none(video.get('channel_id'))
diff --git a/hypervideo_dl/extractor/go.py b/hypervideo_dl/extractor/go.py
index 878ba14..2ccc6df 100644
--- a/hypervideo_dl/extractor/go.py
+++ b/hypervideo_dl/extractor/go.py
@@ -9,6 +9,8 @@ from ..utils import (
int_or_none,
determine_ext,
parse_age_limit,
+ remove_start,
+ remove_end,
try_get,
urlencode_postdata,
ExtractorError,
@@ -48,15 +50,15 @@ class GoIE(AdobePassIE):
}
_VALID_URL = r'''(?x)
https?://
- (?:
- (?:(?P<sub_domain>%s)\.)?go|
- (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
+ (?P<sub_domain>
+ (?:%s\.)?go|fxnow\.fxnetworks|
+ (?:www\.)?(?:abc|freeform|disneynow)
)\.com/
(?:
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
)
- ''' % '|'.join(list(_SITE_INFO.keys()))
+ ''' % r'\.|'.join(list(_SITE_INFO.keys()))
_TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': {
@@ -147,6 +149,9 @@ class GoIE(AdobePassIE):
}, {
'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
'only_matching': True,
+ }, {
+ 'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
+ 'only_matching': True,
}]
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
@@ -156,8 +161,8 @@ class GoIE(AdobePassIE):
display_id)['video']
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
+ mobj = self._match_valid_url(url)
+ sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
video_id, display_id = mobj.group('id', 'display_id')
site_info = self._SITE_INFO.get(sub_domain, {})
brand = site_info.get('brand')
@@ -262,7 +267,7 @@ class GoIE(AdobePassIE):
if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
f.update({
'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
- 'preference': 1,
+ 'quality': 1,
})
else:
mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
diff --git a/hypervideo_dl/extractor/godtube.py b/hypervideo_dl/extractor/godtube.py
index 92efd16..96e68b4 100644
--- a/hypervideo_dl/extractor/godtube.py
+++ b/hypervideo_dl/extractor/godtube.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -29,7 +28,7 @@ class GodTubeIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
config = self._download_xml(
diff --git a/hypervideo_dl/extractor/googledrive.py b/hypervideo_dl/extractor/googledrive.py
index 3f2de00..7b5bf28 100644
--- a/hypervideo_dl/extractor/googledrive.py
+++ b/hypervideo_dl/extractor/googledrive.py
@@ -253,7 +253,7 @@ class GoogleDriveIE(InfoExtractor):
or 'unable to extract confirmation code')
if not formats and reason:
- raise ExtractorError(reason, expected=True)
+ self.raise_no_formats(reason, expected=True)
self._sort_formats(formats)
@@ -266,6 +266,8 @@ class GoogleDriveIE(InfoExtractor):
subtitles_id = ttsurl.encode('utf-8').decode(
'unicode_escape').split('=')[-1]
+ self._downloader.cookiejar.clear(domain='.google.com', path='/', name='NID')
+
return {
'id': video_id,
'title': title,
diff --git a/hypervideo_dl/extractor/googlepodcasts.py b/hypervideo_dl/extractor/googlepodcasts.py
index 31ad799..25631e2 100644
--- a/hypervideo_dl/extractor/googlepodcasts.py
+++ b/hypervideo_dl/extractor/googlepodcasts.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -56,7 +55,7 @@ class GooglePodcastsIE(GooglePodcastsBaseIE):
}
def _real_extract(self, url):
- b64_feed_url, b64_guid = re.match(self._VALID_URL, url).groups()
+ b64_feed_url, b64_guid = self._match_valid_url(url).groups()
episode = self._batch_execute(
'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
return self._extract_episode(episode)
diff --git a/hypervideo_dl/extractor/googlesearch.py b/hypervideo_dl/extractor/googlesearch.py
index 5279fa8..f605c0c 100644
--- a/hypervideo_dl/extractor/googlesearch.py
+++ b/hypervideo_dl/extractor/googlesearch.py
@@ -11,6 +11,7 @@ class GoogleSearchIE(SearchInfoExtractor):
_MAX_RESULTS = 1000
IE_NAME = 'video.google:search'
_SEARCH_KEY = 'gvsearch'
+ _WORKING = False
_TEST = {
'url': 'gvsearch15:python language',
'info_dict': {
@@ -20,16 +21,7 @@ class GoogleSearchIE(SearchInfoExtractor):
'playlist_count': 15,
}
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
-
- entries = []
- res = {
- '_type': 'playlist',
- 'id': query,
- 'title': query,
- }
-
+ def _search_results(self, query):
for pagenum in itertools.count():
webpage = self._download_webpage(
'http://www.google.com/search',
@@ -44,16 +36,8 @@ class GoogleSearchIE(SearchInfoExtractor):
for hit_idx, mobj in enumerate(re.finditer(
r'<h3 class="r"><a href="([^"]+)"', webpage)):
- # Skip playlists
- if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
- continue
-
- entries.append({
- '_type': 'url',
- 'url': mobj.group(1)
- })
-
- if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
- res['entries'] = entries[:n]
- return res
+ if re.search(f'id="vidthumb{hit_idx + 1}"', webpage):
+ yield self.url_result(mobj.group(1))
+ if not re.search(r'id="pnnext"', webpage):
+ return
diff --git a/hypervideo_dl/extractor/gopro.py b/hypervideo_dl/extractor/gopro.py
new file mode 100644
index 0000000..10cc1ae
--- /dev/null
+++ b/hypervideo_dl/extractor/gopro.py
@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ remove_end,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class GoProIE(InfoExtractor):
+ _VALID_URL = r'https?://(www\.)?gopro\.com/v/(?P<id>[A-Za-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://gopro.com/v/ZNVvED8QDzR5V',
+ 'info_dict': {
+ 'id': 'ZNVvED8QDzR5V',
+ 'title': 'My GoPro Adventure - 9/19/21',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ 'timestamp': 1632072947,
+ 'upload_date': '20210919',
+ 'uploader_id': 'fireydive30018',
+ 'duration': 396062,
+ }
+ }, {
+ 'url': 'https://gopro.com/v/KRm6Vgp2peg4e',
+ 'info_dict': {
+ 'id': 'KRm6Vgp2peg4e',
+ 'title': 'じゃがいも カリカリ オーブン焼き',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ 'timestamp': 1607231125,
+ 'upload_date': '20201206',
+ 'uploader_id': 'dc9bcb8b-47d2-47c6-afbc-4c48f9a3769e',
+ 'duration': 45187,
+ 'track': 'The Sky Machine',
+ }
+ }, {
+ 'url': 'https://gopro.com/v/kVrK9wlJvBMwn',
+ 'info_dict': {
+ 'id': 'kVrK9wlJvBMwn',
+ 'title': 'DARKNESS',
+ 'thumbnail': r're:https?://.+',
+ 'ext': 'mp4',
+ 'timestamp': 1594183735,
+ 'upload_date': '20200708',
+ 'uploader_id': '闇夜乃皇帝',
+ 'duration': 313075,
+ 'track': 'Battery (Live)',
+ 'artist': 'Metallica',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ metadata = self._parse_json(
+ self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id)
+
+ video_info = metadata['collectionMedia'][0]
+ media_data = self._download_json(
+ 'https://api.gopro.com/media/%s/download' % video_info['id'], video_id)
+
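+ # the download endpoint lists every available rendition under
+ # '_embedded.variations', each with its own quality/label/dimensions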
+ formats = []
+ for fmt in try_get(media_data, lambda x: x['_embedded']['variations']) or []:
+ format_url = url_or_none(fmt.get('url'))
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': str_or_none(fmt.get('quality')),
+ 'format_note': str_or_none(fmt.get('label')),
+ 'ext': str_or_none(fmt.get('type')),
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ })
+
+ self._sort_formats(formats)
+
+ title = str_or_none(
+ try_get(metadata, lambda x: x['collection']['title'])
+ or self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ or remove_end(self._html_search_regex(
+ r'<title[^>]*>([^<]+)</title>', webpage, 'title', fatal=False), ' | GoPro'))
+ if title:
+ title = title.replace('\n', ' ')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': url_or_none(
+ self._html_search_meta(['og:image', 'twitter:image'], webpage)),
+ 'timestamp': unified_timestamp(
+ try_get(metadata, lambda x: x['collection']['created_at'])),
+ 'uploader_id': str_or_none(
+ try_get(metadata, lambda x: x['account']['nickname'])),
+ 'duration': int_or_none(
+ video_info.get('source_duration')),
+ 'artist': str_or_none(
+ video_info.get('music_track_artist')),
+ 'track': str_or_none(
+ video_info.get('music_track_name')),
+ }
diff --git a/hypervideo_dl/extractor/gotostage.py b/hypervideo_dl/extractor/gotostage.py
new file mode 100644
index 0000000..6aa9610
--- /dev/null
+++ b/hypervideo_dl/extractor/gotostage.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ try_get,
+ url_or_none
+)
+
+import json
+
+
+class GoToStageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gotostage\.com/channel/[a-z0-9]+/recording/(?P<id>[a-z0-9]+)/watch'
+ _TESTS = [{
+ 'url': 'https://www.gotostage.com/channel/8901680603948959494/recording/60bb55548d434f21b9ce4f0e225c4895/watch',
+ 'md5': 'ca72ce990cdcd7a2bd152f7217e319a2',
+ 'info_dict': {
+ 'id': '60bb55548d434f21b9ce4f0e225c4895',
+ 'ext': 'mp4',
+ 'title': 'What is GoToStage?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 93.924711
+ }
+ }, {
+ 'url': 'https://www.gotostage.com/channel/bacc3d3535b34bafacc3f4ef8d4df78a/recording/831e74cd3e0042be96defba627b6f676/watch?source=HOMEPAGE',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ metadata = self._download_json(
+ 'https://api.gotostage.com/contents?ids=%s' % video_id,
+ video_id,
+ note='Downloading video metadata',
+ errnote='Unable to download video metadata')[0]
+
+ registration_data = {
+ 'product': metadata['product'],
+ 'resourceType': metadata['contentType'],
+ 'productReferenceKey': metadata['productRefKey'],
+ 'firstName': 'foo',
+ 'lastName': 'bar',
+ 'email': 'foobar@example.com'
+ }
+
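+ # register a throwaway viewer to obtain a registrationKey; HTTP 409
+ # (already registered) is tolerated via expected_status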
+ registration_response = self._download_json(
+ 'https://api-registrations.logmeininc.com/registrations',
+ video_id,
+ data=json.dumps(registration_data).encode(),
+ expected_status=409,
+ headers={'Content-Type': 'application/json'},
+ note='Register user',
+ errnote='Unable to register user')
+
+ content_response = self._download_json(
+ 'https://api.gotostage.com/contents/%s/asset' % video_id,
+ video_id,
+ headers={'x-registrantkey': registration_response['registrationKey']},
+ note='Get download url',
+ errnote='Unable to get download url')
+
+ return {
+ 'id': video_id,
+ 'title': try_get(metadata, lambda x: x['title'], compat_str),
+ 'url': try_get(content_response, lambda x: x['cdnLocation'], compat_str),
+ 'ext': 'mp4',
+ 'thumbnail': url_or_none(try_get(metadata, lambda x: x['thumbnail']['location'])),
+ 'duration': try_get(metadata, lambda x: x['duration'], float),
+ 'categories': [try_get(metadata, lambda x: x['category'], compat_str)],
+ 'is_live': False
+ }
diff --git a/hypervideo_dl/extractor/gronkh.py b/hypervideo_dl/extractor/gronkh.py
new file mode 100644
index 0000000..a7792a5
--- /dev/null
+++ b/hypervideo_dl/extractor/gronkh.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class GronkhIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://gronkh.tv/stream/536',
+ 'info_dict': {
+ 'id': '536',
+ 'ext': 'mp4',
+ 'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv',
+ 'view_count': 19491,
+ 'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
+ 'upload_date': '20211001'
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://api.gronkh.tv/v1/video/info?episode={id}', id)
+ m3u8_url = self._download_json(f'https://api.gronkh.tv/v1/video/playlist?episode={id}', id)['playlist_url']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
+ if data_json.get('vtt_url'):
+ subtitles.setdefault('en', []).append({
+ 'url': data_json['vtt_url'],
+ 'ext': 'vtt',
+ })
+ self._sort_formats(formats)
+ return {
+ 'id': id,
+ 'title': data_json.get('title'),
+ 'view_count': data_json.get('views'),
+ 'thumbnail': data_json.get('preview_url'),
+ 'upload_date': unified_strdate(data_json.get('created_at')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/hearthisat.py b/hypervideo_dl/extractor/hearthisat.py
index 18c2520..a3d6a05 100644
--- a/hypervideo_dl/extractor/hearthisat.py
+++ b/hypervideo_dl/extractor/hearthisat.py
@@ -1,17 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
from ..utils import (
- HEADRequest,
+ determine_ext,
KNOWN_EXTENSIONS,
- sanitized_Request,
str_to_int,
- urlencode_postdata,
- urlhandle_detect_ext,
)
@@ -27,13 +22,11 @@ class HearThisAtIE(InfoExtractor):
'title': 'Moofi - Dr. Kreep',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421564134,
- 'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
+ 'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
'upload_date': '20150118',
- 'comment_count': int,
'view_count': int,
- 'like_count': int,
'duration': 71,
- 'categories': ['Experimental'],
+ 'genre': 'Experimental',
}
}, {
# 'download' link redirects to the original webpage
@@ -43,79 +36,54 @@ class HearThisAtIE(InfoExtractor):
'id': '811296',
'ext': 'mp3',
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
- 'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
+ 'description': 'md5:ef26815ca8f483272a87b137ff175be2',
'upload_date': '20160328',
'timestamp': 1459186146,
'thumbnail': r're:^https?://.*\.jpg$',
- 'comment_count': int,
'view_count': int,
- 'like_count': int,
'duration': 4360,
- 'categories': ['Dance'],
+ 'genre': 'Dance',
},
}]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
+ m = self._match_valid_url(url)
display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
-
- webpage = self._download_webpage(url, display_id)
- track_id = self._search_regex(
- r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
-
- payload = urlencode_postdata({'tracks[]': track_id})
- req = sanitized_Request(self._PLAYLIST_URL, payload)
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
- track = self._download_json(req, track_id, 'Downloading playlist')[0]
- title = '{artist:s} - {title:s}'.format(**track)
-
- categories = None
- if track.get('category'):
- categories = [track['category']]
-
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
- view_count = str_to_int(self._search_regex(
- meta_span % 'plays_count', webpage, 'view count', fatal=False))
- like_count = str_to_int(self._search_regex(
- meta_span % 'likes_count', webpage, 'like count', fatal=False))
- comment_count = str_to_int(self._search_regex(
- meta_span % 'comment_count', webpage, 'comment count', fatal=False))
- duration = str_to_int(self._search_regex(
- r'data-length="(\d+)', webpage, 'duration', fatal=False))
- timestamp = str_to_int(self._search_regex(
- r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
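+ # the public page URL maps directly onto the api-v2 JSON endpoint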
+ api_url = url.replace('www.', '').replace('hearthis.at', 'api-v2.hearthis.at')
+ data_json = self._download_json(api_url, display_id)
+ track_id = data_json.get('id')
+ artist_json = data_json.get('user')
+ title = '{} - {}'.format(artist_json.get('username'), data_json.get('title'))
+ genre = data_json.get('genre')
+ description = data_json.get('description')
+ thumbnail = data_json.get('artwork_url') or data_json.get('thumb')
+ view_count = str_to_int(data_json.get('playback_count'))
+ duration = str_to_int(data_json.get('duration'))
+ timestamp = data_json.get('release_timestamp')
formats = []
- mp3_url = self._search_regex(
- r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
- webpage, 'mp3 URL', fatal=False)
+ mp3_url = data_json.get('stream_url')
+
if mp3_url:
formats.append({
'format_id': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
'url': mp3_url,
+ 'ext': 'mp3',
})
- download_path = self._search_regex(
- r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
- webpage, 'download URL', default=None)
- if download_path:
- download_url = compat_urlparse.urljoin(url, download_path)
- ext_req = HEADRequest(download_url)
- ext_handle = self._request_webpage(
- ext_req, display_id, note='Determining extension')
- ext = urlhandle_detect_ext(ext_handle)
+
+ if data_json.get('download_url'):
+ download_url = data_json['download_url']
+ ext = determine_ext(data_json['download_filename'])
if ext in KNOWN_EXTENSIONS:
formats.append({
- 'format_id': 'download',
+ 'format_id': ext,
'vcodec': 'none',
'ext': ext,
'url': download_url,
- 'preference': 2, # Usually better quality
+ 'acodec': ext,
+ 'quality': 2, # Usually better quality
})
self._sort_formats(formats)
@@ -129,7 +97,5 @@ class HearThisAtIE(InfoExtractor):
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
- 'comment_count': comment_count,
- 'like_count': like_count,
- 'categories': categories,
+ 'genre': genre,
}
diff --git a/hypervideo_dl/extractor/hidive.py b/hypervideo_dl/extractor/hidive.py
index f26f802..15bd444 100644
--- a/hypervideo_dl/extractor/hidive.py
+++ b/hypervideo_dl/extractor/hidive.py
@@ -1,20 +1,18 @@
# coding: utf-8
-from __future__ import unicode_literals
-
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
+ try_get,
url_or_none,
urlencode_postdata,
)
class HiDiveIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
# Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
# so disabling geo bypass completely
_GEO_BYPASS = False
@@ -54,65 +52,71 @@ class HiDiveIE(InfoExtractor):
self._download_webpage(
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- title, key = mobj.group('title', 'key')
- video_id = '%s/%s' % (title, key)
-
- settings = self._download_json(
+ def _call_api(self, video_id, title, key, data={}, **kwargs):
+ data = {
+ **data,
+ 'Title': title,
+ 'Key': key,
+ 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
+ }
+ return self._download_json(
'https://www.hidive.com/play/settings', video_id,
- data=urlencode_postdata({
- 'Title': title,
- 'Key': key,
- 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
- }))
+ data=urlencode_postdata(data), **kwargs) or {}
+
+ def _extract_subtitles_from_rendition(self, rendition, subtitles, parsed_urls):
+ for cc_file in rendition.get('ccFiles', []):
+ cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
+ # the name is used since we can't distinguish subs with the same language code
+ cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
+ if cc_url not in parsed_urls and cc_lang:
+ parsed_urls.add(cc_url)
+ subtitles.setdefault(cc_lang, []).append({'url': cc_url})
+
+ def _get_subtitles(self, url, video_id, title, key, parsed_urls):
+ webpage = self._download_webpage(url, video_id, fatal=False) or ''
+ subtitles = {}
+ for caption in set(re.findall(r'data-captions=\"([^\"]+)\"', webpage)):
+ renditions = self._call_api(
+ video_id, title, key, {'Captions': caption}, fatal=False,
+ note=f'Downloading {caption} subtitle information').get('renditions') or {}
+ for rendition_id, rendition in renditions.items():
+ self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls)
+ return subtitles
+
+ def _real_extract(self, url):
+ video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key')
+ settings = self._call_api(video_id, title, key)
restriction = settings.get('restrictionReason')
if restriction == 'RegionRestricted':
self.raise_geo_restricted()
-
if restriction and restriction != 'None':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, restriction), expected=True)
- formats = []
- subtitles = {}
+ formats, parsed_urls = [], {None}
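+ # parsed_urls is seeded with None so renditions lacking a URL are skipped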
for rendition_id, rendition in settings['renditions'].items():
- bitrates = rendition.get('bitrates')
- if not isinstance(bitrates, dict):
- continue
- m3u8_url = url_or_none(bitrates.get('hls'))
- if not m3u8_url:
- continue
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='%s-hls' % rendition_id, fatal=False))
- cc_files = rendition.get('ccFiles')
- if not isinstance(cc_files, list):
- continue
- for cc_file in cc_files:
- if not isinstance(cc_file, list) or len(cc_file) < 3:
- continue
- cc_lang = cc_file[0]
- cc_url = url_or_none(cc_file[2])
- if not isinstance(cc_lang, compat_str) or not cc_url:
- continue
- subtitles.setdefault(cc_lang, []).append({
- 'url': cc_url,
- })
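+ # rendition ids appear to encode '<audio>_<version>_<extra>' (assumed)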
+ audio, version, extra = rendition_id.split('_')
+ m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls']))
+ if m3u8_url not in parsed_urls:
+ parsed_urls.add(m3u8_url)
+ frmt = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False)
+ for f in frmt:
+ f['language'] = audio
+ f['format_note'] = f'{version}, {extra}'
+ formats.extend(frmt)
self._sort_formats(formats)
- season_number = int_or_none(self._search_regex(
- r's(\d+)', key, 'season number', default=None))
- episode_number = int_or_none(self._search_regex(
- r'e(\d+)', key, 'episode number', default=None))
-
return {
'id': video_id,
'title': video_id,
- 'subtitles': subtitles,
+ 'subtitles': self.extract_subtitles(url, video_id, title, key, parsed_urls),
'formats': formats,
'series': title,
- 'season_number': season_number,
- 'episode_number': episode_number,
+ 'season_number': int_or_none(
+ self._search_regex(r's(\d+)', key, 'season number', default=None)),
+ 'episode_number': int_or_none(
+ self._search_regex(r'e(\d+)', key, 'episode number', default=None)),
+ 'http_headers': {'Referer': url}
}
diff --git a/hypervideo_dl/extractor/hotstar.py b/hypervideo_dl/extractor/hotstar.py
index 1620822..74e2728 100644
--- a/hypervideo_dl/extractor/hotstar.py
+++ b/hypervideo_dl/extractor/hotstar.py
@@ -3,15 +3,15 @@ from __future__ import unicode_literals
import hashlib
import hmac
-import json
import re
import time
import uuid
+import json
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
- compat_str,
+ compat_str
)
from ..utils import (
determine_ext,
@@ -26,52 +26,79 @@ from ..utils import (
class HotStarBaseIE(InfoExtractor):
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
- def _call_api_impl(self, path, video_id, headers, query, data=None):
- st = int(time.time())
+ def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
+ st = int_or_none(st) or int(time.time())
exp = st + 6000
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
- h = {'hotstarauth': auth}
- h.update(headers)
- return self._download_json(
- 'https://api.hotstar.com/' + path,
- video_id, headers=h, query=query, data=data)
+
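+ # reuse the user token from the 'userUP' cookie when logged in;
+ # otherwise request an anonymous device token from um/v3/users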
+ if cookies and cookies.get('userUP'):
+ token = cookies.get('userUP').value
+ else:
+ token = self._download_json(
+ 'https://api.hotstar.com/um/v3/users',
+ video_id, note='Downloading token',
+ data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'),
+ headers={
+ 'hotstarauth': auth,
+ 'x-hs-platform': 'PCTV', # or 'web'
+ 'Content-Type': 'application/json',
+ })['user_identity']
+
+ response = self._download_json(
+ 'https://api.hotstar.com/' + path, video_id, headers={
+ 'hotstarauth': auth,
+ 'x-hs-appversion': '6.72.2',
+ 'x-hs-platform': 'web',
+ 'x-hs-usertoken': token,
+ }, query=query)
+
+ if response['message'] != "Playback URL's fetched successfully":
+ raise ExtractorError(
+ response['message'], expected=True)
+ return response['data']
def _call_api(self, path, video_id, query_name='contentId'):
- response = self._call_api_impl(path, video_id, {
- 'x-country-code': 'IN',
- 'x-platform-code': 'JIO',
- }, {
+ return self._download_json('https://api.hotstar.com/' + path, video_id=video_id, query={
query_name: video_id,
'tas': 10000,
+ }, headers={
+ 'x-country-code': 'IN',
+ 'x-platform-code': 'PCTV',
})
- if response['statusCode'] != 'OK':
- raise ExtractorError(
- response['body']['message'], expected=True)
- return response['body']['results']
-
- def _call_api_v2(self, path, video_id, headers, query=None, data=None):
- h = {'X-Request-Id': compat_str(uuid.uuid4())}
- h.update(headers)
- try:
- return self._call_api_impl(
- path, video_id, h, query, data)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError):
- if e.cause.code == 402:
- self.raise_login_required()
- message = self._parse_json(e.cause.read().decode(), video_id)['message']
- if message in ('Content not available in region', 'Country is not supported'):
- raise self.raise_geo_restricted(message)
- raise ExtractorError(message)
- raise e
+
+ def _call_api_v2(self, path, video_id, st=None, cookies=None):
+ return self._call_api_impl(
+ '%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={
+ 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265',
+ 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()),
+ 'os-name': 'Windows',
+ 'os-version': '10',
+ })
class HotStarIE(HotStarBaseIE):
IE_NAME = 'hotstar'
- _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})'
+ _VALID_URL = r'''(?x)
+ (?:
+ hotstar\:|
+ https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
+ )
+ (?:
+ (?P<type>movies|sports|episode|(?P<tv>tv))
+ (?:
+ \:|
+ /[^/?#]+/
+ (?(tv)
+ (?:[^/?#]+/){2}|
+ (?:[^/?#]+/)*
+ )
+ )|
+ [^/?#]+/
+ )?
+ (?P<id>\d{10})
+ '''
_TESTS = [{
- # contentData
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
'info_dict': {
'id': '1000076273',
@@ -82,147 +109,161 @@ class HotStarIE(HotStarBaseIE):
'upload_date': '20151111',
'duration': 381,
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
}, {
- # contentDetail
+ 'url': 'hotstar:1000076273',
+ 'only_matching': True,
+ }, {
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+ 'info_dict': {
+ 'id': '1000057157',
+ 'ext': 'mp4',
+ 'title': 'Radha Gopalam',
+ 'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22',
+ 'timestamp': 1140805800,
+ 'upload_date': '20060224',
+ 'duration': 9182,
+ },
+ }, {
+ 'url': 'hotstar:movies:1000057157',
'only_matching': True,
}, {
- 'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
+ 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
'only_matching': True,
}, {
- 'url': 'http://www.hotstar.com/1000000515',
+ 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
'only_matching': True,
}, {
- # only available via api v2
- 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+ # contentData
+ 'url': 'hotstar:sports:1260065956',
'only_matching': True,
}, {
- 'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717',
+ # contentData
+ 'url': 'hotstar:sports:1260066104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+ 'info_dict': {
+ 'id': '1000234847',
+ 'ext': 'mp4',
+ 'title': 'Janhvi Targets Suman',
+ 'description': 'md5:78a85509348910bd1ca31be898c5796b',
+ 'timestamp': 1556670600,
+ 'upload_date': '20190501',
+ 'duration': 1219,
+ 'channel': 'StarPlus',
+ 'channel_id': 3,
+ 'series': 'Ek Bhram - Sarvagun Sampanna',
+ 'season': 'Chapter 1',
+ 'season_number': 1,
+ 'season_id': 6771,
+ 'episode': 'Janhvi Targets Suman',
+ 'episode_number': 8,
+ },
+ }, {
+ 'url': 'hotstar:episode:1000234847',
'only_matching': True,
}]
_GEO_BYPASS = False
- _DEVICE_ID = None
- _USER_TOKEN = None
+ _TYPE = {
+ 'movies': 'movie',
+ 'sports': 'match',
+ 'episode': 'episode',
+ 'tv': 'episode',
+ None: 'content',
+ }
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- app_state = self._parse_json(self._search_regex(
- r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
- webpage, 'app state'), video_id)
- video_data = {}
- getters = list(
- lambda x, k=k: x['initialState']['content%s' % k]['content']
- for k in ('Data', 'Detail')
- )
- for v in app_state.values():
- content = try_get(v, getters, dict)
- if content and content.get('contentId') == video_id:
- video_data = content
- break
-
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ video_type = mobj.group('type')
+ cookies = self._get_cookies(url)
+ video_type = self._TYPE.get(video_type, video_type)
+ video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item']
title = video_data['title']
- if video_data.get('drmProtected'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
+ self.report_drm(video_id)
- headers = {'Referer': url}
+ headers = {'Referer': 'https://www.hotstar.com/in'}
formats = []
+ subs = {}
geo_restricted = False
-
- if not self._USER_TOKEN:
- self._DEVICE_ID = compat_str(uuid.uuid4())
- self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, {
- 'X-HS-Platform': 'PCTV',
- 'Content-Type': 'application/json',
- }, data=json.dumps({
- 'device_ids': [{
- 'id': self._DEVICE_ID,
- 'type': 'device_id',
- }],
- }).encode())['user_identity']
-
- playback_sets = self._call_api_v2(
- 'play/v2/playback/content/' + video_id, video_id, {
- 'X-HS-Platform': 'web',
- 'X-HS-AppVersion': '6.99.1',
- 'X-HS-UserToken': self._USER_TOKEN,
- }, query={
- 'device-id': self._DEVICE_ID,
- 'desired-config': 'encryption:plain',
- 'os-name': 'Windows',
- 'os-version': '10',
- })['data']['playBackSets']
+ _, urlh = self._download_webpage_handle('https://www.hotstar.com/in', video_id)
+ # Required to fix https://github.com/hypervideo/hypervideo/issues/396
+ st = urlh.headers.get('x-origin-date')
+ # change to v2 in the future
+ playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
for playback_set in playback_sets:
if not isinstance(playback_set, dict):
continue
+ dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr')
format_url = url_or_none(playback_set.get('playbackUrl'))
if not format_url:
continue
format_url = re.sub(
r'(?<=//staragvod)(\d)', r'web\1', format_url)
tags = str_or_none(playback_set.get('tagsCombination')) or ''
- if tags and 'encryption:plain' not in tags:
- continue
ext = determine_ext(format_url)
+ current_formats, current_subs = [], {}
try:
if 'package:hls' in tags or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ current_formats, current_subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native',
- m3u8_id='hls', headers=headers))
+ m3u8_id=f'{dr}-hls', headers=headers)
elif 'package:dash' in tags or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id='dash', headers=headers))
+ current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id=f'{dr}-dash', headers=headers)
elif ext == 'f4m':
# produce broken files
pass
else:
- formats.append({
+ current_formats = [{
'url': format_url,
'width': int_or_none(playback_set.get('width')),
'height': int_or_none(playback_set.get('height')),
- })
+ }]
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
geo_restricted = True
continue
+ if tags and 'encryption:plain' not in tags:
+ for f in current_formats:
+ f['has_drm'] = True
+ formats.extend(current_formats)
+ subs = self._merge_subtitles(subs, current_subs)
if not formats and geo_restricted:
- self.raise_geo_restricted(countries=['IN'])
+ self.raise_geo_restricted(countries=['IN'], metadata_available=True)
self._sort_formats(formats)
for f in formats:
f.setdefault('http_headers', {}).update(headers)
- image = try_get(video_data, lambda x: x['image']['h'], compat_str)
-
return {
'id': video_id,
'title': title,
- 'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None,
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
'formats': formats,
+ 'subtitles': subs,
'channel': video_data.get('channelName'),
- 'channel_id': str_or_none(video_data.get('channelId')),
+ 'channel_id': video_data.get('channelId'),
'series': video_data.get('showName'),
'season': video_data.get('seasonName'),
'season_number': int_or_none(video_data.get('seasonNo')),
- 'season_id': str_or_none(video_data.get('seasonId')),
+ 'season_id': video_data.get('seasonId'),
'episode': title,
'episode_number': int_or_none(video_data.get('episodeNo')),
+ 'http_headers': {
+ 'Referer': 'https://www.hotstar.com/in',
+ }
}
class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist'
- _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'info_dict': {
@@ -232,16 +273,12 @@ class HotStarPlaylistIE(HotStarBaseIE):
}, {
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
'only_matching': True,
- }, {
- 'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830',
- 'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
- collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
-
+ collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results']
entries = [
self.url_result(
'https://www.hotstar.com/%s' % video['contentId'],
@@ -250,3 +287,47 @@ class HotStarPlaylistIE(HotStarBaseIE):
if video.get('contentId')]
return self.playlist_result(entries, playlist_id)
+
+
+class HotStarSeriesIE(HotStarBaseIE):
+ IE_NAME = 'hotstar:series'
+ _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
+ 'info_dict': {
+ 'id': '1260000646',
+ },
+ 'playlist_mincount': 690,
+ }, {
+ 'url': 'https://www.hotstar.com/tv/dancee-/1260050431',
+ 'info_dict': {
+ 'id': '1260050431',
+ },
+ 'playlist_mincount': 43,
+ }, {
+ 'url': 'https://www.hotstar.com/in/tv/mahabharat/435/',
+ 'info_dict': {
+ 'id': '435',
+ },
+ 'playlist_mincount': 269,
+ }]
+
+ def _real_extract(self, url):
+ url, series_id = self._match_valid_url(url).groups()
+ headers = {
+ 'x-country-code': 'IN',
+ 'x-platform-code': 'PCTV',
+ }
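+ # resolve the show's internal id first, then fetch up to 10000 tray
+ # items (tas=10000) in a single request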
+ detail_json = self._download_json('https://api.hotstar.com/o/v1/show/detail?contentId=' + series_id,
+ video_id=series_id, headers=headers)
+ id = compat_str(try_get(detail_json, lambda x: x['body']['results']['item']['id'], int))
+ item_json = self._download_json('https://api.hotstar.com/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid=' + id,
+ video_id=series_id, headers=headers)
+ entries = [
+ self.url_result(
+ '%s/ignoreme/%d' % (url, video['contentId']),
+ ie=HotStarIE.ie_key(), video_id=video['contentId'])
+ for video in item_json['body']['results']['items']
+ if video.get('contentId')]
+
+ return self.playlist_result(entries, series_id)
diff --git a/hypervideo_dl/extractor/hrfensehen.py b/hypervideo_dl/extractor/hrfensehen.py
new file mode 100644
index 0000000..2a994d4
--- /dev/null
+++ b/hypervideo_dl/extractor/hrfensehen.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from ..utils import int_or_none, unified_timestamp, unescapeHTML
+from .common import InfoExtractor
+
+
+class HRFernsehenIE(InfoExtractor):
+ IE_NAME = 'hrfernsehen'
+ _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
+ 'md5': '5c4e0ba94677c516a2f65a84110fc536',
+ 'info_dict': {
+ 'id': '130546',
+ 'ext': 'mp4',
+ 'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
+ 'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
+ 'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
+ 'subtitles': {'de': [{
+ 'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
+ }]},
+ 'timestamp': 1598470200,
+ 'upload_date': '20200826',
+ 'thumbnails': [{
+ 'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
+ 'id': '0'
+ }, {
+ 'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
+ 'id': '1'
+ }],
+ 'title': 'hessenschau vom 26.08.2020'
+ }
+ }, {
+ 'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
+ 'only_matching': True
+ }]
+
+ _GEO_COUNTRIES = ['DE']
+
+ def extract_airdate(self, loader_data):
+ airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')
+
+ if airdate_str is None:
+ return None
+
+ return unified_timestamp(airdate_str)
+
+ def extract_formats(self, loader_data):
+ stream_formats = []
+ for stream_obj in loader_data["videoResolutionLevels"]:
+ stream_format = {
+ 'format_id': str(stream_obj['verticalResolution']) + "p",
+ 'height': stream_obj['verticalResolution'],
+ 'url': stream_obj['url'],
+ }
+
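+ # stream URLs embed resolution, fps and bitrate,
+ # e.g. '512x288-25p-500kbit'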
+ quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
+ stream_obj['url'])
+ if quality_information:
+ stream_format['width'] = int_or_none(quality_information.group(1))
+ stream_format['height'] = int_or_none(quality_information.group(2))
+ stream_format['fps'] = int_or_none(quality_information.group(3))
+ stream_format['tbr'] = int_or_none(quality_information.group(4))
+
+ stream_formats.append(stream_format)
+
+ self._sort_formats(stream_formats)
+ return stream_formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title', 'name'], webpage)
+ description = self._html_search_meta(
+ ['description'], webpage)
+
+ loader_str = unescapeHTML(self._search_regex(r"data-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
+ loader_data = json.loads(loader_str)
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': self.extract_formats(loader_data),
+ 'timestamp': self.extract_airdate(loader_data)
+ }
+
+ if "subtitle" in loader_data:
+ info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}
+
+ thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
+ if len(thumbnails) > 0:
+ info["thumbnails"] = [{"url": t} for t in thumbnails]
+
+ return info
diff --git a/hypervideo_dl/extractor/hrti.py b/hypervideo_dl/extractor/hrti.py
index 23f7b1f..dc5b967 100644
--- a/hypervideo_dl/extractor/hrti.py
+++ b/hypervideo_dl/extractor/hrti.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -135,7 +134,7 @@ class HRTiIE(HRTiBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('short_id') or mobj.group('id')
display_id = mobj.group('display_id') or video_id
@@ -191,7 +190,7 @@ class HRTiPlaylistIE(HRTiBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
category_id = mobj.group('id')
display_id = mobj.group('display_id') or category_id
diff --git a/hypervideo_dl/extractor/hungama.py b/hypervideo_dl/extractor/hungama.py
index 3fdaac5..821b16e 100644
--- a/hypervideo_dl/extractor/hungama.py
+++ b/hypervideo_dl/extractor/hungama.py
@@ -1,9 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ try_get,
urlencode_postdata,
)
@@ -71,14 +74,14 @@ class HungamaSongIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
- 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+ 'md5': 'd4a6a05a394ad0453a9bea3ca00e6024',
'info_dict': {
'id': '2931166',
- 'ext': 'mp4',
+ 'ext': 'mp3',
'title': 'Lucky Ali - Kitni Haseen Zindagi',
'track': 'Kitni Haseen Zindagi',
'artist': 'Lucky Ali',
- 'album': 'Aks',
+ 'album': None,
'release_year': 2000,
}
}
@@ -89,18 +92,20 @@ class HungamaSongIE(InfoExtractor):
data = self._download_json(
'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
audio_id, query={'_country': 'IN'})[0]
-
track = data['song_name']
artist = data.get('singer_name')
-
- m3u8_url = self._download_json(
- data.get('file') or data['preview_link'],
- audio_id)['response']['media_url']
-
- formats = self._extract_m3u8_formats(
- m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- self._sort_formats(formats)
+ formats = []
+ media_json = self._download_json(data.get('file') or data['preview_link'], audio_id)
+ media_url = try_get(media_json, lambda x: x['response']['media_url'], str)
+ media_type = try_get(media_json, lambda x: x['response']['type'], str)
+
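+ # the player endpoint returns a direct media URL plus its container
+ # type, which doubles as the extension and audio codec hint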
+ if media_url:
+ formats.append({
+ 'url': media_url,
+ 'ext': media_type,
+ 'vcodec': 'none',
+ 'acodec': media_type,
+ })
title = '%s - %s' % (artist, track) if artist else track
thumbnail = data.get('img_src') or data.get('album_image')
@@ -111,7 +116,32 @@ class HungamaSongIE(InfoExtractor):
'thumbnail': thumbnail,
'track': track,
'artist': artist,
- 'album': data.get('album_name'),
+ 'album': data.get('album_name') or None,
'release_year': int_or_none(data.get('date')),
'formats': formats,
}
+
+
+class HungamaAlbumPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hungama\.com/(?:playlists|album)/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.hungama.com/album/bhuj-the-pride-of-india/69481490/',
+ 'playlist_mincount': 7,
+ 'info_dict': {
+ 'id': '69481490',
+ },
+ }, {
+ 'url': 'https://www.hungama.com/playlists/hindi-jan-to-june-2021/123063/',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': '123063',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ ptrn = r'<meta[^>]+?property=[\"\']?music:song:url[\"\']?[^>]+?content=[\"\']?([^\"\']+)'
+ items = re.findall(ptrn, webpage)
+ entries = [self.url_result(item, ie=HungamaSongIE.ie_key()) for item in items]
+ return self.playlist_result(entries, video_id)
diff --git a/hypervideo_dl/extractor/ichinanalive.py b/hypervideo_dl/extractor/ichinanalive.py
new file mode 100644
index 0000000..cb39f82
--- /dev/null
+++ b/hypervideo_dl/extractor/ichinanalive.py
@@ -0,0 +1,167 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
+from ..compat import compat_str
+
+
+class IchinanaLiveIE(InfoExtractor):
+ IE_NAME = '17live'
+ _VALID_URL = r'https?://(?:www\.)?17\.live/(?:[^/]+/)*(?:live|profile/r)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://17.live/live/3773096',
+ 'info_dict': {
+ 'id': '3773096',
+ 'title': '萠珈☕🤡🍫moka',
+ 'is_live': True,
+ 'uploader': '萠珈☕🤡🍫moka',
+ 'uploader_id': '3773096',
+ 'like_count': 366,
+ 'view_count': 18121,
+ 'timestamp': 1630569012,
+ },
+ 'skip': 'live at the time of writing; may have ended by the time of testing',
+ }, {
+ 'note': 'nothing except language differs',
+ 'url': 'https://17.live/ja/live/3773096',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return not IchinanaLiveClipIE.suitable(url) and super(IchinanaLiveIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = 'https://17.live/live/%s' % video_id
+
+ enter = self._download_json(
+ 'https://api-dsa.17app.co/api/v1/lives/%s/enter' % video_id, video_id,
+ headers={'Referer': url}, fatal=False, expected_status=420,
+ data=b'\0')
+ if enter and enter.get('message') == 'ended':
+ raise ExtractorError('This live has ended.', expected=True)
+
+ view_data = self._download_json(
+ 'https://api-dsa.17app.co/api/v1/lives/%s' % video_id, video_id,
+ headers={'Referer': url})
+
+ uploader = traverse_obj(
+ view_data, ('userInfo', 'displayName'), ('userInfo', 'openID'))
+
+ video_urls = view_data.get('rtmpUrls')
+ if not video_urls:
+ raise ExtractorError('unable to extract live URL information')
+ formats = []
+ for (name, value) in video_urls[0].items():
+ if not isinstance(value, compat_str):
+ continue
+ if not value.startswith('http'):
+ continue
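+ # crude quality ranking inferred from the stream name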
+ quality = -1
+ if 'web' in name:
+ quality -= 1
+ if 'High' in name:
+ quality += 4
+ if 'Low' in name:
+ quality -= 2
+ formats.append({
+ 'format_id': name,
+ 'url': value,
+ 'quality': quality,
+ 'http_headers': {'Referer': url},
+ 'ext': 'flv',
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': uploader or video_id,
+ 'formats': formats,
+ 'is_live': True,
+ 'uploader': uploader,
+ 'uploader_id': video_id,
+ 'like_count': view_data.get('receivedLikeCount'),
+ 'view_count': view_data.get('viewerCount'),
+ 'thumbnail': view_data.get('coverPhoto'),
+ 'description': view_data.get('caption'),
+ 'timestamp': view_data.get('beginTime'),
+ }
+
+
+class IchinanaLiveClipIE(InfoExtractor):
+ IE_NAME = '17live:clip'
+ _VALID_URL = r'https?://(?:www\.)?17\.live/(?:[^/]+/)*profile/r/(?P<uploader_id>\d+)/clip/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://17.live/profile/r/1789280/clip/1bHQSK8KUieruFXaCH4A4upCzlN',
+ 'info_dict': {
+ 'id': '1bHQSK8KUieruFXaCH4A4upCzlN',
+ 'title': 'マチコ先生🦋Class💋',
+ 'description': 'マチ戦隊 第一次 バスターコール\n総額200万coin!\n動画制作@うぉーかー🌱Walker🎫',
+ 'uploader_id': '1789280',
+ },
+ }, {
+ 'url': 'https://17.live/ja/profile/r/1789280/clip/1bHQSK8KUieruFXaCH4A4upCzlN',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uploader_id, video_id = self._match_valid_url(url).groups()
+ url = 'https://17.live/profile/r/%s/clip/%s' % (uploader_id, video_id)
+
+ view_data = self._download_json(
+ 'https://api-dsa.17app.co/api/v1/clips/%s' % video_id, video_id,
+ headers={'Referer': url})
+
+ uploader = traverse_obj(
+ view_data, ('userInfo', 'displayName'), ('userInfo', 'name'))
+
+ formats = []
+ if view_data.get('videoURL'):
+ formats.append({
+ 'format_id': 'video',
+ 'url': view_data['videoURL'],
+ 'quality': -1,
+ })
+ if view_data.get('transcodeURL'):
+ formats.append({
+ 'format_id': 'transcode',
+ 'url': view_data['transcodeURL'],
+ 'quality': -1,
+ })
+ if view_data.get('srcVideoURL'):
+ # highest quality
+ formats.append({
+ 'format_id': 'srcVideo',
+ 'url': view_data['srcVideoURL'],
+ 'quality': 1,
+ })
+
+ for fmt in formats:
+ fmt.update({
+ 'ext': 'mp4',
+ 'protocol': 'https',
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'http_headers': {'Referer': url},
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': uploader or video_id,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': view_data.get('likeCount'),
+ 'view_count': view_data.get('viewCount'),
+ 'thumbnail': view_data.get('imageURL'),
+ 'duration': view_data.get('duration'),
+ 'description': view_data.get('caption'),
+ 'upload_date': unified_strdate(str_or_none(view_data.get('createdAt'))),
+ }
diff --git a/hypervideo_dl/extractor/ign.py b/hypervideo_dl/extractor/ign.py
index 0d9f50e..c826eb3 100644
--- a/hypervideo_dl/extractor/ign.py
+++ b/hypervideo_dl/extractor/ign.py
@@ -100,7 +100,7 @@ class IGNIE(IGNBaseIE):
formats.append({
'ext': determine_ext(mezzanine_url, 'mp4'),
'format_id': 'mezzanine',
- 'preference': 1,
+ 'quality': 1,
'url': mezzanine_url,
})
diff --git a/hypervideo_dl/extractor/imggaming.py b/hypervideo_dl/extractor/imggaming.py
index e11f920..ef20a4b 100644
--- a/hypervideo_dl/extractor/imggaming.py
+++ b/hypervideo_dl/extractor/imggaming.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -62,10 +61,10 @@ class ImgGamingBaseIE(InfoExtractor):
raise
def _real_extract(self, url):
- domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups()
+ domain, media_type, media_id, playlist_id = self._match_valid_url(url).groups()
if playlist_id:
- if self._downloader.params.get('noplaylist'):
+ if self.get_param('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
diff --git a/hypervideo_dl/extractor/imgur.py b/hypervideo_dl/extractor/imgur.py
index 511fa5f..c917cf1 100644
--- a/hypervideo_dl/extractor/imgur.py
+++ b/hypervideo_dl/extractor/imgur.py
@@ -72,7 +72,7 @@ class ImgurIE(InfoExtractor):
gif_json, video_id, transform_source=js_to_json)
formats.append({
'format_id': 'gif',
- 'preference': -10,
+ 'preference': -10, # gifs are worse than videos
'width': width,
'height': height,
'ext': 'gif',
diff --git a/hypervideo_dl/extractor/instagram.py b/hypervideo_dl/extractor/instagram.py
index 12e1014..3801c7a 100644
--- a/hypervideo_dl/extractor/instagram.py
+++ b/hypervideo_dl/extractor/instagram.py
@@ -19,6 +19,7 @@ from ..utils import (
std_headers,
try_get,
url_or_none,
+ variadic,
)
@@ -140,11 +141,13 @@ class InstagramIE(InfoExtractor):
return mobj.group('link')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
url = mobj.group('url')
- webpage = self._download_webpage(url, video_id)
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+ if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
+ self.raise_login_required('You need to log in to access this content', method='cookies')
(media, video_url, description, thumbnail, timestamp, uploader,
uploader_id, like_count, comment_count, comments, height,
@@ -188,26 +191,29 @@ class InstagramIE(InfoExtractor):
uploader_id = media.get('owner', {}).get('username')
def get_count(keys, kind):
- if not isinstance(keys, (list, tuple)):
- keys = [keys]
- for key in keys:
+ for key in variadic(keys):
count = int_or_none(try_get(
media, (lambda x: x['edge_media_%s' % key]['count'],
lambda x: x['%ss' % kind]['count'])))
if count is not None:
return count
+
like_count = get_count('preview_like', 'like')
comment_count = get_count(
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
- comments = [{
- 'author': comment.get('user', {}).get('username'),
- 'author_id': comment.get('user', {}).get('id'),
- 'id': comment.get('id'),
- 'text': comment.get('text'),
- 'timestamp': int_or_none(comment.get('created_at')),
- } for comment in media.get(
- 'comments', {}).get('nodes', []) if comment.get('text')]
+ comments = []
+ for comment in try_get(media, lambda x: x['edge_media_to_parent_comment']['edges']) or []:
+ comment_dict = comment.get('node', {})
+ comment_text = comment_dict.get('text')
+ if comment_text:
+ comments.append({
+ 'author': try_get(comment_dict, lambda x: x['owner']['username']),
+ 'author_id': try_get(comment_dict, lambda x: x['owner']['id']),
+ 'id': comment_dict.get('id'),
+ 'text': comment_text,
+ 'timestamp': int_or_none(comment_dict.get('created_at')),
+ })
if not video_url:
edges = try_get(
media, lambda x: x['edge_sidecar_to_children']['edges'],
@@ -273,6 +279,9 @@ class InstagramIE(InfoExtractor):
'like_count': like_count,
'comment_count': comment_count,
'comments': comments,
+ 'http_headers': {
+ 'Referer': 'https://www.instagram.com/',
+ }
}
diff --git a/hypervideo_dl/extractor/internetvideoarchive.py b/hypervideo_dl/extractor/internetvideoarchive.py
index 59b0a90..880918c 100644
--- a/hypervideo_dl/extractor/internetvideoarchive.py
+++ b/hypervideo_dl/extractor/internetvideoarchive.py
@@ -4,10 +4,7 @@ import json
import re
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urlparse,
-)
+from ..utils import parse_qs
class InternetVideoArchiveIE(InfoExtractor):
@@ -32,7 +29,7 @@ class InternetVideoArchiveIE(InfoExtractor):
return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
def _real_extract(self, url):
- query = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ query = parse_qs(url)
video_id = query['publishedid'][0]
data = self._download_json(
'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
diff --git a/hypervideo_dl/extractor/iprima.py b/hypervideo_dl/extractor/iprima.py
index 648ae67..28e6609 100644
--- a/hypervideo_dl/extractor/iprima.py
+++ b/hypervideo_dl/extractor/iprima.py
@@ -136,7 +136,7 @@ class IPrimaIE(InfoExtractor):
extract_formats(src)
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
- self.raise_geo_restricted(countries=['CZ'])
+ self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/iqiyi.py b/hypervideo_dl/extractor/iqiyi.py
index 6df5214..b13b9f4 100644
--- a/hypervideo_dl/extractor/iqiyi.py
+++ b/hypervideo_dl/extractor/iqiyi.py
@@ -198,7 +198,7 @@ class IqiyiIE(InfoExtractor):
'url': stream['m3utx'],
'format_id': vd,
'ext': 'mp4',
- 'preference': self._FORMATS_MAP.get(vd, -1),
+ 'quality': self._FORMATS_MAP.get(vd, -1),
'protocol': 'm3u8_native',
})
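Several hunks in this commit (iqiyi, kuwo, lifenews, limelight) move site-defined rankings from 'preference' to 'quality': 'preference' acts as a near-absolute override in the format sorter, while 'quality' is weighed alongside resolution and bitrate. A toy sort key showing the intuition, not the real _sort_formats:

    def sort_key(f):
        # preference dominates everything; quality is one signal among others
        return (f.get('preference') or 0, f.get('quality') or 0, f.get('height') or 0)

    gif = {'format_id': 'gif', 'height': 1080, 'preference': -10}
    mp4 = {'format_id': 'mp4', 'height': 480}
    assert max([gif, mp4], key=sort_key)['format_id'] == 'mp4'  # preference beats height

    hd = {'format_id': 'HD', 'height': 720, 'quality': 2}
    sd = {'format_id': 'SD', 'height': 1080, 'quality': 1}
    assert max([hd, sd], key=sort_key)['format_id'] == 'HD'  # quality still ranks within a site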
diff --git a/hypervideo_dl/extractor/itv.py b/hypervideo_dl/extractor/itv.py
index e86c40b..4cd34a2 100644
--- a/hypervideo_dl/extractor/itv.py
+++ b/hypervideo_dl/extractor/itv.py
@@ -2,20 +2,26 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
+
+from ..compat import compat_str
from ..utils import (
+ base_url,
clean_html,
determine_ext,
extract_attributes,
+ ExtractorError,
get_element_by_class,
JSON_LD_RE,
merge_dicts,
parse_duration,
smuggle_url,
+ try_get,
url_or_none,
+ url_basename,
+ urljoin,
)
@@ -23,15 +29,32 @@ class ITVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
_GEO_COUNTRIES = ['GB']
_TESTS = [{
- 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
+ 'url': 'https://www.itv.com/hub/plebs/2a1873a0002',
'info_dict': {
- 'id': '2a4547a0012',
+ 'id': '2a1873a0002',
'ext': 'mp4',
- 'title': 'Liar - Series 2 - Episode 6',
- 'description': 'md5:d0f91536569dec79ea184f0a44cca089',
- 'series': 'Liar',
- 'season_number': 2,
- 'episode_number': 6,
+ 'title': 'Plebs - The Orgy',
+ 'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4',
+ 'series': 'Plebs',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209',
+ 'info_dict': {
+ 'id': '2a1166a0209',
+ 'ext': 'mp4',
+ 'title': 'The Jonathan Ross Show - Series 17 - Episode 8',
+ 'description': 'md5:3023dcdd375db1bc9967186cdb3f1399',
+ 'series': 'The Jonathan Ross Show',
+ 'episode_number': 8,
+ 'season_number': 17,
+ 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002'
},
'params': {
# m3u8 download
@@ -51,22 +74,16 @@ class ITVIE(InfoExtractor):
'only_matching': True,
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- params = extract_attributes(self._search_regex(
- r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
-
- ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
- hmac = params['data-video-hmac']
- headers = self.geo_verification_headers()
- headers.update({
+ def _generate_api_headers(self, hmac):
+ return merge_dicts({
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
'Content-Type': 'application/json',
'hmac': hmac.upper(),
- })
- ios_playlist = self._download_json(
- ios_playlist_url, video_id, data=json.dumps({
+ }, self.geo_verification_headers())
+
+ def _call_api(self, video_id, playlist_url, headers, platform_tag, featureset, fatal=True):
+ return self._download_json(
+ playlist_url, video_id, data=json.dumps({
'user': {
'itvUserId': '',
'entitlements': [],
@@ -87,15 +104,61 @@ class ITVIE(InfoExtractor):
},
'variantAvailability': {
'featureset': {
- 'min': ['hls', 'aes', 'outband-webvtt'],
- 'max': ['hls', 'aes', 'outband-webvtt']
+ 'min': featureset,
+ 'max': featureset
},
- 'platformTag': 'dotcom'
+ 'platformTag': platform_tag
}
- }).encode(), headers=headers)
- video_data = ios_playlist['Playlist']['Video']
- ios_base_url = video_data.get('Base')
+ }).encode(), headers=headers, fatal=fatal)
+
+ def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs):
+ subtitles = {}
+ # Prefer last matching featureset
+ # See: https://github.com/hypervideo/hypervideo/issues/986
+ platform_tag_subs, featureset_subs = next(
+ ((platform_tag, featureset)
+ for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets
+ if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'),
+ (None, None))
+
+ if platform_tag_subs and featureset_subs:
+ subs_playlist = self._call_api(
+ video_id, ios_playlist_url, headers, platform_tag_subs, featureset_subs, fatal=False)
+ subs = try_get(subs_playlist, lambda x: x['Playlist']['Video']['Subtitles'], list) or []
+ for sub in subs:
+ if not isinstance(sub, dict):
+ continue
+ href = url_or_none(sub.get('Href'))
+ if not href:
+ continue
+ subtitles.setdefault('en', []).append({'url': href})
+ return subtitles
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ params = extract_attributes(self._search_regex(
+ r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
+ variants = self._parse_json(
+ try_get(params, lambda x: x['data-video-variants'], compat_str) or '{}',
+ video_id, fatal=False) or {}
+ # Prefer last matching featureset
+ # See: https://github.com/hypervideo/hypervideo/issues/986
+ platform_tag_video, featureset_video = next(
+ ((platform_tag, featureset)
+ for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets
+ if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']),
+ (None, None))
+ if not platform_tag_video or not featureset_video:
+ raise ExtractorError('No downloads available', expected=True, video_id=video_id)
+
+ ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
+ headers = self._generate_api_headers(params['data-video-hmac'])
+ ios_playlist = self._call_api(
+ video_id, ios_playlist_url, headers, platform_tag_video, featureset_video)
+
+ video_data = try_get(ios_playlist, lambda x: x['Playlist']['Video'], dict) or {}
+ ios_base_url = video_data.get('Base')
formats = []
for media_file in (video_data.get('MediaFiles') or []):
href = media_file.get('Href')
@@ -113,20 +176,6 @@ class ITVIE(InfoExtractor):
'url': href,
})
self._sort_formats(formats)
-
- subtitles = {}
- subs = video_data.get('Subtitles') or []
- for sub in subs:
- if not isinstance(sub, dict):
- continue
- href = url_or_none(sub.get('Href'))
- if not href:
- continue
- subtitles.setdefault('en', []).append({
- 'url': href,
- 'ext': determine_ext(href, 'vtt'),
- })
-
info = self._search_json_ld(webpage, video_id, default={})
if not info:
json_ld = self._parse_json(self._search_regex(
@@ -140,25 +189,45 @@ class ITVIE(InfoExtractor):
info = self._json_ld(item, video_id, fatal=False) or {}
break
+ thumbnails = []
+ thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], compat_str)
+ if thumbnail_url:
+ thumbnails.extend([{
+ 'url': thumbnail_url.format(width=1920, height=1080, quality=100, blur=0, bg='false'),
+ 'width': 1920,
+ 'height': 1080,
+ }, {
+ 'url': urljoin(base_url(thumbnail_url), url_basename(thumbnail_url)),
+ 'preference': -2
+ }])
+
+ thumbnail_url = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ })
+ self._remove_duplicate_formats(thumbnails)
+
return merge_dicts({
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
'formats': formats,
- 'subtitles': subtitles,
+ 'subtitles': self.extract_subtitles(video_id, variants, ios_playlist_url, headers),
'duration': parse_duration(video_data.get('Duration')),
'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
+ 'thumbnails': thumbnails
}, info)
class ITVBTCCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
- 'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+ 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
'info_dict': {
- 'id': 'btcc-2018-all-the-action-from-brands-hatch',
- 'title': 'BTCC 2018: All the action from Brands Hatch',
+ 'id': 'btcc-2019-brands-hatch-gp-race-action',
+ 'title': 'BTCC 2019: Brands Hatch GP race action',
},
- 'playlist_mincount': 9,
+ 'playlist_count': 12,
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
@@ -167,6 +236,16 @@ class ITVBTCCIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
+ json_map = try_get(self._parse_json(self._html_search_regex(
+ r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
+ lambda x: x['props']['pageProps']['article']['body']['content']) or []
+
+ # Discard empty objects
+ video_ids = []
+ for video in json_map:
+ video_id = try_get(video, lambda x: x['data']['id'])
+ if video_id:
+ video_ids.append(video_id)
+
entries = [
self.url_result(
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
@@ -178,7 +257,7 @@ class ITVBTCCIE(InfoExtractor):
'referrer': url,
}),
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
- for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+ for video_id in video_ids]
title = self._og_search_title(webpage, fatal=False)
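The variant selection above walks the data-video-variants mapping backwards and keeps the last platform whose featureset starts with ['hls', 'aes']. The same logic in isolation, with a hypothetical payload (note that reversed() over dict items needs Python 3.8+):

    variants = {  # hypothetical payload; the real one comes from <div id="video">
        'mobile': [['hls', 'aes']],
        'dotcom': [['hls', 'aes', 'outband-webvtt']],
    }

    # Prefer the last matching featureset, hence reversed()
    platform_tag, featureset = next(
        ((tag, fs) for tag, fs_list in reversed(variants.items())
         for fs in fs_list if fs[:2] == ['hls', 'aes']),
        (None, None))
    assert (platform_tag, featureset) == ('dotcom', ['hls', 'aes', 'outband-webvtt'])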
diff --git a/hypervideo_dl/extractor/ivi.py b/hypervideo_dl/extractor/ivi.py
index 04c54e8..5f8a046 100644
--- a/hypervideo_dl/extractor/ivi.py
+++ b/hypervideo_dl/extractor/ivi.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import json
import re
-import sys
from .common import InfoExtractor
from ..utils import (
@@ -94,20 +93,21 @@ class IviIE(InfoExtractor):
]
})
- bundled = hasattr(sys, 'frozen')
-
for site in (353, 183):
content_data = (data % site).encode()
if site == 353:
- if bundled:
- continue
try:
from Cryptodome.Cipher import Blowfish
from Cryptodome.Hash import CMAC
- pycryptodomex_found = True
+ pycryptodome_found = True
except ImportError:
- pycryptodomex_found = False
- continue
+ try:
+ from Crypto.Cipher import Blowfish
+ from Crypto.Hash import CMAC
+ pycryptodome_found = True
+ except ImportError:
+ pycryptodome_found = False
+ continue
timestamp = (self._download_json(
self._LIGHT_URL, video_id,
@@ -140,14 +140,8 @@ class IviIE(InfoExtractor):
extractor_msg = 'Video %s does not exist'
elif site == 353:
continue
- elif bundled:
- raise ExtractorError(
- 'This feature does not work from bundled exe. Run hypervideo from sources.',
- expected=True)
- elif not pycryptodomex_found:
- raise ExtractorError(
- 'pycryptodomex not found. Please install it.',
- expected=True)
+ elif not pycryptodome_found:
+ raise ExtractorError('pycryptodomex or pycryptodome not found. Please install it.', expected=True)
elif message:
extractor_msg += ': ' + message
raise ExtractorError(extractor_msg % video_id, expected=True)
@@ -163,7 +157,10 @@ class IviIE(InfoExtractor):
for f in result.get('files', []):
f_url = f.get('url')
content_format = f.get('content_format')
- if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format:
+ if not f_url:
+ continue
+ if (not self.get_param('allow_unplayable_formats')
+ and ('-MDRM-' in content_format or '-FPS-' in content_format)):
continue
formats.append({
'url': f_url,
@@ -242,7 +239,7 @@ class IviCompilationIE(InfoExtractor):
r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
compilation_id = mobj.group('compilationid')
season_id = mobj.group('seasonid')
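The import hunk above makes the ivi signing path work with either pycryptodomex (Cryptodome namespace) or plain pycryptodome (Crypto namespace). The fallback in isolation, with a dummy 16-byte key standing in for the extractor's real one:

    try:
        from Cryptodome.Cipher import Blowfish
        from Cryptodome.Hash import CMAC
    except ImportError:
        # pycryptodome ships the same API under the Crypto namespace
        from Crypto.Cipher import Blowfish
        from Crypto.Hash import CMAC

    # CMAC over Blowfish, as the ivi API signing uses; dummy key
    mac = CMAC.new(b'0123456789abcdef', msg=b'payload', ciphermod=Blowfish)
    print(mac.hexdigest())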
diff --git a/hypervideo_dl/extractor/ivideon.py b/hypervideo_dl/extractor/ivideon.py
index 3ca824f..01e7b22 100644
--- a/hypervideo_dl/extractor/ivideon.py
+++ b/hypervideo_dl/extractor/ivideon.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import (
@@ -38,7 +37,7 @@ class IvideonIE(InfoExtractor):
_QUALITIES = ('low', 'mid', 'hi')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
server_id, camera_id = mobj.group('id'), mobj.group('camera_id')
camera_name, description = None, None
camera_url = compat_urlparse.urljoin(
diff --git a/hypervideo_dl/extractor/iwara.py b/hypervideo_dl/extractor/iwara.py
index 907d5fc..254d986 100644
--- a/hypervideo_dl/extractor/iwara.py
+++ b/hypervideo_dl/extractor/iwara.py
@@ -1,5 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
@@ -8,6 +9,8 @@ from ..utils import (
mimetype2ext,
remove_end,
url_or_none,
+ unified_strdate,
+ strip_or_none,
)
@@ -21,6 +24,10 @@ class IwaraIE(InfoExtractor):
'ext': 'mp4',
'title': '【MMD R-18】ガールフレンド carry_me_off',
'age_limit': 18,
+ 'thumbnail': 'https://i.iwara.tv/sites/default/files/videos/thumbnails/7951/thumbnail-7951_0001.png',
+ 'uploader': 'Reimu丨Action',
+ 'upload_date': '20150828',
+ 'description': 'md5:1d4905ce48c66c9299c617f08e106e0f',
},
}, {
'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
@@ -72,6 +79,19 @@ class IwaraIE(InfoExtractor):
title = remove_end(self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
+ thumbnail = self._html_search_regex(
+ r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)
+
+ uploader = self._html_search_regex(
+ r'class="username">([^<]+)', webpage, 'uploader', fatal=False)
+
+ upload_date = unified_strdate(self._html_search_regex(
+ r'作成日:([^\s]+)', webpage, 'upload_date', fatal=False))
+
+ description = strip_or_none(self._search_regex(
+ r'<p>(.+?(?=</div))', webpage, 'description', fatal=False,
+ flags=re.DOTALL))
+
formats = []
for a_format in video_data:
format_uri = url_or_none(a_format.get('uri'))
@@ -96,4 +116,8 @@ class IwaraIE(InfoExtractor):
'title': title,
'age_limit': age_limit,
'formats': formats,
+ 'thumbnail': self._proto_relative_url(thumbnail, 'https:'),
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'description': description,
}
diff --git a/hypervideo_dl/extractor/jeuxvideo.py b/hypervideo_dl/extractor/jeuxvideo.py
index e9f4ed7..77c0f52 100644
--- a/hypervideo_dl/extractor/jeuxvideo.py
+++ b/hypervideo_dl/extractor/jeuxvideo.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -25,7 +24,7 @@ class JeuxVideoIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
title = mobj.group(1)
webpage = self._download_webpage(url, title)
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
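Dozens of files in this commit replace re.match(self._VALID_URL, url) with self._match_valid_url(url). A hedged sketch of what the helper buys: one compiled pattern per extractor class instead of a recompile on every call (the real method in extractor/common.py also gives subclasses a single point to override):

    import re

    class SketchIE:
        _VALID_URL = r'https?://(?:www\.)?example\.com/(?P<id>\d+)'  # hypothetical
        _VALID_URL_RE = None

        @classmethod
        def _match_valid_url(cls, url):
            if cls._VALID_URL_RE is None:
                cls._VALID_URL_RE = re.compile(cls._VALID_URL)
            return cls._VALID_URL_RE.match(url)

    print(SketchIE._match_valid_url('https://www.example.com/123').group('id'))  # 123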
diff --git a/hypervideo_dl/extractor/joj.py b/hypervideo_dl/extractor/joj.py
index 62b28e9..6376181 100644
--- a/hypervideo_dl/extractor/joj.py
+++ b/hypervideo_dl/extractor/joj.py
@@ -1,108 +1,108 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- js_to_json,
- try_get,
-)
-
-
-class JojIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- joj:|
- https?://media\.joj\.sk/embed/
- )
- (?P<id>[^/?#^]+)
- '''
- _TESTS = [{
- 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
- 'info_dict': {
- 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
- 'ext': 'mp4',
- 'title': 'NOVÉ BÝVANIE',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 3118,
- }
- }, {
- 'url': 'https://media.joj.sk/embed/9i1cxv',
- 'only_matching': True,
- }, {
- 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
- 'only_matching': True,
- }, {
- 'url': 'joj:9i1cxv',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
- webpage)]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://media.joj.sk/embed/%s' % video_id, video_id)
-
- title = self._search_regex(
- (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'<title>(?P<title>[^<]+)'), webpage, 'title',
- default=None, group='title') or self._og_search_title(webpage)
-
- bitrates = self._parse_json(
- self._search_regex(
- r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
- default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
-
- formats = []
- for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
- if isinstance(format_url, compat_str):
- height = self._search_regex(
- r'(\d+)[pP]\.', format_url, 'height', default=None)
- formats.append({
- 'url': format_url,
- 'format_id': '%sp' % height if height else None,
- 'height': int(height),
- })
- if not formats:
- playlist = self._download_xml(
- 'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
- video_id)
- for file_el in playlist.findall('./files/file'):
- path = file_el.get('path')
- if not path:
- continue
- format_id = file_el.get('id') or file_el.get('label')
- formats.append({
- 'url': 'http://n16.joj.sk/storage/%s' % path.replace(
- 'dat/', '', 1),
- 'format_id': format_id,
- 'height': int_or_none(self._search_regex(
- r'(\d+)[pP]', format_id or path, 'height',
- default=None)),
- })
- self._sort_formats(formats)
-
- thumbnail = self._og_search_thumbnail(webpage)
-
- duration = int_or_none(self._search_regex(
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ try_get,
+)
+
+
+class JojIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ joj:|
+ https?://media\.joj\.sk/embed/
+ )
+ (?P<id>[^/?#^]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'info_dict': {
+ 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'ext': 'mp4',
+ 'title': 'NOVÉ BÝVANIE',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3118,
+ }
+ }, {
+ 'url': 'https://media.joj.sk/embed/9i1cxv',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:9i1cxv',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://media.joj.sk/embed/%s' % video_id, video_id)
+
+ title = self._search_regex(
+ (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<title>(?P<title>[^<]+)'), webpage, 'title',
+ default=None, group='title') or self._og_search_title(webpage)
+
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
+ formats = []
+ for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
+ if isinstance(format_url, compat_str):
+ height = self._search_regex(
+ r'(\d+)[pP]\.', format_url, 'height', default=None)
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%sp' % height if height else None,
+ 'height': int_or_none(height),
+ })
+ if not formats:
+ playlist = self._download_xml(
+ 'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
+ video_id)
+ for file_el in playlist.findall('./files/file'):
+ path = file_el.get('path')
+ if not path:
+ continue
+ format_id = file_el.get('id') or file_el.get('label')
+ formats.append({
+ 'url': 'http://n16.joj.sk/storage/%s' % path.replace(
+ 'dat/', '', 1),
+ 'format_id': format_id,
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id or path, 'height',
+ default=None)),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/jove.py b/hypervideo_dl/extractor/jove.py
index 27e0e37..4b7dfc5 100644
--- a/hypervideo_dl/extractor/jove.py
+++ b/hypervideo_dl/extractor/jove.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -41,7 +40,7 @@ class JoveIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/jwplatform.py b/hypervideo_dl/extractor/jwplatform.py
index c34b5f5..5aa508b 100644
--- a/hypervideo_dl/extractor/jwplatform.py
+++ b/hypervideo_dl/extractor/jwplatform.py
@@ -32,9 +32,14 @@ class JWPlatformIE(InfoExtractor):
@staticmethod
def _extract_urls(webpage):
- return re.findall(
- r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
- webpage)
+ for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
+ # <input value=URL> is used by hyland.com
+ # if we find <script>/<iframe>, don't look for <input>
+ ret = re.findall(
+ r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+ webpage)
+ if ret:
+ return ret
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
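_extract_urls above now tries src= on <script>/<iframe> first and only falls back to <input value=...> when that finds nothing. Standalone, against a made-up page:

    import re

    JW_RE = r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})'

    def extract_urls(webpage):
        for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
            ret = re.findall(JW_RE % (tag, key), webpage)
            if ret:
                return ret
        return []  # explicit empty result instead of an implicit None

    page = '<input value="//cdn.jwplayer.com/players/abcd1234">'  # made-up markup
    assert extract_urls(page) == ['//cdn.jwplayer.com/players/abcd1234']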
diff --git a/hypervideo_dl/extractor/kakao.py b/hypervideo_dl/extractor/kakao.py
index 31ce7a8..97c986d 100644
--- a/hypervideo_dl/extractor/kakao.py
+++ b/hypervideo_dl/extractor/kakao.py
@@ -3,21 +3,19 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import compat_str
from ..utils import (
- ExtractorError,
int_or_none,
- str_or_none,
strip_or_none,
- try_get,
+ traverse_obj,
unified_timestamp,
- update_url_query,
)
class KakaoIE(InfoExtractor):
_VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
- _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
+ _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/playmeta/cliplink/%s/'
+ _CDN_API = 'https://tv.kakao.com/katz/v1/ft/cliplink/%s/readyNplay?'
_TESTS = [{
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
@@ -26,7 +24,7 @@ class KakaoIE(InfoExtractor):
'id': '301965083',
'ext': 'mp4',
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
- 'uploader_id': '2671005',
+ 'uploader_id': 2671005,
'uploader': '그랑그랑이',
'timestamp': 1488160199,
'upload_date': '20170227',
@@ -39,31 +37,17 @@ class KakaoIE(InfoExtractor):
'ext': 'mp4',
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
- 'uploader_id': '2653210',
+ 'uploader_id': 2653210,
'uploader': '쇼! 음악중심',
'timestamp': 1485684628,
'upload_date': '20170129',
}
- }, {
- # geo restricted
- 'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
- 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- display_id = video_id.rstrip('@my')
api_base = self._API_BASE_TMPL % video_id
-
- player_header = {
- 'Referer': update_url_query(
- 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
- 'service': 'kakao_tv',
- 'autoplay': '1',
- 'profile': 'HIGH',
- 'wmode': 'transparent',
- })
- }
+ cdn_api_base = self._CDN_API % video_id
query = {
'player': 'monet_html5',
@@ -75,64 +59,69 @@ class KakaoIE(InfoExtractor):
'fields': ','.join([
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
'description', 'channelId', 'createTime', 'duration', 'playCount',
- 'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
+ 'likeCount', 'commentCount', 'tagList', 'channel', 'name',
+ 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
}
- impress = self._download_json(
- api_base + 'impress', display_id, 'Downloading video info',
- query=query, headers=player_header)
+ api_json = self._download_json(
+ api_base, video_id, 'Downloading video info')
- clip_link = impress['clipLink']
+ clip_link = api_json['clipLink']
clip = clip_link['clip']
title = clip.get('title') or clip_link.get('displayTitle')
- query.update({
- 'fields': '-*,code,message,url',
- 'tid': impress.get('tid') or '',
- })
-
formats = []
- for fmt in (clip.get('videoOutputList') or []):
- try:
- profile_name = fmt['profile']
- if profile_name == 'AUDIO':
- continue
- query['profile'] = profile_name
- try:
- fmt_url_json = self._download_json(
- api_base + 'raw/videolocation', display_id,
- 'Downloading video URL for profile %s' % profile_name,
- query=query, headers=player_header)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- resp = self._parse_json(e.cause.read().decode(), video_id)
- if resp.get('code') == 'GeoBlocked':
- self.raise_geo_restricted()
- continue
+ for fmt in (clip.get('videoOutputList') or []):
+ profile_name = fmt.get('profile')
+ if not profile_name or profile_name == 'AUDIO':
+ continue
+ query.update({
+ 'profile': profile_name,
+ 'fields': '-*,url',
+ })
+
+ fmt_url_json = self._download_json(
+ cdn_api_base, video_id,
+ 'Downloading video URL for profile %s' % profile_name,
+ query=query, fatal=False)
+ fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url'))
+ if not fmt_url:
+ continue
- fmt_url = fmt_url_json['url']
- formats.append({
- 'url': fmt_url,
- 'format_id': profile_name,
- 'width': int_or_none(fmt.get('width')),
- 'height': int_or_none(fmt.get('height')),
- 'format_note': fmt.get('label'),
- 'filesize': int_or_none(fmt.get('filesize')),
- 'tbr': int_or_none(fmt.get('kbps')),
- })
- except KeyError:
- pass
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': profile_name,
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'format_note': fmt.get('label'),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'tbr': int_or_none(fmt.get('kbps')),
+ })
self._sort_formats(formats)
+ thumbs = []
+ for thumb in clip.get('clipChapterThumbnailList') or []:
+ thumbs.append({
+ 'url': thumb.get('thumbnailUrl'),
+ 'id': compat_str(thumb.get('timeInSec')),
+ 'preference': -1 if thumb.get('isDefault') else 0
+ })
+ top_thumbnail = clip.get('thumbnailUrl')
+ if top_thumbnail:
+ thumbs.append({
+ 'url': top_thumbnail,
+ 'preference': 10,
+ })
+
return {
- 'id': display_id,
+ 'id': video_id,
'title': title,
'description': strip_or_none(clip.get('description')),
- 'uploader': try_get(clip_link, lambda x: x['channel']['name']),
- 'uploader_id': str_or_none(clip_link.get('channelId')),
- 'thumbnail': clip.get('thumbnailUrl'),
+ 'uploader': traverse_obj(clip_link, ('channel', 'name')),
+ 'uploader_id': clip_link.get('channelId'),
+ 'thumbnails': thumbs,
'timestamp': unified_timestamp(clip_link.get('createTime')),
'duration': int_or_none(clip.get('duration')),
'view_count': int_or_none(clip.get('playCount')),
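traverse_obj() replaces the try_get(x, lambda x: x['a']['b']) pattern with a declarative key path that returns None on any missing step. A minimal sketch of the behaviour relied on above (the real utils helper also supports alternative paths, callable keys and expected_type filtering):

    def traverse_obj(obj, path, default=None):
        for key in path:
            if not isinstance(obj, dict):
                return default
            obj = obj.get(key)
            if obj is None:
                return default
        return obj

    clip_link = {'channel': {'name': '그랑그랑이'}, 'channelId': 2671005}
    assert traverse_obj(clip_link, ('channel', 'name')) == '그랑그랑이'
    assert traverse_obj(clip_link, ('channel', 'missing')) is None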
diff --git a/hypervideo_dl/extractor/kaltura.py b/hypervideo_dl/extractor/kaltura.py
index c731612..c8f60ef 100644
--- a/hypervideo_dl/extractor/kaltura.py
+++ b/hypervideo_dl/extractor/kaltura.py
@@ -229,7 +229,7 @@ class KalturaIE(InfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
partner_id, entry_id = mobj.group('partner_id', 'id')
ks = None
captions = None
@@ -309,7 +309,7 @@ class KalturaIE(InfoExtractor):
if f.get('fileExt') == 'chun':
continue
# DRM-protected video, cannot be decrypted
- if f.get('fileExt') == 'wvm':
+ if not self.get_param('allow_unplayable_formats') and f.get('fileExt') == 'wvm':
continue
if not f.get('fileExt'):
# QT indicates QuickTime; some videos have broken fileExt
diff --git a/hypervideo_dl/extractor/kanalplay.py b/hypervideo_dl/extractor/kanalplay.py
new file mode 100644
index 0000000..5e24f7e
--- /dev/null
+++ b/hypervideo_dl/extractor/kanalplay.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ srt_subtitles_timecode,
+)
+
+
+class KanalPlayIE(InfoExtractor):
+ IE_DESC = 'Kanal 5/9/11 Play'
+ _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
+ 'info_dict': {
+ 'id': '3270012277',
+ 'ext': 'flv',
+ 'title': 'Saknar både dusch och avlopp',
+ 'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
+ 'duration': 2636.36,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
+ 'only_matching': True,
+ }]
+
+ def _fix_subtitles(self, subs):
+ return '\r\n\r\n'.join(
+ '%s\r\n%s --> %s\r\n%s'
+ % (
+ num,
+ srt_subtitles_timecode(item['startMillis'] / 1000.0),
+ srt_subtitles_timecode(item['endMillis'] / 1000.0),
+ item['text'],
+ ) for num, item in enumerate(subs, 1))
+
+ def _get_subtitles(self, channel_id, video_id):
+ subs = self._download_json(
+ 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
+ video_id, 'Downloading subtitles JSON', fatal=False)
+ return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+ channel_id = mobj.group('channel_id')
+
+ video = self._download_json(
+ 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
+ video_id)
+
+ reasons_for_no_streams = video.get('reasonsForNoStreams')
+ if reasons_for_no_streams:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
+ expected=True)
+
+ title = video['title']
+ description = video.get('description')
+ duration = float_or_none(video.get('length'), 1000)
+ thumbnail = video.get('posterUrl')
+
+ stream_base_url = video['streamBaseUrl']
+
+ formats = [{
+ 'url': stream_base_url,
+ 'play_path': stream['source'],
+ 'ext': 'flv',
+ 'tbr': float_or_none(stream.get('bitrate'), 1000),
+ 'rtmp_real_time': True,
+ } for stream in video['streams']]
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if video.get('hasSubtitle'):
+ subtitles = self.extract_subtitles(channel_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/keezmovies.py b/hypervideo_dl/extractor/keezmovies.py
index c3eb74c..027f43c 100644
--- a/hypervideo_dl/extractor/keezmovies.py
+++ b/hypervideo_dl/extractor/keezmovies.py
@@ -35,7 +35,7 @@ class KeezMoviesIE(InfoExtractor):
}]
def _extract_info(self, url, fatal=True):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = (mobj.group('display_id')
if 'display_id' in mobj.groupdict()
@@ -101,7 +101,7 @@ class KeezMoviesIE(InfoExtractor):
if not formats:
if 'title="This video is no longer available"' in webpage:
- raise ExtractorError(
+ self.raise_no_formats(
'Video %s is no longer available' % video_id, expected=True)
try:
diff --git a/hypervideo_dl/extractor/kinja.py b/hypervideo_dl/extractor/kinja.py
index 79e3026..1be8b48 100644
--- a/hypervideo_dl/extractor/kinja.py
+++ b/hypervideo_dl/extractor/kinja.py
@@ -129,7 +129,7 @@ class KinjaEmbedIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- video_type, video_id = re.match(self._VALID_URL, url).groups()
+ video_type, video_id = self._match_valid_url(url).groups()
provider = self._PROVIDER_MAP.get(video_type)
if provider:
diff --git a/hypervideo_dl/extractor/koo.py b/hypervideo_dl/extractor/koo.py
new file mode 100644
index 0000000..8154ba7
--- /dev/null
+++ b/hypervideo_dl/extractor/koo.py
@@ -0,0 +1,116 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ try_get,
+)
+
+
+class KooIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
+ _TESTS = [{ # Test for video in the comments
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
+ 'info_dict': {
+ 'id': '946c4189-bc2d-4524-b95b-43f641e2adde',
+ 'ext': 'mp4',
+ 'title': 'test for video in comment',
+ 'description': 'md5:daa77dc214add4da8b6ea7d2226776e7',
+ 'timestamp': 1632215195,
+ 'uploader_id': 'ytdlpTestAccount',
+ 'uploader': 'hypervideoTestAccount',
+ 'duration': 7000,
+ 'upload_date': '20210921'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for koo with long title
+ 'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361',
+ 'info_dict': {
+ 'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361',
+ 'ext': 'mp4',
+ 'title': 'md5:47a71c2337295330c5a19a8af1bbf450',
+ 'description': 'md5:06a6a84e9321499486dab541693d8425',
+ 'timestamp': 1632106884,
+ 'uploader_id': 'laxman_kumarDBFEC',
+ 'uploader': 'Laxman Kumar 🇮🇳',
+ 'duration': 46000,
+ 'upload_date': '20210920'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for audio
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
+ 'info_dict': {
+ 'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
+ 'ext': 'mp4',
+ 'title': 'Test for audio',
+ 'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8',
+ 'timestamp': 1632211634,
+ 'uploader_id': 'ytdlpTestAccount',
+ 'uploader': 'hypervideoTestAccount',
+ 'duration': 214000,
+ 'upload_date': '20210921'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for video
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
+ 'info_dict': {
+ 'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
+ 'ext': 'mp4',
+ 'title': 'Test for video',
+ 'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500',
+ 'timestamp': 1632211468,
+ 'uploader_id': 'ytdlpTestAccount',
+ 'uploader': 'hypervideoTestAccount',
+ 'duration': 14000,
+ 'upload_date': '20210921'
+ },
+ 'params': {'skip_download': True}
+ }, { # Test for link
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a',
+ 'skip': 'No video/audio found at the provided url.',
+ 'info_dict': {
+ 'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a',
+ 'title': 'Test for link',
+ 'ext': 'none',
+ },
+ }, { # Test for images
+ 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
+ 'skip': 'No video/audio found at the provided url.',
+ 'info_dict': {
+ 'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
+ 'title': 'Test for images',
+ 'ext': 'none',
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ data_json = self._download_json(f'https://www.kooapp.com/apiV1/ku/{id}?limit=20&offset=0&showSimilarKoos=true', id)['parentContent']
+ item_json = next(content['items'][0] for content in data_json
+ if try_get(content, lambda x: x['items'][0]['id']) == id)
+ media_json = item_json['mediaMap']
+ formats = []
+
+ mp4_url = media_json.get('videoMp4')
+ video_m3u8_url = media_json.get('videoHls')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ 'ext': 'mp4',
+ })
+ if video_m3u8_url:
+ formats.extend(self._extract_m3u8_formats(video_m3u8_url, id, fatal=False, ext='mp4'))
+ if not formats:
+ self.raise_no_formats('No video/audio found at the provided url.', expected=True)
+
+ self._sort_formats(formats)
+ return {
+ 'id': id,
+ 'title': clean_html(item_json.get('title')),
+ 'description': f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}',
+ 'timestamp': item_json.get('createdAt'),
+ 'uploader_id': item_json.get('handle'),
+ 'uploader': item_json.get('name'),
+ 'duration': media_json.get('duration'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/kusi.py b/hypervideo_dl/extractor/kusi.py
index 9833d35..707fe18 100644
--- a/hypervideo_dl/extractor/kusi.py
+++ b/hypervideo_dl/extractor/kusi.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import random
-import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote_plus
@@ -35,7 +34,7 @@ class KUSIIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
clip_id = mobj.group('clipId')
video_id = clip_id or mobj.group('path')
diff --git a/hypervideo_dl/extractor/kuwo.py b/hypervideo_dl/extractor/kuwo.py
index cc5b2a1..460a425 100644
--- a/hypervideo_dl/extractor/kuwo.py
+++ b/hypervideo_dl/extractor/kuwo.py
@@ -49,7 +49,7 @@ class KuwoBaseIE(InfoExtractor):
'url': song_url,
'format_id': file_format['format'],
'format': file_format['format'],
- 'preference': file_format['preference'],
+ 'quality': file_format['preference'],
'abr': file_format.get('abr'),
})
diff --git a/hypervideo_dl/extractor/la7.py b/hypervideo_dl/extractor/la7.py
index c3b4ffa..363fbd6 100644
--- a/hypervideo_dl/extractor/la7.py
+++ b/hypervideo_dl/extractor/la7.py
@@ -1,10 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
- js_to_json,
+ determine_ext,
+ float_or_none,
+ parse_duration,
smuggle_url,
+ unified_strdate,
)
@@ -23,22 +28,13 @@ class LA7IE(InfoExtractor):
'id': '0_42j6wd36',
'ext': 'mp4',
'title': 'Inc.Cool8',
- 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
+ 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
'thumbnail': 're:^https?://.*',
'uploader_id': 'kdla7pillole@iltrovatore.it',
'timestamp': 1443814869,
'upload_date': '20151002',
},
}, {
- # 'src' is a dictionary
- 'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
- 'md5': '6b0d8888d286e39870208dfeceaf456b',
- 'info_dict': {
- 'id': '189080',
- 'ext': 'mp4',
- 'title': 'TG LA7',
- },
- }, {
'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
'only_matching': True,
}]
@@ -46,22 +42,162 @@ class LA7IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+ if not url.startswith('http'):
+ url = '%s//%s' % (self.http_scheme(), url)
+
webpage = self._download_webpage(url, video_id)
- player_data = self._parse_json(
- self._search_regex(
- [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
- webpage, 'player data'),
- video_id, transform_source=js_to_json)
+ player_data = self._search_regex(
+ [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
+ webpage, 'player data')
+ vid = self._search_regex(r'vid\s*:\s*"(.+?)",', player_data, 'vid')
return {
'_type': 'url_transparent',
- 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
+ 'url': smuggle_url('kaltura:103:%s' % vid, {
'service_url': 'http://nkdam.iltrovatore.it',
}),
'id': video_id,
- 'title': player_data['title'],
+ 'title': self._og_search_title(webpage, default=None),
'description': self._og_search_description(webpage, default=None),
- 'thumbnail': player_data.get('poster'),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
'ie_key': 'Kaltura',
}
+
+
+class LA7PodcastEpisodeIE(InfoExtractor):
+ IE_NAME = 'la7.it:pod:episode'
+ _VALID_URL = r'''(?x)(https?://)?
+ (?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)'''
+
+ _TESTS = [{
+ 'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497',
+ 'md5': '7737d4d79b3c1a34b3de3e16297119ed',
+ 'info_dict': {
+ 'id': '371497',
+ 'ext': 'mp3',
+ 'title': '"La carezza delle memoria" di Carlo Verdone',
+ 'description': 'md5:5abf07c3c551a687db80af3f9ceb7d52',
+ 'thumbnail': 'https://www.la7.it/sites/default/files/podcast/371497.jpg',
+ 'upload_date': '20210323',
+ },
+ }, {
+ # embed url
+ 'url': 'https://www.la7.it/embed/podcast/371497',
+ 'only_matching': True,
+ }, {
+ # date already in the title
+ 'url': 'https://www.la7.it/propagandalive/podcast/lintervista-di-diego-bianchi-ad-annalisa-cuzzocrea-puntata-del-1932021-20-03-2021-371130',
+ 'only_matching': True,
+ }, {
+ # title same as show_title
+ 'url': 'https://www.la7.it/otto-e-mezzo/podcast/otto-e-mezzo-26-03-2021-372340',
+ 'only_matching': True,
+ }]
+
+ def _extract_info(self, webpage, video_id=None, ppn=None):
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-nid=([\'"])(?P<vid>\d+)\1',
+ webpage, 'video_id', group='vid')
+
+ media_url = self._search_regex(
+ (r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1',
+ r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'),
+ webpage, 'media_url', group='url')
+ ext = determine_ext(media_url)
+ formats = [{
+ 'url': media_url,
+ 'format_id': ext,
+ 'ext': ext,
+ }]
+ self._sort_formats(formats)
+
+ title = self._html_search_regex(
+ (r'<div class="title">(?P<title>.+?)</',
+ r'<title>(?P<title>[^<]+)</title>',
+ r'title:\s*([\'"])(?P<title>.+?)\1'),
+ webpage, 'title', group='title')
+
+ description = (
+ self._html_search_regex(
+ (r'<div class="description">(.+?)</div>',
+ r'<div class="description-mobile">(.+?)</div>',
+ r'<div class="box-txt">([^<]+?)</div>',
+ r'<div class="field-content"><p>(.+?)</p></div>'),
+ webpage, 'description', default=None)
+ or self._html_search_meta('description', webpage))
+
+ thumb = self._html_search_regex(
+ (r'<div class="podcast-image"><img src="(.+?)"></div>',
+ r'<div class="container-embed"[^<]+url\((.+?)\);">',
+ r'<div class="field-content"><img src="(.+?)"'),
+ webpage, 'thumbnail', fatal=False, default=None)
+
+ duration = parse_duration(self._html_search_regex(
+ r'<span class="(?:durata|duration)">([\d:]+)</span>',
+ webpage, 'duration', fatal=False, default=None))
+
+ date = self._html_search_regex(
+ r'class="data">\s*(?:<span>)?([\d\.]+)\s*</',
+ webpage, 'date', default=None)
+
+ date_alt = self._search_regex(
+ r'(\d+[\./]\d+[\./]\d+)', title, 'date_alt', default=None)
+ ppn = ppn or self._search_regex(
+ r'ppN:\s*([\'"])(?P<ppn>.+?)\1',
+ webpage, 'ppn', group='ppn', default=None)
+ # If the date is not already in the title and the title is
+ # the same as the show title, append the date to the title
+ if date and not date_alt and ppn and ppn.lower() == title.lower():
+ title += ' del %s' % date
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': float_or_none(duration),
+ 'formats': formats,
+ 'thumbnail': thumb,
+ 'upload_date': unified_strdate(date),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ return self._extract_info(webpage, video_id)
+
+
+class LA7PodcastIE(LA7PodcastEpisodeIE):
+ IE_NAME = 'la7.it:podcast'
+ _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'
+
+ _TESTS = [{
+ 'url': 'https://www.la7.it/propagandalive/podcast',
+ 'info_dict': {
+ 'id': 'propagandalive',
+ 'title': "Propaganda Live",
+ },
+ 'playlist_count': 10,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = (
+ self._html_search_regex(
+ r'<h1.*?>(.+?)</h1>', webpage, 'title', fatal=False, default=None)
+ or self._og_search_title(webpage))
+ ppn = self._search_regex(
+ r'window\.ppN\s*=\s*([\'"])(?P<ppn>.+?)\1',
+ webpage, 'ppn', group='ppn', default=None)
+
+ entries = []
+ for episode in re.finditer(
+ r'<div class="container-podcast-property">([\s\S]+?)(?:</div>\s*){3}',
+ webpage):
+ entries.append(self._extract_info(episode.group(1), ppn=ppn))
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/hypervideo_dl/extractor/lbry.py b/hypervideo_dl/extractor/lbry.py
index cfd6b83..0f87bf1 100644
--- a/hypervideo_dl/extractor/lbry.py
+++ b/hypervideo_dl/extractor/lbry.py
@@ -6,16 +6,15 @@ import json
from .common import InfoExtractor
from ..compat import (
- compat_parse_qs,
compat_str,
compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
mimetype2ext,
+ parse_qs,
OnDemandPagedList,
try_get,
urljoin,
@@ -23,27 +22,34 @@ from ..utils import (
class LBRYBaseIE(InfoExtractor):
- _BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
+ _BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
- _OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
+ _OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
def _call_api_proxy(self, method, display_id, params, resource):
- return self._download_json(
+ response = self._download_json(
'https://api.lbry.tv/api/v1/proxy',
display_id, 'Downloading %s JSON metadata' % resource,
headers={'Content-Type': 'application/json-rpc'},
data=json.dumps({
'method': method,
'params': params,
- }).encode())['result']
+ }).encode())
+ err = response.get('error')
+ if err:
+ raise ExtractorError(
+ f'{self.IE_NAME} said: {err.get("code")} - {err.get("message")}', expected=True)
+ return response['result']
def _resolve_url(self, url, display_id, resource):
return self._call_api_proxy(
'resolve', display_id, {'urls': url}, resource)[url]
def _permanent_url(self, url, claim_name, claim_id):
- return urljoin(url, '/%s:%s' % (claim_name, claim_id))
+ return urljoin(
+ url.replace('lbry://', 'https://lbry.tv/'),
+ '/%s:%s' % (claim_name, claim_id))
def _parse_stream(self, stream, url):
stream_value = stream.get('value') or {}
@@ -164,6 +170,9 @@ class LBRYIE(LBRYBaseIE):
}, {
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
'only_matching': True,
+ }, {
+ 'url': 'lbry://@lbry#3f/odysee#7',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -201,7 +210,7 @@ class LBRYIE(LBRYBaseIE):
class LBRYChannelIE(LBRYBaseIE):
IE_NAME = 'lbry:channel'
- _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
_TESTS = [{
'url': 'https://lbry.tv/@LBRYFoundation:0',
'info_dict': {
@@ -213,6 +222,9 @@ class LBRYChannelIE(LBRYBaseIE):
}, {
'url': 'https://lbry.tv/@LBRYFoundation',
'only_matching': True,
+ }, {
+ 'url': 'lbry://@lbry#3f',
+ 'only_matching': True,
}]
_PAGE_SIZE = 50
@@ -248,7 +260,7 @@ class LBRYChannelIE(LBRYBaseIE):
result = self._resolve_url(
'lbry://' + display_id, display_id, 'channel')
claim_id = result['claim_id']
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs = parse_qs(url)
content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
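_call_api_proxy now surfaces JSON-RPC errors itself instead of failing later with a KeyError on 'result'. The shape being checked, with a hypothetical error payload:

    response = {'error': {'code': -32600, 'message': 'Invalid Request'}}  # made up

    err = response.get('error')
    if err:
        # mirrors the ExtractorError raised above
        print('lbry said: %s - %s' % (err.get('code'), err.get('message')))
    else:
        result = response['result']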
diff --git a/hypervideo_dl/extractor/lecturio.py b/hypervideo_dl/extractor/lecturio.py
index 1b2dcef..9d22287 100644
--- a/hypervideo_dl/extractor/lecturio.py
+++ b/hypervideo_dl/extractor/lecturio.py
@@ -103,7 +103,7 @@ class LecturioIE(LecturioBaseIE):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
nt = mobj.group('nt') or mobj.group('nt_de')
lecture_id = mobj.group('id')
display_id = nt or lecture_id
@@ -196,7 +196,7 @@ class LecturioCourseIE(LecturioBaseIE):
}]
def _real_extract(self, url):
- nt, course_id = re.match(self._VALID_URL, url).groups()
+ nt, course_id = self._match_valid_url(url).groups()
display_id = nt or course_id
api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
course = self._download_json(
diff --git a/hypervideo_dl/extractor/leeco.py b/hypervideo_dl/extractor/leeco.py
index 7dc0ad7..d5e1142 100644
--- a/hypervideo_dl/extractor/leeco.py
+++ b/hypervideo_dl/extractor/leeco.py
@@ -185,7 +185,7 @@ class LeIE(InfoExtractor):
f['height'] = int_or_none(format_id[:-1])
formats.append(f)
- self._sort_formats(formats, ('height', 'quality', 'format_id'))
+ self._sort_formats(formats, ('res', 'quality'))
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
diff --git a/hypervideo_dl/extractor/lego.py b/hypervideo_dl/extractor/lego.py
index 1e3c19d..b9d8b16 100644
--- a/hypervideo_dl/extractor/lego.py
+++ b/hypervideo_dl/extractor/lego.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
import uuid
from .common import InfoExtractor
@@ -64,7 +63,7 @@ class LEGOIE(InfoExtractor):
}
def _real_extract(self, url):
- locale, video_id = re.match(self._VALID_URL, url).groups()
+ locale, video_id = self._match_valid_url(url).groups()
countries = [locale.split('-')[1].upper()]
self._initialize_geo_bypass({
'countries': countries,
diff --git a/hypervideo_dl/extractor/libsyn.py b/hypervideo_dl/extractor/libsyn.py
index 2cf4442..d1fcda4 100644
--- a/hypervideo_dl/extractor/libsyn.py
+++ b/hypervideo_dl/extractor/libsyn.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -42,7 +41,7 @@ class LibsynIE(InfoExtractor):
}]
def _real_extract(self, url):
- url, video_id = re.match(self._VALID_URL, url).groups()
+ url, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id)
data = self._parse_json(self._search_regex(
diff --git a/hypervideo_dl/extractor/lifenews.py b/hypervideo_dl/extractor/lifenews.py
index 42e263b..49a0a59 100644
--- a/hypervideo_dl/extractor/lifenews.py
+++ b/hypervideo_dl/extractor/lifenews.py
@@ -201,7 +201,7 @@ class LifeEmbedIE(InfoExtractor):
formats.append({
'url': original_url,
'format_id': determine_ext(original_url, None),
- 'preference': 1,
+ 'quality': 1,
})
playlist = self._parse_json(
diff --git a/hypervideo_dl/extractor/limelight.py b/hypervideo_dl/extractor/limelight.py
index 39f74d2..369141d 100644
--- a/hypervideo_dl/extractor/limelight.py
+++ b/hypervideo_dl/extractor/limelight.py
@@ -96,7 +96,9 @@ class LimelightBaseIE(InfoExtractor):
urls = []
for stream in pc_item.get('streams', []):
stream_url = stream.get('url')
- if not stream_url or stream.get('drmProtected') or stream_url in urls:
+ if not stream_url or stream_url in urls:
+ continue
+ if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'):
continue
urls.append(stream_url)
ext = determine_ext(stream_url)
@@ -158,7 +160,10 @@ class LimelightBaseIE(InfoExtractor):
for mobile_url in mobile_item.get('mobileUrls', []):
media_url = mobile_url.get('mobileUrl')
format_id = mobile_url.get('targetMediaPlatform')
- if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
+ if not media_url or media_url in urls:
+ continue
+ if (format_id in ('Widevine', 'SmoothStreaming')
+ and not self.get_param('allow_unplayable_formats', False)):
continue
urls.append(media_url)
ext = determine_ext(media_url)
@@ -173,7 +178,7 @@ class LimelightBaseIE(InfoExtractor):
formats.append({
'url': media_url,
'format_id': format_id,
- 'preference': -1,
+ 'quality': -10,
'ext': ext,
})
diff --git a/hypervideo_dl/extractor/line.py b/hypervideo_dl/extractor/line.py
index 2526daa..d4bcae6 100644
--- a/hypervideo_dl/extractor/line.py
+++ b/hypervideo_dl/extractor/line.py
@@ -1,12 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- ExtractorError,
int_or_none,
js_to_json,
str_or_none,
@@ -32,7 +30,7 @@ class LineTVIE(InfoExtractor):
}]
def _real_extract(self, url):
- series_id, segment = re.match(self._VALID_URL, url).groups()
+ series_id, segment = self._match_valid_url(url).groups()
video_id = '%s_%s' % (series_id, segment)
webpage = self._download_webpage(url, video_id)
@@ -77,7 +75,7 @@ class LineTVIE(InfoExtractor):
self._sort_formats(formats)
- if not formats[0].get('width'):
+ if formats and not formats[0].get('width'):
formats[0]['vcodec'] = 'none'
title = self._og_search_title(webpage)
@@ -155,7 +153,7 @@ class LineLiveIE(LineLiveBaseIE):
}]
def _real_extract(self, url):
- channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
+ channel_id, broadcast_id = self._match_valid_url(url).groups()
broadcast = self._download_json(
self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
broadcast_id)
@@ -183,7 +181,7 @@ class LineLiveIE(LineLiveBaseIE):
if not formats:
archive_status = item.get('archiveStatus')
if archive_status != 'ARCHIVED':
- raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
+ self.raise_no_formats('this video has been ' + archive_status.lower(), expected=True)
self._sort_formats(formats)
info['formats'] = formats
return info
diff --git a/hypervideo_dl/extractor/linkedin.py b/hypervideo_dl/extractor/linkedin.py
index 26fc703..3ce906e 100644
--- a/hypervideo_dl/extractor/linkedin.py
+++ b/hypervideo_dl/extractor/linkedin.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+from itertools import zip_longest
import re
from .common import InfoExtractor
@@ -8,6 +9,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ srt_subtitles_timecode,
+ try_get,
urlencode_postdata,
urljoin,
)
@@ -86,8 +89,18 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
},
}
+ def json2srt(self, transcript_lines, duration=None):
+ srt_data = ''
+ for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
+ start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
+ end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
+ srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),
+ srt_subtitles_timecode(end_time),
+ caption)
+ return srt_data
+
def _real_extract(self, url):
- course_slug, video_slug = re.match(self._VALID_URL, url).groups()
+ course_slug, video_slug = self._match_valid_url(url).groups()
video_data = None
formats = []
@@ -101,6 +114,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
formats.append({
'format_id': 'progressive-%dp' % height,
'url': progressive_url,
+ 'ext': 'mp4',
'height': height,
'width': width,
'source_preference': 1,
@@ -124,7 +138,18 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
streaming_url, video_slug, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
+ # This ordering seems to be handled correctly by default, but
+ # until someone can confirm that, the old behaviour is kept as-is
+ self._sort_formats(formats, ('res', 'source_preference'))
+ subtitles = {}
+ duration = int_or_none(video_data.get('durationInSeconds'))
+ transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
+ if transcript_lines:
+ subtitles['en'] = [{
+ 'ext': 'srt',
+ 'data': self.json2srt(transcript_lines, duration)
+ }]
return {
'id': self._get_video_id(video_data, course_slug, video_slug),
@@ -132,7 +157,8 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
'formats': formats,
'thumbnail': video_data.get('defaultThumbnail'),
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
- 'duration': int_or_none(video_data.get('durationInSeconds')),
+ 'duration': duration,
+ 'subtitles': subtitles,
}
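json2srt above derives each caption's end time from the next caption's start via zip_longest, padding the last line with the video duration (or one second). A standalone version, assuming srt_subtitles_timecode formats seconds as HH:MM:SS,mmm:

    from itertools import zip_longest

    def srt_timecode(t):
        # same formatting as utils.srt_subtitles_timecode
        return '%02d:%02d:%02d,%03d' % (t // 3600, (t % 3600) // 60, t % 60, t % 1 * 1000)

    def json2srt(lines, duration=None):
        srt = ''
        for n, (cur, nxt) in enumerate(zip_longest(lines, lines[1:]), 1):
            start = cur['transcriptStartAt'] / 1000
            # end at the next caption's start, else at duration (or +1s)
            end = nxt['transcriptStartAt'] / 1000 if nxt else duration or start + 1
            srt += '%d\n%s --> %s\n%s\n\n' % (n, srt_timecode(start), srt_timecode(end), cur['caption'])
        return srt

    # hypothetical two-line transcript
    print(json2srt([{'transcriptStartAt': 0, 'caption': 'Hello'},
                    {'transcriptStartAt': 1500, 'caption': 'world'}], duration=3))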
diff --git a/hypervideo_dl/extractor/linuxacademy.py b/hypervideo_dl/extractor/linuxacademy.py
index 7ec4a65..2053970 100644
--- a/hypervideo_dl/extractor/linuxacademy.py
+++ b/hypervideo_dl/extractor/linuxacademy.py
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
import json
import random
-import re
from .common import InfoExtractor
from ..compat import (
@@ -38,8 +37,8 @@ class LinuxAcademyIE(InfoExtractor):
'ext': 'mp4',
'title': 'What Is Data Science',
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
- 'timestamp': 1607387907,
- 'upload_date': '20201208',
+ 'timestamp': int, # The timestamp and upload date change
+ 'upload_date': r're:\d+',
'duration': 304,
},
'params': {
@@ -59,6 +58,16 @@ class LinuxAcademyIE(InfoExtractor):
},
'playlist_count': 41,
'skip': 'Requires Linux Academy account credentials',
+ }, {
+ 'url': 'https://linuxacademy.com/cp/modules/view/id/39',
+ 'info_dict': {
+ 'id': '39',
+ 'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
+ 'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
+ 'duration': 89280,
+ },
+ 'playlist_count': 73,
+ 'skip': 'Requires Linux Academy account credentials',
}]
_AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
@@ -102,7 +111,7 @@ class LinuxAcademyIE(InfoExtractor):
'client_id': self._CLIENT_ID,
'redirect_uri': self._ORIGIN_URL,
'tenant': 'lacausers',
- 'connection': 'Username-Password-Authentication',
+ 'connection': 'Username-Password-ACG-Proxy',
'username': username,
'password': password,
'sso': 'true',
@@ -152,7 +161,7 @@ class LinuxAcademyIE(InfoExtractor):
% access_token, None, 'Downloading token validation page')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
@@ -162,7 +171,7 @@ class LinuxAcademyIE(InfoExtractor):
if course_id:
module = self._parse_json(
self._search_regex(
- r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
+ r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
item_id)
entries = []
chapter_number = None
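
The regex change in linuxacademy.py is worth a note: the old lazy pattern ({.+?})\s*; stops at the first "};" it sees, which truncates the object whenever that character pair occurs inside a JSON string. The new pattern consumes double-quoted strings (escaped quotes included) atomically before testing for the terminator. A quick standalone comparison on a contrived payload:

    import re

    OLD = r'window\.module\s*=\s*({.+?})\s*;'
    NEW = r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;'

    page = 'window.module = {"name": "ends with };", "id": 39};'

    print(re.search(OLD, page).group(1))  # {"name": "ends with }  -- truncated
    print(re.search(NEW, page).group(1))  # the complete object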
diff --git a/hypervideo_dl/extractor/litv.py b/hypervideo_dl/extractor/litv.py
index 337b1b1..18d237e 100644
--- a/hypervideo_dl/extractor/litv.py
+++ b/hypervideo_dl/extractor/litv.py
@@ -71,7 +71,7 @@ class LiTVIE(InfoExtractor):
video_id = self._match_id(url)
- noplaylist = self._downloader.params.get('noplaylist')
+ noplaylist = self.get_param('noplaylist')
noplaylist_prompt = True
if 'force_noplaylist' in data:
noplaylist = data['force_noplaylist']
diff --git a/hypervideo_dl/extractor/livestream.py b/hypervideo_dl/extractor/livestream.py
index e55b1a2..f591289 100644
--- a/hypervideo_dl/extractor/livestream.py
+++ b/hypervideo_dl/extractor/livestream.py
@@ -84,7 +84,7 @@ class LivestreamIE(InfoExtractor):
'format_id': 'smil_%d' % tbr,
'ext': 'flv',
'tbr': tbr,
- 'preference': -1000,
+ 'preference': -1000, # Strictly inferior to all other formats?
})
return formats
@@ -212,7 +212,7 @@ class LivestreamIE(InfoExtractor):
return self.playlist_result(entries, event_id, event_data['full_name'])
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
event = mobj.group('event_id') or mobj.group('event_name')
account = mobj.group('account_id') or mobj.group('account_name')
@@ -319,7 +319,7 @@ class LivestreamOriginalIE(InfoExtractor):
return self.playlist_result(entries, folder_id)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
user = mobj.group('user')
url_type = mobj.group('type')
content_id = mobj.group('id')
@@ -359,7 +359,7 @@ class LivestreamShortenerIE(InfoExtractor):
_VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
id = mobj.group('id')
webpage = self._download_webpage(url, id)
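
The recurring re.match(self._VALID_URL, url) -> self._match_valid_url(url) substitution throughout this patch (livestream.py above is typical) also lets the now-unused "import re" lines disappear. The upstream helper caches the compiled pattern per extractor class; roughly (a sketch, not the exact implementation):

    import re

    class InfoExtractor:
        _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

        @classmethod
        def _match_valid_url(cls, url):
            # Compile once per class and reuse on subsequent calls
            if '_VALID_URL_RE' not in cls.__dict__:
                cls._VALID_URL_RE = re.compile(cls._VALID_URL)
            return cls._VALID_URL_RE.match(url)

    print(InfoExtractor._match_valid_url('https://livestre.am/abc123').group('id'))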
diff --git a/hypervideo_dl/extractor/lnkgo.py b/hypervideo_dl/extractor/lnkgo.py
index 3e71852..1467596 100644
--- a/hypervideo_dl/extractor/lnkgo.py
+++ b/hypervideo_dl/extractor/lnkgo.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -58,7 +57,7 @@ class LnkGoIE(InfoExtractor):
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
def _real_extract(self, url):
- display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id, video_id = self._match_valid_url(url).groups()
video_info = self._download_json(
'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'),
diff --git a/hypervideo_dl/extractor/localnews8.py b/hypervideo_dl/extractor/localnews8.py
index aad3961..c3e9d10 100644
--- a/hypervideo_dl/extractor/localnews8.py
+++ b/hypervideo_dl/extractor/localnews8.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -25,7 +24,7 @@ class LocalNews8IE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/lovehomeporn.py b/hypervideo_dl/extractor/lovehomeporn.py
index 8f65a3c..ca4b5f3 100644
--- a/hypervideo_dl/extractor/lovehomeporn.py
+++ b/hypervideo_dl/extractor/lovehomeporn.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .nuevo import NuevoBaseIE
@@ -23,7 +22,7 @@ class LoveHomePornIE(NuevoBaseIE):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/lrt.py b/hypervideo_dl/extractor/lrt.py
index 89d5498..4024aef 100644
--- a/hypervideo_dl/extractor/lrt.py
+++ b/hypervideo_dl/extractor/lrt.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -47,7 +46,7 @@ class LRTIE(InfoExtractor):
webpage, var_name.replace('_', ' '), default, group=2)
def _real_extract(self, url):
- path, video_id = re.match(self._VALID_URL, url).groups()
+ path, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id)
media_url = self._extract_js_var(webpage, 'main_url', path)
diff --git a/hypervideo_dl/extractor/lynda.py b/hypervideo_dl/extractor/lynda.py
index b3d8653..58cf172 100644
--- a/hypervideo_dl/extractor/lynda.py
+++ b/hypervideo_dl/extractor/lynda.py
@@ -128,7 +128,7 @@ class LyndaIE(LyndaBaseIE):
'Video %s is only available for members' % video_id)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
course_id = mobj.group('course_id')
@@ -281,7 +281,7 @@ class LyndaCourseIE(LyndaBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
course_path = mobj.group('coursepath')
course_id = mobj.group('courseid')
@@ -331,7 +331,7 @@ class LyndaCourseIE(LyndaBaseIE):
})
if unaccessible_videos > 0:
- self._downloader.report_warning(
+ self.report_warning(
'%s videos are only available for members (or paid members) and will not be downloaded. '
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
diff --git a/hypervideo_dl/extractor/magentamusik360.py b/hypervideo_dl/extractor/magentamusik360.py
new file mode 100644
index 0000000..5c27490
--- /dev/null
+++ b/hypervideo_dl/extractor/magentamusik360.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MagentaMusik360IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
+ _TESTS = [{
+ 'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
+ 'md5': '65b6f060b40d90276ec6fb9b992c1216',
+ 'info_dict': {
+ 'id': '9208205928595185932',
+ 'ext': 'm3u8',
+ 'title': 'WITHIN TEMPTATION',
+ 'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
+ }
+ }, {
+ 'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
+ 'md5': '81010d27d7cab3f7da0b0f681b983b7e',
+ 'info_dict': {
+ 'id': '9208205928595231363',
+ 'ext': 'm3u8',
+ 'title': 'Body Count feat. Ice-T',
+ 'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ # _match_id casts to string, but since "None" is not a valid video_id for magenta
+ # there is no risk of confusion
+ if video_id == "None":
+ webpage = self._download_webpage(url, video_id)
+ video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')
+ json = self._download_json("https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id, video_id)
+ xml_url = json['content']['feature']['representations'][0]['contentPackages'][0]['media']['href']
+ metadata = json['content']['feature'].get('metadata')
+ title = None
+ description = None
+ duration = None
+ thumbnails = []
+ if metadata:
+ title = metadata.get('title')
+ description = metadata.get('fullDescription')
+ duration = metadata.get('runtimeInSeconds')
+ for img_key in ('teaserImageWide', 'smallCoverImage'):
+ if img_key in metadata:
+ thumbnails.append({'url': metadata[img_key].get('href')})
+
+ xml = self._download_xml(xml_url, video_id)
+ final_url = xml[0][0][0].attrib['src']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'url': final_url,
+ 'duration': duration,
+ 'thumbnails': thumbnails
+ }
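
The final-URL lookup in the new magentamusik360.py relies on positional child access: _download_xml returns an xml.etree.ElementTree.Element, and xml[0][0][0] is simply the first child of the first child of the first child. An illustration with a made-up manifest (the element names here are assumptions, not the real feed):

    import xml.etree.ElementTree as ET

    manifest = ET.fromstring(
        '<smil><body><switch>'
        '<video src="https://example.invalid/stream.m3u8"/>'
        '</switch></body></smil>')

    # body -> switch -> video, purely by position
    print(manifest[0][0][0].attrib['src'])

This is brittle against manifest changes, but it mirrors what the extractor assumes about the feed's structure.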
diff --git a/hypervideo_dl/extractor/mailru.py b/hypervideo_dl/extractor/mailru.py
index 65cc474..5d9f80b 100644
--- a/hypervideo_dl/extractor/mailru.py
+++ b/hypervideo_dl/extractor/mailru.py
@@ -12,6 +12,7 @@ from ..utils import (
parse_duration,
remove_end,
try_get,
+ urljoin,
)
@@ -20,10 +21,10 @@ class MailRuIE(InfoExtractor):
IE_DESC = 'Видео@Mail.Ru'
_VALID_URL = r'''(?x)
https?://
- (?:(?:www|m)\.)?my\.mail\.ru/+
+ (?:(?:www|m|videoapi)\.)?my\.mail\.ru/+
(?:
video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
- (?:(?P<idv2prefix>(?:[^/]+/+){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
+ (?:videos/embed/)?(?:(?P<idv2prefix>(?:[^/]+/+){2})(?:video/(?:embed/)?)?(?P<idv2suffix>[^/]+/\d+))(?:\.html)?|
(?:video/embed|\+/video/meta)/(?P<metaid>\d+)
)
'''
@@ -93,11 +94,19 @@ class MailRuIE(InfoExtractor):
{
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
+ 'only_matching': True,
}
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
meta_id = mobj.group('metaid')
video_id = None
@@ -108,15 +117,21 @@ class MailRuIE(InfoExtractor):
if not video_id:
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
webpage = self._download_webpage(url, video_id)
- page_config = self._parse_json(self._search_regex(
+ page_config = self._parse_json(self._search_regex([
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
+ r'(?s)"video":\s*({.+?}),'],
webpage, 'page config', default='{}'), video_id, fatal=False)
if page_config:
- meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
+ meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
else:
meta_url = None
video_data = None
+
+ # fix meta_url if it is missing the host address
+ if meta_url and re.match(r'^\/\+\/', meta_url):
+ meta_url = urljoin('https://my.mail.ru', meta_url)
+
if meta_url:
video_data = self._download_json(
meta_url, video_id or meta_id, 'Downloading video meta JSON',
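
The mail.ru hunk also guards against relative metadata URLs: when the page hands back a path starting with /+/, the host is prepended with urljoin. The project's utils.urljoin behaves like the standard library's for this case, so a standard-library sketch shows the effect:

    from urllib.parse import urljoin
    import re

    meta_url = '/+/video/meta/123456'
    if meta_url and re.match(r'^\/\+\/', meta_url):
        meta_url = urljoin('https://my.mail.ru', meta_url)
    print(meta_url)  # https://my.mail.ru/+/video/meta/123456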
diff --git a/hypervideo_dl/extractor/manoto.py b/hypervideo_dl/extractor/manoto.py
new file mode 100644
index 0000000..d12aa5f
--- /dev/null
+++ b/hypervideo_dl/extractor/manoto.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ traverse_obj
+)
+
+
+_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}'
+
+
+class ManotoTVIE(InfoExtractor):
+ IE_DESC = 'Manoto TV (Episode)'
+ _VALID_URL = r'https?://(?:www\.)?manototv\.com/episode/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.manototv.com/episode/8475',
+ 'info_dict': {
+ 'id': '8475',
+ 'series': 'خانه های رویایی با برادران اسکات',
+ 'season_number': 7,
+ 'episode_number': 25,
+ 'episode_id': 'My Dream Home S7: Carol & John',
+ 'duration': 3600,
+ 'categories': ['سرگرمی'],
+ 'title': 'کارول و جان',
+ 'description': 'md5:d0fff1f8ba5c6775d312a00165d1a97e',
+ 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
+ 'ext': 'mp4'
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ }
+ }, {
+ 'url': 'https://www.manototv.com/episode/12576',
+ 'info_dict': {
+ 'id': '12576',
+ 'series': 'فیلم های ایرانی',
+ 'episode_id': 'Seh Mah Taatili',
+ 'duration': 5400,
+ 'view_count': int,
+ 'categories': ['سرگرمی'],
+ 'title': 'سه ماه تعطیلی',
+ 'description': 'سه ماه تعطیلی فیلمی به کارگردانی و نویسندگی شاپور قریب ساختهٔ سال ۱۳۵۶ است.',
+ 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
+ 'ext': 'mp4'
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id)
+ details = episode_json.get('details', {})
+ formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'series': details.get('showTitle'),
+ 'season_number': int_or_none(details.get('analyticsSeasonNumber')),
+ 'episode_number': int_or_none(details.get('episodeNumber')),
+ 'episode_id': details.get('analyticsEpisodeTitle'),
+ 'duration': int_or_none(details.get('durationInMinutes'), invscale=60),
+ 'view_count': details.get('viewCount'),
+ 'categories': [details.get('videoCategory')],
+ 'title': details.get('episodeTitle'),
+ 'description': clean_html(details.get('episodeDescription')),
+ 'thumbnail': details.get('episodelandscapeImgIxUrl'),
+ 'formats': formats,
+ }
+
+
+class ManotoTVShowIE(InfoExtractor):
+ IE_DESC = 'Manoto TV (Show)'
+ _VALID_URL = r'https?://(?:www\.)?manototv\.com/show/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.manototv.com/show/2526',
+ 'playlist_mincount': 68,
+ 'info_dict': {
+ 'id': '2526',
+ 'title': 'فیلم های ایرانی',
+ 'description': 'مجموعه ای از فیلم های سینمای کلاسیک ایران',
+ },
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ show_json = self._download_json(_API_URL.format('showmodule', 'details', show_id), show_id)
+ show_details = show_json.get('details', {})
+ title = show_details.get('showTitle')
+ description = show_details.get('showSynopsis')
+
+ series_json = self._download_json(_API_URL.format('showmodule', 'serieslist', show_id), show_id)
+ playlist_id = str(traverse_obj(series_json, ('details', 'list', 0, 'id')))
+
+ playlist_json = self._download_json(_API_URL.format('showmodule', 'episodelist', playlist_id), playlist_id)
+ playlist = traverse_obj(playlist_json, ('details', 'list')) or []
+
+ entries = [
+ self.url_result(
+ 'https://www.manototv.com/episode/%s' % item['slideID'], ie=ManotoTVIE.ie_key(), video_id=item['slideID'])
+ for item in playlist]
+ return self.playlist_result(entries, show_id, title, description)
+
+
+class ManotoTVLiveIE(InfoExtractor):
+ IE_DESC = 'Manoto TV (Live)'
+ _VALID_URL = r'https?://(?:www\.)?manototv\.com/live/'
+ _TEST = {
+ 'url': 'https://www.manototv.com/live/',
+ 'info_dict': {
+ 'id': 'live',
+ 'title': 'Manoto TV Live',
+ 'ext': 'mp4',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = 'live'
+ json = self._download_json(_API_URL.format('livemodule', 'details', ''), video_id)
+ details = json.get('details', {})
+ video_url = details.get('liveUrl')
+ formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True)
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': 'Manoto TV Live',
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/massengeschmacktv.py b/hypervideo_dl/extractor/massengeschmacktv.py
index cfcc6b2..b381d31 100644
--- a/hypervideo_dl/extractor/massengeschmacktv.py
+++ b/hypervideo_dl/extractor/massengeschmacktv.py
@@ -67,7 +67,7 @@ class MassengeschmackTVIE(InfoExtractor):
'vcodec': 'none' if format_id.startswith('Audio') else None,
})
- self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
+ self._sort_formats(formats)
return {
'id': episode,
diff --git a/hypervideo_dl/extractor/mdr.py b/hypervideo_dl/extractor/mdr.py
index dc6aa98..0bdd626 100644
--- a/hypervideo_dl/extractor/mdr.py
+++ b/hypervideo_dl/extractor/mdr.py
@@ -137,11 +137,11 @@ class MDRIE(InfoExtractor):
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- preference=0, m3u8_id='HLS', fatal=False))
+ quality=1, m3u8_id='HLS', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
- preference=0, f4m_id='HDS', fatal=False))
+ quality=1, f4m_id='HDS', fatal=False))
else:
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
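
The mdr.py hunks replace preference=0 with quality=1, and livestream.py above annotates its preference=-1000 with a question. The distinction, as these fields are used by the format sorter: preference effectively dominates the default ranking outright, while quality is weighed alongside technical fields such as bitrate. A toy model of that ordering, under those assumed semantics:

    formats = [
        {'format_id': 'http-1080', 'tbr': 4500, 'quality': 0, 'preference': 0},
        {'format_id': 'hls-720', 'tbr': 2500, 'quality': 1, 'preference': 0},
        {'format_id': 'flv-legacy', 'tbr': 9000, 'quality': 0, 'preference': -1000},
    ]

    # preference first, then quality, then bitrate; best format sorts last
    formats.sort(key=lambda f: (f['preference'], f['quality'], f['tbr']))
    print([f['format_id'] for f in formats])
    # ['flv-legacy', 'http-1080', 'hls-720']: the penalised format never wins

The mediasite.py hunk further down makes the same correction from the other direction, turning a blanket preference=-1 for secondary streams into quality=-10.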
diff --git a/hypervideo_dl/extractor/medaltv.py b/hypervideo_dl/extractor/medaltv.py
index 67bb4de..2ece5aa 100644
--- a/hypervideo_dl/extractor/medaltv.py
+++ b/hypervideo_dl/extractor/medaltv.py
@@ -103,11 +103,11 @@ class MedalTVIE(InfoExtractor):
error = clip.get('error')
if not formats and error:
if error == 404:
- raise ExtractorError(
+ self.raise_no_formats(
'That clip does not exist.',
expected=True, video_id=video_id)
else:
- raise ExtractorError(
+ self.raise_no_formats(
'An unknown error occurred ({0}).'.format(error),
video_id=video_id)
diff --git a/hypervideo_dl/extractor/mediaite.py b/hypervideo_dl/extractor/mediaite.py
new file mode 100644
index 0000000..b670f0d
--- /dev/null
+++ b/hypervideo_dl/extractor/mediaite.py
@@ -0,0 +1,93 @@
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+
+
+class MediaiteIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mediaite.com(?!/category)(?:/[\w-]+){2}'
+ _TESTS = [{
+ 'url': 'https://www.mediaite.com/sports/bill-burr-roasts-nfl-for-promoting-black-lives-matter-while-scheduling-more-games-after-all-the-sht-they-know-about-cte/',
+ 'info_dict': {
+ 'id': 'vPHKITzy',
+ 'ext': 'm4a',
+ 'title': 'Bill Burr On NFL And Black Lives Matter',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/vPHKITzy/poster.jpg?width=720',
+ 'duration': 55,
+ 'timestamp': 1631630185,
+ 'upload_date': '20210914',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/tv/joe-scarborough-goes-off-on-tax-breaks-for-super-wealthy-largest-income-redistribution-scam-in-american-history/',
+ 'info_dict': {
+ 'id': 'eeFcK4Xm',
+ 'ext': 'mp4',
+ 'title': 'Morning Joe-6_16_52 am - 6_21_10 am-2021-09-14.mp4',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/eeFcK4Xm/poster.jpg?width=720',
+ 'duration': 258,
+ 'timestamp': 1631618057,
+ 'upload_date': '20210914',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/politics/watch-rudy-giuliani-impersonates-queen-elizabeth-calls-mark-milley-an-asshle-in-bizarre-9-11-speech/',
+ 'info_dict': {
+ 'id': 'EiyiXKcr',
+ 'ext': 'mp4',
+ 'title': 'Giuliani 1',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/EiyiXKcr/poster.jpg?width=720',
+ 'duration': 39,
+ 'timestamp': 1631536476,
+ 'upload_date': '20210913',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/podcasts/clarissa-ward-says-she-decided-to-become-a-journalist-on-9-11/',
+ 'info_dict': {
+ 'id': 'TxavoRTx',
+ 'ext': 'mp4',
+ 'title': 'clarissa-ward-3.mp4',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/TxavoRTx/poster.jpg?width=720',
+ 'duration': 83,
+ 'timestamp': 1631311188,
+ 'upload_date': '20210910',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/opinion/mainstream-media-ignores-rose-mcgowans-bombshell-allegation-that-newsoms-wife-tried-to-silence-her-on-weinstein/',
+ 'info_dict': {
+ 'id': 'sEIWvKR7',
+ 'ext': 'mp4',
+ 'title': 'KTTV_09-13-2021_05.34.21',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/sEIWvKR7/poster.jpg?width=720',
+ 'duration': 52,
+ 'timestamp': 1631553328,
+ 'upload_date': '20210913',
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.mediaite.com/news/watch-cnbcs-jim-cramer-says-nobody-wants-to-die-getting-infected-by-unvaccinated-coworker-even-for-22-an-hour/',
+ 'info_dict': {
+ 'id': 'nwpt1elX',
+ 'ext': 'mp4',
+ 'title': "CNBC's Jim Cramer Says Nobody Wants to Die Getting Infected by Unvaccinated Coworker 'Even for $22 an Hour'.mp4",
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/nwpt1elX/poster.jpg?width=720',
+ 'duration': 60,
+ 'timestamp': 1633014214,
+ 'upload_date': '20210930',
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, None)
+ id = self._search_regex(r'data-video-id\s?=\s?\"([^\"]+)\"', webpage, 'id')
+ data_json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{id}', id)
+ return self._parse_jwplayer_data(data_json)
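
The new mediaite.py extractor stays small because the JW Player Delivery API does the heavy lifting: scrape the data-video-id attribute, fetch cdn.jwplayer.com/v2/media/<id>, and let _parse_jwplayer_data walk the playlist. The response shape below is abridged from observed Delivery API output and may not be exhaustive:

    import json
    import re

    webpage = '<div class="player" data-video-id="vPHKITzy"></div>'
    media_id = re.search(r'data-video-id\s*=\s*"([^"]+)"', webpage).group(1)
    print('https://cdn.jwplayer.com/v2/media/%s' % media_id)

    # _parse_jwplayer_data reads playlist[0]['sources'] to build formats
    data = json.loads('''{
      "title": "Bill Burr On NFL And Black Lives Matter",
      "playlist": [{"mediaid": "vPHKITzy",
                    "sources": [{"file": "https://cdn.example.invalid/media.m3u8",
                                 "type": "application/vnd.apple.mpegurl"}]}]
    }''')
    print(data['playlist'][0]['sources'][0]['file'])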
diff --git a/hypervideo_dl/extractor/mediaklikk.py b/hypervideo_dl/extractor/mediaklikk.py
new file mode 100644
index 0000000..b9b6d73
--- /dev/null
+++ b/hypervideo_dl/extractor/mediaklikk.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ..utils import (
+ unified_strdate
+)
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_str
+)
+
+
+class MediaKlikkIE(InfoExtractor):
+ _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)?
+ (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/
+ (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
+ (?P<id>[^/#?_]+)'''
+
+ _TESTS = [{
+ # mediaklikk. date in html.
+ 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
+ 'info_dict': {
+ 'id': '4754129',
+ 'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
+ 'ext': 'mp4',
+ 'upload_date': '20210901',
+ 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
+ }
+ }, {
+ # m4sport
+ 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
+ 'info_dict': {
+ 'id': '4754999',
+ 'title': 'Gyémánt Liga, Párizs',
+ 'ext': 'mp4',
+ 'upload_date': '20210830',
+ 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
+ }
+ }, {
+ # m4sport with *video/ url and no date
+ 'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
+ 'info_dict': {
+ 'id': '4492099',
+ 'title': 'Real Madrid - Chelsea 1-1',
+ 'ext': 'mp4',
+ 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
+ }
+ }, {
+ # hirado
+ 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
+ 'info_dict': {
+ 'id': '4760120',
+ 'title': 'Feltételeket szabott a főváros',
+ 'ext': 'mp4',
+ 'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
+ }
+ }, {
+ # petofilive
+ 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
+ 'info_dict': {
+ 'id': '4571948',
+ 'title': 'Tha Shudras az Akusztikban',
+ 'ext': 'mp4',
+ 'upload_date': '20210607',
+ 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id')
+ webpage = self._download_webpage(url, display_id)
+
+ player_data_str = self._html_search_regex(
+ r'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data')
+ player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote)
+ video_id = compat_str(player_data['contentId'])
+ title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \
+ self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title')
+
+ upload_date = unified_strdate(
+ '%s-%s-%s' % (mobj.group('year'), mobj.group('month'), mobj.group('day')))
+ if not upload_date:
+ upload_date = unified_strdate(self._html_search_regex(
+ r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
+
+ player_data['video'] = player_data.pop('token')
+ player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
+ playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
+ self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
+
+ formats = self._extract_wowza_formats(
+ playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'upload_date': upload_date,
+ 'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage)
+ }
diff --git a/hypervideo_dl/extractor/mediaset.py b/hypervideo_dl/extractor/mediaset.py
index 2c16fc9..26e7abc 100644
--- a/hypervideo_dl/extractor/mediaset.py
+++ b/hypervideo_dl/extractor/mediaset.py
@@ -4,13 +4,10 @@ from __future__ import unicode_literals
import re
from .theplatform import ThePlatformBaseIE
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
ExtractorError,
int_or_none,
+ parse_qs,
update_url_query,
)
@@ -30,38 +27,70 @@ class MediasetIE(ThePlatformBaseIE):
'''
_TESTS = [{
# full episode
- 'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
- 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
+ 'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102',
+ 'md5': 'a7e75c6384871f322adb781d3bd72c26',
'info_dict': {
- 'id': 'FAFU000000661824',
+ 'id': 'F310575103000102',
'ext': 'mp4',
- 'title': 'Quarta puntata',
+ 'title': 'Episodio 1',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1414.26,
- 'upload_date': '20161107',
- 'series': 'Hello Goodbye',
- 'timestamp': 1478532900,
- 'uploader': 'Rete 4',
- 'uploader_id': 'R4',
+ 'duration': 2682.0,
+ 'upload_date': '20210530',
+ 'series': 'Mr Wrong - Lezioni d\'amore',
+ 'timestamp': 1622413946,
+ 'uploader': 'Canale 5',
+ 'uploader_id': 'C5',
},
}, {
'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
- 'md5': '288532f0ad18307705b01e581304cd7b',
+ 'md5': '1276f966ac423d16ba255ce867de073e',
'info_dict': {
'id': 'F309013801000501',
'ext': 'mp4',
'title': 'Puntata del 25 maggio',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 6565.007,
- 'upload_date': '20180526',
+ 'duration': 6565.008,
+ 'upload_date': '20200903',
'series': 'Matrix',
- 'timestamp': 1527326245,
+ 'timestamp': 1599172492,
'uploader': 'Canale 5',
'uploader_id': 'C5',
},
}, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-69-pezzo-di-luna_F303843101017801',
+ 'md5': 'd1650ac9ff944f185556126a736df148',
+ 'info_dict': {
+ 'id': 'F303843101017801',
+ 'ext': 'mp4',
+ 'title': 'Episodio 69 - Pezzo di luna',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 263.008,
+ 'upload_date': '20200902',
+ 'series': 'Camera Café 5',
+ 'timestamp': 1599064700,
+ 'uploader': 'Italia 1',
+ 'uploader_id': 'I1',
+ },
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-51-tu-chi-sei_F303843107000601',
+ 'md5': '567e9ad375b7a27a0e370650f572a1e3',
+ 'info_dict': {
+ 'id': 'F303843107000601',
+ 'ext': 'mp4',
+ 'title': 'Episodio 51 - Tu chi sei?',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 367.021,
+ 'upload_date': '20200902',
+ 'series': 'Camera Café 5',
+ 'timestamp': 1599069817,
+ 'uploader': 'Italia 1',
+ 'uploader_id': 'I1',
+ },
+ }, {
# clip
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
'only_matching': True,
@@ -96,7 +125,7 @@ class MediasetIE(ThePlatformBaseIE):
@staticmethod
def _extract_urls(ie, webpage):
def _qs(url):
- return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ return parse_qs(url)
def _program_guid(qs):
return qs.get('programGuid', [None])[0]
@@ -135,36 +164,38 @@ class MediasetIE(ThePlatformBaseIE):
formats = []
subtitles = {}
first_e = None
- for asset_type in ('SD', 'HD'):
- # TODO: fixup ISM+none manifest URLs
- for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
- try:
- tp_formats, tp_subtitles = self._extract_theplatform_smil(
- update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
- 'mbr': 'true',
- 'formats': f,
- 'assetTypes': asset_type,
- }), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
- except ExtractorError as e:
- if not first_e:
- first_e = e
- break
- for tp_f in tp_formats:
- tp_f['quality'] = 1 if asset_type == 'HD' else 0
- formats.extend(tp_formats)
- subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ asset_type = 'geoNo:HD,browser,geoIT|geoNo:HD,geoIT|geoNo:SD,browser,geoIT|geoNo:SD,geoIT|geoNo|HD|SD'
+ # TODO: fixup ISM+none manifest URLs
+ for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
+ 'mbr': 'true',
+ 'formats': f,
+ 'assetTypes': asset_type,
+ }), guid, 'Downloading %s SMIL data' % (f.split('+')[0]))
+ except ExtractorError as e:
+ if not first_e:
+ first_e = e
+ break
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if first_e and not formats:
raise first_e
self._sort_formats(formats)
- fields = []
- for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
- fields.extend(templ % repl for repl in repls)
feed_data = self._download_json(
- 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
- guid, fatal=False, query={'fields': ','.join(fields)})
+ 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/' + guid,
+ guid, fatal=False)
if feed_data:
publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
+ thumbnails = feed_data.get('thumbnails') or {}
+ thumbnail = None
+ for key, value in thumbnails.items():
+ if key.startswith('image_keyframe_poster-'):
+ thumbnail = value.get('url')
+ break
+
info.update({
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
@@ -172,6 +203,7 @@ class MediasetIE(ThePlatformBaseIE):
'uploader': publish_info.get('description'),
'uploader_id': publish_info.get('channel'),
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
+ 'thumbnail': thumbnail,
})
info.update({
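
The feed switch in mediaset.py (v2 endpoint, no more field filtering) also picks a poster thumbnail by key prefix from the feed's thumbnails object. The loop-with-break is equivalent to next() over a generator; with made-up data:

    thumbnails = {
        'image_header_poster': {'url': 'https://example.invalid/header.jpg'},
        'image_keyframe_poster-1280x720': {'url': 'https://example.invalid/kf.jpg'},
    }

    thumbnail = next(
        (v.get('url') for k, v in thumbnails.items()
         if k.startswith('image_keyframe_poster-')), None)
    print(thumbnail)  # https://example.invalid/kf.jpg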
diff --git a/hypervideo_dl/extractor/mediasite.py b/hypervideo_dl/extractor/mediasite.py
index d6eb157..ace86c2 100644
--- a/hypervideo_dl/extractor/mediasite.py
+++ b/hypervideo_dl/extractor/mediasite.py
@@ -26,7 +26,7 @@ _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0
class MediasiteIE(InfoExtractor):
- _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
+ _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/[^/#?]+/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
_TESTS = [
{
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
@@ -122,9 +122,55 @@ class MediasiteIE(InfoExtractor):
r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
webpage)]
+ def __extract_slides(self, *, stream_id, snum, Stream, duration, images):
+ slide_base_url = Stream['SlideBaseUrl']
+
+ fname_template = Stream['SlideImageFileNameTemplate']
+ if fname_template != 'slide_{0:D4}.jpg':
+ self.report_warning('Unusual slide file name template; report a bug if slide downloading fails')
+ fname_template = re.sub(r'\{0:D([0-9]+)\}', r'{0:0\1}', fname_template)
+
+ fragments = []
+ for i, slide in enumerate(Stream['Slides']):
+ if i == 0:
+ if slide['Time'] > 0:
+ default_slide = images.get('DefaultSlide')
+ if default_slide is None:
+ default_slide = images.get('DefaultStreamImage')
+ if default_slide is not None:
+ default_slide = default_slide['ImageFilename']
+ if default_slide is not None:
+ fragments.append({
+ 'path': default_slide,
+ 'duration': slide['Time'] / 1000,
+ })
+
+ next_time = try_get(None, [
+ lambda _: Stream['Slides'][i + 1]['Time'],
+ lambda _: duration,
+ lambda _: slide['Time'],
+ ], expected_type=(int, float))
+
+ fragments.append({
+ 'path': fname_template.format(slide.get('Number', i + 1)),
+ 'duration': (next_time - slide['Time']) / 1000
+ })
+
+ return {
+ 'format_id': '%s-%u.slides' % (stream_id, snum),
+ 'ext': 'mhtml',
+ 'url': slide_base_url,
+ 'protocol': 'mhtml',
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'format_note': 'Slides',
+ 'fragments': fragments,
+ 'fragment_base_url': slide_base_url,
+ }
+
def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
resource_id = mobj.group('id')
query = mobj.group('query')
@@ -198,15 +244,20 @@ class MediasiteIE(InfoExtractor):
'ext': mimetype2ext(VideoUrl.get('MimeType')),
})
- # TODO: if Stream['HasSlideContent']:
- # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
- # from Stream['Slides']
- # this will require writing a custom downloader...
+ if Stream.get('HasSlideContent', False):
+ images = player_options['PlayerLayoutOptions']['Images']
+ stream_formats.append(self.__extract_slides(
+ stream_id=stream_id,
+ snum=snum,
+ Stream=Stream,
+ duration=presentation.get('Duration'),
+ images=images,
+ ))
# disprefer 'secondary' streams
if stream_type != 0:
for fmt in stream_formats:
- fmt['preference'] = -1
+ fmt['quality'] = -10
thumbnail_url = Stream.get('ThumbnailUrl')
if thumbnail_url:
@@ -276,7 +327,7 @@ class MediasiteCatalogIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
mediasite_url = mobj.group('url')
catalog_id = mobj.group('catalog_id')
current_folder_id = mobj.group('current_folder_id') or catalog_id
@@ -352,7 +403,7 @@ class MediasiteNamedCatalogIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
mediasite_url = mobj.group('url')
catalog_name = mobj.group('catalog_name')
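
The mediasite.py addition resolves an old TODO: instead of synthesising an MJPEG stream, slides become a synthetic "mhtml" format in which each fragment is one JPEG displayed until the next slide's timestamp (times are in milliseconds). It also converts the server's C#-style name template {0:D4} into Python's {0:04}. A reduced version of the fragment computation:

    def slide_fragments(slides, duration=None,
                        fname_template='slide_{0:04}.jpg'):
        # One fragment per slide; the last slide runs to the
        # presentation duration when that is known.
        fragments = []
        for i, slide in enumerate(slides):
            if i + 1 < len(slides):
                next_time = slides[i + 1]['Time']
            else:
                next_time = duration if duration is not None else slide['Time']
            fragments.append({
                'path': fname_template.format(i + 1),
                'duration': (next_time - slide['Time']) / 1000,
            })
        return fragments

    print(slide_fragments(
        [{'Time': 0}, {'Time': 4000}, {'Time': 9000}], duration=15000))
    # slide_0001.jpg for 4s, slide_0002.jpg for 5s, slide_0003.jpg for 6s

This omits the DefaultSlide padding and the per-slide 'Number' override that the real __extract_slides handles.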
diff --git a/hypervideo_dl/extractor/metacafe.py b/hypervideo_dl/extractor/metacafe.py
index 9e92416..7b2d4a0 100644
--- a/hypervideo_dl/extractor/metacafe.py
+++ b/hypervideo_dl/extractor/metacafe.py
@@ -19,7 +19,7 @@ from ..utils import (
class MetacafeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<video_id>[^/]+)/(?P<display_id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<id>[^/]+)/(?P<display_id>[^/?#]+)'
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
IE_NAME = 'metacafe'
@@ -130,7 +130,7 @@ class MetacafeIE(InfoExtractor):
def _real_extract(self, url):
# Extract id and simplified title from URL
- video_id, display_id = re.match(self._VALID_URL, url).groups()
+ video_id, display_id = self._match_valid_url(url).groups()
# the video may come from an external site
m_external = re.match(r'^(\w{2})-(.*)$', video_id)
diff --git a/hypervideo_dl/extractor/metacritic.py b/hypervideo_dl/extractor/metacritic.py
index 7d468d7..1424288 100644
--- a/hypervideo_dl/extractor/metacritic.py
+++ b/hypervideo_dl/extractor/metacritic.py
@@ -33,7 +33,7 @@ class MetacriticIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
# The xml is not well formatted, there are raw '&'
diff --git a/hypervideo_dl/extractor/mgoon.py b/hypervideo_dl/extractor/mgoon.py
index 7bb4739..184c311 100644
--- a/hypervideo_dl/extractor/mgoon.py
+++ b/hypervideo_dl/extractor/mgoon.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -41,7 +40,7 @@ class MgoonIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
data = self._download_json(self._API_URL.format(video_id), video_id)
diff --git a/hypervideo_dl/extractor/microsoftvirtualacademy.py b/hypervideo_dl/extractor/microsoftvirtualacademy.py
index 8e0aee0..46abd2a 100644
--- a/hypervideo_dl/extractor/microsoftvirtualacademy.py
+++ b/hypervideo_dl/extractor/microsoftvirtualacademy.py
@@ -55,7 +55,7 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
course_id = mobj.group('course_id')
video_id = mobj.group('id')
@@ -152,7 +152,7 @@ class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
MicrosoftVirtualAcademyCourseIE, cls).suitable(url)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
course_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/mildom.py b/hypervideo_dl/extractor/mildom.py
new file mode 100644
index 0000000..c147cbb
--- /dev/null
+++ b/hypervideo_dl/extractor/mildom.py
@@ -0,0 +1,258 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+from datetime import datetime
+import itertools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ std_headers,
+ update_url_query,
+ random_uuidv4,
+ try_get,
+)
+from ..compat import (
+ compat_str,
+)
+
+
+class MildomBaseIE(InfoExtractor):
+ _GUEST_ID = None
+ _DISPATCHER_CONFIG = None
+
+ def _call_api(self, url, video_id, query={}, note='Downloading JSON metadata', init=False):
+ url = update_url_query(url, self._common_queries(query, init=init))
+ return self._download_json(url, video_id, note=note)['body']
+
+ def _common_queries(self, query={}, init=False):
+ dc = self._fetch_dispatcher_config()
+ r = {
+ 'timestamp': self.iso_timestamp(),
+ '__guest_id': '' if init else self.guest_id(),
+ '__location': dc['location'],
+ '__country': dc['country'],
+ '__cluster': dc['cluster'],
+ '__platform': 'web',
+ '__la': self.lang_code(),
+ '__pcv': 'v2.9.44',
+ 'sfr': 'pc',
+ 'accessToken': '',
+ }
+ r.update(query)
+ return r
+
+ def _fetch_dispatcher_config(self):
+ if not self._DISPATCHER_CONFIG:
+ tmp = self._download_json(
+ 'https://disp.mildom.com/serverListV2', 'initialization',
+ note='Downloading dispatcher_config', data=json.dumps({
+ 'protover': 0,
+ 'data': base64.b64encode(json.dumps({
+ 'fr': 'web',
+ 'sfr': 'pc',
+ 'devi': 'Windows',
+ 'la': 'ja',
+ 'gid': None,
+ 'loc': '',
+ 'clu': '',
+ 'wh': '1919*810',
+ 'rtm': self.iso_timestamp(),
+ 'ua': std_headers['User-Agent'],
+ }).encode('utf8')).decode('utf8').replace('\n', ''),
+ }).encode('utf8'))
+ self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
+ return self._DISPATCHER_CONFIG
+
+ @staticmethod
+ def iso_timestamp():
+ 'new Date().toISOString()'
+ return datetime.utcnow().isoformat()[0:-3] + 'Z'
+
+ def guest_id(self):
+ 'getGuestId'
+ if self._GUEST_ID:
+ return self._GUEST_ID
+ self._GUEST_ID = try_get(
+ self, (
+ lambda x: x._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization',
+ note='Downloading guest token', init=True)['guest_id'] or None,
+ lambda x: x._get_cookies('https://www.mildom.com').get('gid').value,
+ lambda x: x._get_cookies('https://m.mildom.com').get('gid').value,
+ ), compat_str) or ''
+ return self._GUEST_ID
+
+ def lang_code(self):
+ 'getCurrentLangCode'
+ return 'ja'
+
+
+class MildomIE(MildomBaseIE):
+ IE_NAME = 'mildom'
+ IE_DESC = 'Record an ongoing live stream of a specific user in Mildom'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = 'https://www.mildom.com/%s' % video_id
+
+ webpage = self._download_webpage(url, video_id)
+
+ enterstudio = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
+ note='Downloading live metadata', query={'user_id': video_id})
+ result_video_id = enterstudio.get('log_id', video_id)
+
+ title = try_get(
+ enterstudio, (
+ lambda x: self._html_search_meta('twitter:description', webpage),
+ lambda x: x['anchor_intro'],
+ ), compat_str)
+ description = try_get(
+ enterstudio, (
+ lambda x: x['intro'],
+ lambda x: x['live_intro'],
+ ), compat_str)
+ uploader = try_get(
+ enterstudio, (
+ lambda x: self._html_search_meta('twitter:title', webpage),
+ lambda x: x['loginname'],
+ ), compat_str)
+
+ servers = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
+ note='Downloading live server list', query={
+ 'user_id': video_id,
+ 'live_server_type': 'hls',
+ })
+
+ stream_query = self._common_queries({
+ 'streamReqId': random_uuidv4(),
+ 'is_lhls': '0',
+ })
+ m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
+ formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
+ 'Referer': 'https://www.mildom.com/',
+ 'Origin': 'https://www.mildom.com',
+ }, note='Downloading m3u8 information')
+
+ del stream_query['streamReqId'], stream_query['timestamp']
+ for fmt in formats:
+ fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': result_video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'uploader_id': video_id,
+ 'formats': formats,
+ 'is_live': True,
+ }
+
+
+class MildomVodIE(MildomBaseIE):
+ IE_NAME = 'mildom:vod'
+ IE_DESC = 'Download a VOD in Mildom'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+)'
+
+ def _real_extract(self, url):
+ m = self._match_valid_url(url)
+ user_id, video_id = m.group('user_id'), m.group('id')
+ url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)
+
+ webpage = self._download_webpage(url, video_id)
+
+ autoplay = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
+ note='Downloading playback metadata', query={
+ 'v_id': video_id,
+ })['playback']
+
+ title = try_get(
+ autoplay, (
+ lambda x: self._html_search_meta('og:description', webpage),
+ lambda x: x['title'],
+ ), compat_str)
+ description = try_get(
+ autoplay, (
+ lambda x: x['video_intro'],
+ ), compat_str)
+ uploader = try_get(
+ autoplay, (
+ lambda x: x['author_info']['login_name'],
+ ), compat_str)
+
+ formats = [{
+ 'url': autoplay['audio_url'],
+ 'format_id': 'audio',
+ 'protocol': 'm3u8_native',
+ 'vcodec': 'none',
+ 'acodec': 'aac',
+ 'ext': 'm4a'
+ }]
+ for fmt in autoplay['video_link']:
+ formats.append({
+ 'format_id': 'video-%s' % fmt['name'],
+ 'url': fmt['url'],
+ 'protocol': 'm3u8_native',
+ 'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'],
+ 'height': fmt['level'],
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'ext': 'mp4'
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'uploader_id': user_id,
+ 'formats': formats,
+ }
+
+
+class MildomUserVodIE(MildomBaseIE):
+ IE_NAME = 'mildom:user:vod'
+ IE_DESC = 'Download all VODs from a specific user in Mildom'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mildom.com/profile/10093333',
+ 'info_dict': {
+ 'id': '10093333',
+ 'title': 'Uploads from ねこばたけ',
+ },
+ 'playlist_mincount': 351,
+ }]
+
+ def _entries(self, user_id):
+ for page in itertools.count(1):
+ reply = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
+ user_id, note='Downloading page %d' % page, query={
+ 'user_id': user_id,
+ 'page': page,
+ 'limit': '30',
+ })
+ if not reply:
+ break
+ for x in reply:
+ yield self.url_result('https://www.mildom.com/playback/%s/%s' % (user_id, x['v_id']))
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ self.to_screen('This will download all VODs belonging to the user. To download an ongoing live video, use "https://www.mildom.com/%s" instead' % user_id)
+
+ profile = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
+ query={'user_id': user_id}, note='Downloading user profile')['user_info']
+
+ return self.playlist_result(
+ self._entries(user_id), user_id, 'Uploads from %s' % profile['loginname'])
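
MildomUserVodIE._entries is a lazy pager: itertools.count walks page numbers until the API returns an empty list, so VODs are fetched only as far as the playlist consumer actually iterates. The same pattern, reduced to a stub (fetch_page is illustrative, not a real API):

    import itertools

    def fetch_page(page):  # stand-in for the paginated JSON API call
        data = {1: ['vod-a', 'vod-b'], 2: ['vod-c']}
        return data.get(page, [])

    def entries():
        for page in itertools.count(1):
            reply = fetch_page(page)
            if not reply:  # an empty page terminates the listing
                break
            for item in reply:
                yield item

    print(list(entries()))  # ['vod-a', 'vod-b', 'vod-c']

MirrativUserIE below does the same job slightly differently, following an explicit next_page field instead of counting.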
diff --git a/hypervideo_dl/extractor/minoto.py b/hypervideo_dl/extractor/minoto.py
index 6367311..603ce94 100644
--- a/hypervideo_dl/extractor/minoto.py
+++ b/hypervideo_dl/extractor/minoto.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -14,7 +13,7 @@ class MinotoIE(InfoExtractor):
_VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
player_id = mobj.group('player_id') or '1'
video_id = mobj.group('id')
video_data = self._download_json('http://play.minoto-video.com/%s/%s.js' % (player_id, video_id), video_id)
@@ -38,7 +37,7 @@ class MinotoIE(InfoExtractor):
'filesize': int_or_none(fmt.get('filesize')),
'width': int_or_none(fmt.get('width')),
'height': int_or_none(fmt.get('height')),
- 'codecs': parse_codecs(fmt.get('codecs')),
+ **parse_codecs(fmt.get('codecs')),
})
self._sort_formats(formats)
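
The one-line minoto.py fix is easy to miss: parse_codecs returns a dict such as {'vcodec': ..., 'acodec': ...}, so storing it under a 'codecs' key buried those fields where the sorter never looks. Splatting with ** merges them into the format dict itself. With a drastically simplified stand-in for utils.parse_codecs:

    def parse_codecs(codecs_str):
        # Simplified: the real helper handles many more cases
        vcodec, acodec = (codecs_str or ',').split(',')[:2]
        return {'vcodec': vcodec.strip() or 'none',
                'acodec': acodec.strip() or 'none'}

    broken = {'height': 720, 'codecs': parse_codecs('avc1.64001f, mp4a.40.2')}
    fixed = {'height': 720, **parse_codecs('avc1.64001f, mp4a.40.2')}

    print(broken)  # codec info nested one level too deep
    print(fixed)   # {'height': 720, 'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}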
diff --git a/hypervideo_dl/extractor/mirrativ.py b/hypervideo_dl/extractor/mirrativ.py
new file mode 100644
index 0000000..81aea54
--- /dev/null
+++ b/hypervideo_dl/extractor/mirrativ.py
@@ -0,0 +1,134 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ dict_get,
+ traverse_obj,
+ try_get,
+)
+
+
+class MirrativBaseIE(InfoExtractor):
+ def assert_error(self, response):
+ error_message = traverse_obj(response, ('status', 'error'))
+ if error_message:
+ raise ExtractorError('Mirrativ says: %s' % error_message, expected=True)
+
+
+class MirrativIE(MirrativBaseIE):
+ IE_NAME = 'mirrativ'
+ _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)'
+ LIVE_API_URL = 'https://www.mirrativ.com/api/live/live?live_id=%s'
+
+ _TESTS = [{
+ 'url': 'https://mirrativ.com/live/POxyuG1KmW2982lqlDTuPw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('https://www.mirrativ.com/live/%s' % video_id, video_id)
+ live_response = self._download_json(self.LIVE_API_URL % video_id, video_id)
+ self.assert_error(live_response)
+
+ hls_url = dict_get(live_response, ('archive_url_hls', 'streaming_url_hls'))
+ is_live = bool(live_response.get('is_live'))
+ was_live = bool(live_response.get('is_archive'))
+ if not hls_url:
+ raise ExtractorError('Neither archive nor live is available.', expected=True)
+
+ formats = self._extract_m3u8_formats(
+ hls_url, video_id,
+ ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', live=is_live)
+ rtmp_url = live_response.get('streaming_url_edge')
+ if rtmp_url:
+ keys_to_copy = ('width', 'height', 'vcodec', 'acodec', 'tbr')
+ fmt = {
+ 'format_id': 'rtmp',
+ 'url': rtmp_url,
+ 'protocol': 'rtmp',
+ 'ext': 'mp4',
+ }
+ fmt.update({k: traverse_obj(formats, (0, k)) for k in keys_to_copy})
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage, default=None) or self._search_regex(
+ r'<title>\s*(.+?) - Mirrativ\s*</title>', webpage, 'live title', default=None) or live_response.get('title')
+ description = live_response.get('description')
+ thumbnail = live_response.get('image_url')
+
+ duration = try_get(live_response, lambda x: x['ended_at'] - x['started_at'])
+ view_count = live_response.get('total_viewer_num')
+ release_timestamp = live_response.get('started_at')
+ timestamp = live_response.get('created_at')
+
+ owner = live_response.get('owner', {})
+ uploader = owner.get('name')
+ uploader_id = owner.get('user_id')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'is_live': is_live,
+ 'description': description,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'release_timestamp': release_timestamp,
+ 'timestamp': timestamp,
+ 'was_live': was_live,
+ }
+
+
+class MirrativUserIE(MirrativBaseIE):
+ IE_NAME = 'mirrativ:user'
+ _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/user/(?P<id>\d+)'
+ LIVE_HISTORY_API_URL = 'https://www.mirrativ.com/api/live/live_history?user_id=%s&page=%d'
+ USER_INFO_API_URL = 'https://www.mirrativ.com/api/user/profile?user_id=%s'
+
+ _TESTS = [{
+ # Live archives are available for up to 3 days
+ # see: https://helpfeel.com/mirrativ/%E9%8C%B2%E7%94%BB-5e26d3ad7b59ef0017fb49ac (Japanese)
+ 'url': 'https://www.mirrativ.com/user/110943130',
+ 'note': 'multiple archives available',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, user_id):
+ page = 1
+ while page is not None:
+ api_response = self._download_json(
+ self.LIVE_HISTORY_API_URL % (user_id, page), user_id,
+ note='Downloading page %d' % page)
+ self.assert_error(api_response)
+ lives = api_response.get('lives')
+ if not lives:
+ break
+ for live in lives:
+ if not live.get('is_archive') and not live.get('is_live'):
+ # neither archive nor live is available, so skip it
+ # or the service will ban your IP address for a while
+ continue
+ live_id = live.get('live_id')
+ url = 'https://www.mirrativ.com/live/%s' % live_id
+ yield self.url_result(url, video_id=live_id, video_title=live.get('title'))
+ page = api_response.get('next_page')
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ user_info = self._download_json(
+ self.USER_INFO_API_URL % user_id, user_id,
+ note='Downloading user info', fatal=False)
+ self.assert_error(user_info)
+
+ uploader = user_info.get('name')
+ description = user_info.get('description')
+
+ entries = self._entries(user_id)
+ return self.playlist_result(entries, user_id, uploader, description)
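
One detail of the new mirrativ.py: when an RTMP edge URL exists, the synthetic RTMP format clones its technical fields from the first HLS format via traverse_obj(formats, (0, k)), which yields None rather than raising when the list is empty. A minimal stand-in for that helper and the copy:

    def traverse_obj(obj, path):
        # Reduced sketch of utils.traverse_obj: follow keys/indices,
        # return None on any failure instead of raising
        for key in path:
            try:
                obj = obj[key]
            except (IndexError, KeyError, TypeError):
                return None
        return obj

    formats = [{'format_id': 'hls-720', 'width': 1280, 'height': 720, 'tbr': 2500}]
    rtmp_fmt = {'format_id': 'rtmp', 'url': 'rtmp://edge.example.invalid/live'}
    rtmp_fmt.update({k: traverse_obj(formats, (0, k))
                     for k in ('width', 'height', 'tbr')})
    print(rtmp_fmt)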
diff --git a/hypervideo_dl/extractor/mit.py b/hypervideo_dl/extractor/mit.py
index e1506a7..60e4569 100644
--- a/hypervideo_dl/extractor/mit.py
+++ b/hypervideo_dl/extractor/mit.py
@@ -98,7 +98,7 @@ class OCWMITIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
topic = mobj.group('topic')
webpage = self._download_webpage(url, topic)
diff --git a/hypervideo_dl/extractor/mixcloud.py b/hypervideo_dl/extractor/mixcloud.py
index 6931985..a0c043d 100644
--- a/hypervideo_dl/extractor/mixcloud.py
+++ b/hypervideo_dl/extractor/mixcloud.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import itertools
-import re
from .common import InfoExtractor
from ..compat import (
@@ -79,7 +78,7 @@ class MixcloudIE(MixcloudBaseIE):
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url):
- username, slug = re.match(self._VALID_URL, url).groups()
+ username, slug = self._match_valid_url(url).groups()
username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
track_id = '%s_%s' % (username, slug)
@@ -157,7 +156,7 @@ class MixcloudIE(MixcloudBaseIE):
})
if not formats and cloudcast.get('isExclusive'):
- self.raise_login_required()
+ self.raise_login_required(metadata_available=True)
self._sort_formats(formats)
@@ -214,7 +213,7 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
return title
def _real_extract(self, url):
- username, slug = re.match(self._VALID_URL, url).groups()
+ username, slug = self._match_valid_url(url).groups()
username = compat_urllib_parse_unquote(username)
if not slug:
slug = 'uploads'
diff --git a/hypervideo_dl/extractor/moevideo.py b/hypervideo_dl/extractor/moevideo.py
index eb9b4ce..a3f1b38 100644
--- a/hypervideo_dl/extractor/moevideo.py
+++ b/hypervideo_dl/extractor/moevideo.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -54,7 +53,7 @@ class MoeVideoIE(InfoExtractor):
]
def _real_extract(self, url):
- host, video_id = re.match(self._VALID_URL, url).groups()
+ host, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(
'http://%s/video/%s' % (host, video_id),
diff --git a/hypervideo_dl/extractor/mojvideo.py b/hypervideo_dl/extractor/mojvideo.py
index 165e658..0421f3f 100644
--- a/hypervideo_dl/extractor/mojvideo.py
+++ b/hypervideo_dl/extractor/mojvideo.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -26,7 +25,7 @@ class MojvideoIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/morningstar.py b/hypervideo_dl/extractor/morningstar.py
index 0093bcd..71a22a6 100644
--- a/hypervideo_dl/extractor/morningstar.py
+++ b/hypervideo_dl/extractor/morningstar.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -25,7 +24,7 @@ class MorningstarIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/motherless.py b/hypervideo_dl/extractor/motherless.py
index ef1e081..111c7c5 100644
--- a/hypervideo_dl/extractor/motherless.py
+++ b/hypervideo_dl/extractor/motherless.py
@@ -127,9 +127,9 @@ class MotherlessIE(InfoExtractor):
comment_count = webpage.count('class="media-comment-contents"')
uploader_id = self._html_search_regex(
- r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
- webpage, 'uploader_id')
-
+ (r'"media-meta-member">\s+<a href="/m/([^"]+)"',
+ r'<span\b[^>]+\bclass="username">([^<]+)</span>'),
+ webpage, 'uploader_id', fatal=False)
categories = self._html_search_meta('keywords', webpage, default=None)
if categories:
categories = [cat.strip() for cat in categories.split(',')]
@@ -169,7 +169,18 @@ class MotherlessGroupIE(InfoExtractor):
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
'any kind!'
},
- 'playlist_mincount': 9,
+ 'playlist_mincount': 0,
+ 'expected_warnings': [
+ 'This group has no videos.',
+ ]
+ }, {
+ 'url': 'https://motherless.com/g/beautiful_cock',
+ 'info_dict': {
+ 'id': 'beautiful_cock',
+ 'title': 'Beautiful Cock',
+ 'description': 'Group for lovely cocks yours, mine, a friends anything human',
+ },
+ 'playlist_mincount': 2500,
}]
@classmethod
@@ -209,11 +220,18 @@ class MotherlessGroupIE(InfoExtractor):
description = self._html_search_meta(
'description', webpage, fatal=False)
page_count = self._int(self._search_regex(
- r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
- webpage, 'page_count'), 'page_count')
+ r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">',
+ webpage, 'page_count', default=0), 'page_count')
+ if not page_count:
+ message = self._search_regex(
+ r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*',
+ webpage, 'error_msg', default=None) or 'This group has no videos.'
+ self.report_warning(message, group_id)
PAGE_SIZE = 80
def _get_page(idx):
+ if not page_count:
+ return
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
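
Net effect of the MotherlessGroupIE changes above: when the rel="next" marker is absent, page_count defaults to 0, a warning is reported instead of raising, and the early return turns _get_page into a generator that yields nothing, so an empty group produces an empty playlist rather than an extraction error. A toy illustration of that generator behavior (page_count and the entries are stand-ins):

    def _get_page(idx, page_count=0):
        if not page_count:
            return  # bare return inside a generator -> it simply yields nothing
        yield from range(idx * 80, (idx + 1) * 80)  # stand-in for real entries

    assert list(_get_page(0)) == []  # empty group -> empty playlist
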
diff --git a/hypervideo_dl/extractor/moviezine.py b/hypervideo_dl/extractor/moviezine.py
index 85cc6e2..730da4b 100644
--- a/hypervideo_dl/extractor/moviezine.py
+++ b/hypervideo_dl/extractor/moviezine.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -21,7 +20,7 @@ class MoviezineIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/msn.py b/hypervideo_dl/extractor/msn.py
index e59b0b7..f34e210 100644
--- a/hypervideo_dl/extractor/msn.py
+++ b/hypervideo_dl/extractor/msn.py
@@ -67,7 +67,7 @@ class MSNIE(InfoExtractor):
}]
def _real_extract(self, url):
- display_id, page_id = re.match(self._VALID_URL, url).groups()
+ display_id, page_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
@@ -132,7 +132,7 @@ class MSNIE(InfoExtractor):
'width': int_or_none(file_.get('width')),
'height': int_or_none(file_.get('height')),
'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
- 'preference': 1 if format_id == '1001' else None,
+ 'quality': 1 if format_id == '1001' else None,
})
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/mtv.py b/hypervideo_dl/extractor/mtv.py
index 5a5205c..e060884 100644
--- a/hypervideo_dl/extractor/mtv.py
+++ b/hypervideo_dl/extractor/mtv.py
@@ -14,6 +14,7 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
HEADRequest,
+ int_or_none,
RegexNotFoundError,
sanitized_Request,
strip_or_none,
@@ -43,7 +44,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
# Remove the templates, like &device={device}
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
- def _get_feed_url(self, uri):
+ def _get_feed_url(self, uri, url=None):
return self._FEED_URL
def _get_thumbnail_url(self, uri, itemdoc):
@@ -176,6 +177,22 @@ class MTVServicesInfoExtractor(InfoExtractor):
raise ExtractorError('Could not find video title')
title = title.strip()
+ series = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:franchise')
+ season = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:seasonN')
+ episode = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:episodeN')
+ series = series.text if series is not None else None
+ season = season.text if season is not None else None
+ episode = episode.text if episode is not None else None
+ if season and episode:
+ # episode number includes season, so remove it
+ episode = re.sub(r'^%s' % season, '', episode)
+
# This a short id that's used in the webpage urls
mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
@@ -201,6 +218,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
'description': description,
'duration': float_or_none(content_el.attrib.get('duration')),
'timestamp': timestamp,
+ 'series': series,
+ 'season_number': int_or_none(season),
+ 'episode_number': int_or_none(episode),
}
def _get_feed_query(self, uri):
@@ -209,9 +229,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
data['lang'] = self._LANG
return data
- def _get_videos_info(self, uri, use_hls=True):
+ def _get_videos_info(self, uri, use_hls=True, url=None):
video_id = self._id_from_uri(uri)
- feed_url = self._get_feed_url(uri)
+ feed_url = self._get_feed_url(uri, url)
info_url = update_url_query(feed_url, self._get_feed_query(uri))
return self._get_videos_info_from_url(info_url, video_id, use_hls)
@@ -229,6 +249,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
if info:
entries.append(info)
+ # TODO: should be multi-video
return self.playlist_result(
entries, playlist_title=title, playlist_description=description)
@@ -292,13 +313,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
mgid = video_player['props']['media']['video']['config']['uri']
+ if not mgid:
+ mgid = self._search_regex(
+ r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
+
return mgid
def _real_extract(self, url):
title = url_basename(url)
webpage = self._download_webpage(url, title)
mgid = self._extract_mgid(webpage)
- videos_info = self._get_videos_info(mgid)
+ videos_info = self._get_videos_info(mgid, url=url)
return videos_info
@@ -327,14 +352,14 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
if mobj:
return mobj.group('url')
- def _get_feed_url(self, uri):
+ def _get_feed_url(self, uri, url=None):
video_id = self._id_from_uri(uri)
config = self._download_json(
'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
return self._remove_template_parameter(config['feedWithQueryParams'])
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
mgid = mobj.group('mgid')
return self._get_videos_info(mgid)
@@ -416,7 +441,7 @@ class MTVVideoIE(MTVServicesInfoExtractor):
return 'http://mtv.mtvnimages.com/uri/' + uri
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('videoid')
uri = mobj.groupdict().get('mgid')
if uri is None:
@@ -486,3 +511,152 @@ class MTVDEIE(MTVServicesInfoExtractor):
'arcEp': 'mtv.de',
'mgid': uri,
}
+
+
+class MTVItaliaIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv.it'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
+ 'info_dict': {
+ 'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
+ 'ext': 'mp4',
+ 'title': 'Cavoli amario (episodio completo)',
+ 'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
+ 'series': 'Mario - Una Serie Di Maccio Capatonda',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _GEO_COUNTRIES = ['IT']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtv.it',
+ 'mgid': uri,
+ }
+
+
+class MTVItaliaProgrammaIE(MTVItaliaIE):
+ IE_NAME = 'mtv.it:programma'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ # program page: general
+ 'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
+ 'info_dict': {
+ 'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
+ 'title': 'Mario - Una Serie Di Maccio Capatonda',
+ 'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
+ },
+ 'playlist_count': 2,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # program page: specific season
+ 'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
+ 'info_dict': {
+ 'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
+ 'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
+ },
+ 'playlist_count': 34,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # playlist page + redirect
+ 'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
+ 'info_dict': {
+ 'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
+ 'title': 'Sexy Videos',
+ },
+ 'playlist_mincount': 145,
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _GEO_COUNTRIES = ['IT']
+ _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'
+
+ def _get_entries(self, title, url):
+ while True:
+ pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
+ entries = self._download_json(url, title, 'page %s' % pg)
+ url = try_get(
+ entries, lambda x: x['result']['nextPageURL'], compat_str)
+ entries = try_get(
+ entries, (
+ lambda x: x['result']['data']['items'],
+ lambda x: x['result']['data']['seasons']),
+ list)
+ for entry in entries or []:
+ if entry.get('canonicalURL'):
+ yield self.url_result(entry['canonicalURL'])
+ if not url:
+ break
+
+ def _real_extract(self, url):
+ query = {'url': url}
+ info_url = update_url_query(self._FEED_URL, query)
+ video_id = self._match_id(url)
+ info = self._download_json(info_url, video_id).get('manifest')
+
+ redirect = try_get(
+ info, lambda x: x['newLocation']['url'], compat_str)
+ if redirect:
+ return self.url_result(redirect)
+
+ title = info.get('title')
+ video_id = try_get(
+ info, lambda x: x['reporting']['itemId'], compat_str)
+ parent_id = try_get(
+ info, lambda x: x['reporting']['parentId'], compat_str)
+
+ playlist_url = current_url = None
+ for z in (info.get('zones') or {}).values():
+ if z.get('moduleName') in ('INTL_M304', 'INTL_M209'):
+ info_url = z.get('feed')
+ if z.get('moduleName') in ('INTL_M308', 'INTL_M317'):
+ playlist_url = playlist_url or z.get('feed')
+ if z.get('moduleName') in ('INTL_M300',):
+ current_url = current_url or z.get('feed')
+
+ if not info_url:
+ raise ExtractorError('No info found')
+
+ if video_id == parent_id:
+ video_id = self._search_regex(
+ r'([^\/]+)/[^\/]+$', info_url, 'video_id')
+
+ info = self._download_json(info_url, video_id, 'Show info')
+ info = try_get(info, lambda x: x['result']['data'], dict)
+ title = title or try_get(
+ info, (
+ lambda x: x['title'],
+ lambda x: x['headline']),
+ compat_str)
+ description = try_get(info, lambda x: x['content'], compat_str)
+
+ if current_url:
+ season = try_get(
+ self._download_json(playlist_url, video_id, 'Seasons info'),
+ lambda x: x['result']['data'], dict)
+ current = try_get(
+ season, lambda x: x['currentSeason'], compat_str)
+ seasons = try_get(
+ season, lambda x: x['seasons'], list) or []
+
+ if current in [s.get('eTitle') for s in seasons]:
+ playlist_url = current_url
+
+ title = re.sub(
+ r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
+ '', title, flags=re.IGNORECASE).strip()
+
+ return self.playlist_result(
+ self._get_entries(title, playlist_url),
+ video_id, title, description)
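
One subtlety in the MTVServicesInfoExtractor change above is the episode-number normalization: the urn:mtvn:episodeN value carries the season number as a prefix, so it is stripped before the int_or_none conversion in the info dict. With hypothetical feed values:

    import re

    season, episode = '2', '204'  # hypothetical urn:mtvn:seasonN / :episodeN values
    if season and episode:
        episode = re.sub(r'^%s' % season, '', episode)
    assert episode == '04'  # int_or_none('04') then yields episode_number 4
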
diff --git a/hypervideo_dl/extractor/muenchentv.py b/hypervideo_dl/extractor/muenchentv.py
index 2cc2bf2..d256236 100644
--- a/hypervideo_dl/extractor/muenchentv.py
+++ b/hypervideo_dl/extractor/muenchentv.py
@@ -61,7 +61,7 @@ class MuenchenTVIE(InfoExtractor):
'tbr': int_or_none(s.get('label')),
'ext': 'mp4',
'format_id': format_id,
- 'preference': -100 if '.smil' in s['file'] else 0,
+ 'preference': -100 if '.smil' in s['file'] else 0, # Strictly inferior to all other formats?
})
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/musescore.py b/hypervideo_dl/extractor/musescore.py
new file mode 100644
index 0000000..dcd2638
--- /dev/null
+++ b/hypervideo_dl/extractor/musescore.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MuseScoreIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
+ _TESTS = [{
+ 'url': 'https://musescore.com/user/73797/scores/142975',
+ 'info_dict': {
+ 'id': '142975',
+ 'ext': 'mp3',
+ 'title': 'WA Mozart Marche Turque (Turkish March fingered)',
+ 'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be',
+ 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'uploader': 'PapyPiano',
+ 'creator': 'Wolfgang Amadeus Mozart',
+ }
+ }, {
+ 'url': 'https://musescore.com/user/36164500/scores/6837638',
+ 'info_dict': {
+ 'id': '6837638',
+ 'ext': 'mp3',
+ 'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child',
+ 'description': 'md5:4dca71191c14abc312a0a4192492eace',
+ 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'uploader': 'roxbelviolin',
+ 'creator': 'Guns N´Roses Arr. Roxbel Violin',
+ }
+ }, {
+ 'url': 'https://musescore.com/classicman/fur-elise',
+ 'info_dict': {
+ 'id': '33816',
+ 'ext': 'mp3',
+ 'title': 'Für Elise – Beethoven',
+ 'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34',
+ 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'uploader': 'ClassicMan',
+ 'creator': 'Ludwig van Beethoven (1770–1827)',
+ }
+ }, {
+ 'url': 'https://musescore.com/minh_cuteee/scores/6555384',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, None)
+ url = self._og_search_url(webpage) or url
+ id = self._match_id(url)
+ mp3_url = self._download_json(f'https://musescore.com/api/jmuse?id={id}&index=0&type=mp3&v2=1', id,
+ headers={'authorization': '63794e5461e4cfa046edfbdddfccc1ac16daffd2'})['info']['url']
+ formats = [{
+ 'url': mp3_url,
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ }]
+
+ return {
+ 'id': id,
+ 'formats': formats,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': self._html_search_meta('musescore:author', webpage, 'uploader'),
+ 'creator': self._html_search_meta('musescore:composer', webpage, 'composer'),
+ }
diff --git a/hypervideo_dl/extractor/mxplayer.py b/hypervideo_dl/extractor/mxplayer.py
new file mode 100644
index 0000000..5874556
--- /dev/null
+++ b/hypervideo_dl/extractor/mxplayer.py
@@ -0,0 +1,222 @@
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import try_get
+
+
+class MxplayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/(?P<type>movie|show/[-\w]+/[-\w]+)/(?P<display_id>[-\w]+)-(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.mxplayer.in/show/watch-my-girlfriend-is-an-alien-hindi-dubbed/season-1/episode-1-online-9d2013d31d5835bb8400e3b3c5e7bb72',
+ 'info_dict': {
+ 'id': '9d2013d31d5835bb8400e3b3c5e7bb72',
+ 'ext': 'mp4',
+ 'title': 'Episode 1',
+ 'description': 'md5:62ed43eb9fec5efde5cf3bd1040b7670',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'duration': 2451,
+ 'season': 'Season 1',
+ 'series': 'My Girlfriend Is An Alien (Hindi Dubbed)',
+ 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/9d2013d31d5835bb8400e3b3c5e7bb72/en/16x9/320x180/9562f5f8df42cad09c9a9c4e69eb1567_1920x1080.webp',
+ 'episode': 'Episode 1'
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true',
+ 'info_dict': {
+ 'id': 'b9fa28df3bfb8758874735bbd7d2655a',
+ 'ext': 'mp4',
+ 'title': 'Knock Knock (Hindi Dubbed)',
+ 'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b',
+ 'season_number': 0,
+ 'episode_number': 0,
+ 'duration': 5970,
+ 'season': 'Season 0',
+ 'series': None,
+ 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/b9fa28df3bfb8758874735bbd7d2655a/en/16x9/320x180/test_pic1588676032011.webp',
+ 'episode': 'Episode 0'
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-shaitaan/season-1/the-infamous-taxi-gang-of-meerut-online-45055d5bcff169ad48f2ad7552a83d6c',
+ 'info_dict': {
+ 'id': '45055d5bcff169ad48f2ad7552a83d6c',
+ 'ext': 'mp4',
+ 'title': 'The infamous taxi gang of Meerut',
+ 'description': 'md5:033a0a7e3fd147be4fb7e07a01a3dc28',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'duration': 2332,
+ 'season': 'Season 1',
+ 'series': 'Shaitaan',
+ 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/45055d5bcff169ad48f2ad7552a83d6c/en/16x9/320x180/voot_8e7d5f8d8183340869279c732c1e3a43.webp',
+ 'episode': 'Episode 1'
+ },
+ 'params': {
+ 'format': 'best',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb',
+ 'info_dict': {
+ 'id': 'd445579792b0135598ba1bc9088a84cb',
+ 'ext': 'mp4',
+ 'title': 'Duh Swapna',
+ 'description': 'md5:35ff39c4bdac403c53be1e16a04192d8',
+ 'season_number': 1,
+ 'episode_number': 3,
+ 'duration': 2568,
+ 'season': 'Chapter 1',
+ 'series': 'Aashram',
+ 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/d445579792b0135598ba1bc9088a84cb/en/4x3/1600x1200/test_pic1624819307993.webp',
+ 'episode': 'Episode 3'
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/show/watch-dangerous/season-1/chapter-1-online-5a351b4f9fb69436f6bd6ae3a1a75292',
+ 'info_dict': {
+ 'id': '5a351b4f9fb69436f6bd6ae3a1a75292',
+ 'ext': 'mp4',
+ 'title': 'Chapter 1',
+ 'description': 'md5:233886b8598bc91648ac098abe1d288f',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'duration': 1305,
+ 'season': 'Season 1',
+ 'series': 'Dangerous',
+ 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/5a351b4f9fb69436f6bd6ae3a1a75292/en/4x3/1600x1200/test_pic1624706302350.webp',
+ 'episode': 'Episode 1'
+ },
+ 'params': {
+ 'format': 'bv',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.mxplayer.in/movie/watch-the-attacks-of-2611-movie-online-0452f0d80226c398d63ce7e3ea40fa2d',
+ 'info_dict': {
+ 'id': '0452f0d80226c398d63ce7e3ea40fa2d',
+ 'ext': 'mp4',
+ 'title': 'The Attacks of 26/11',
+ 'description': 'md5:689bacd29e97b3f31eaf519eb14127e5',
+ 'season_number': 0,
+ 'episode_number': 0,
+ 'duration': 6085,
+ 'season': 'Season 0',
+ 'series': None,
+ 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/0452f0d80226c398d63ce7e3ea40fa2d/en/16x9/320x180/00c8955dab5e5d340dbde643f9b1f6fd_1920x1080.webp',
+ 'episode': 'Episode 0'
+ },
+ 'params': {
+ 'format': 'best',
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ type, display_id, video_id = self._match_valid_url(url).groups()
+ type = 'movie_film' if type == 'movie' else 'tvshow_episode'
+ API_URL = 'https://androidapi.mxplay.com/v1/detail/'
+ headers = {
+ 'X-Av-Code': '23',
+ 'X-Country': 'IN',
+ 'X-Platform': 'android',
+ 'X-App-Version': '1370001318',
+ 'X-Resolution': '3840x2160',
+ }
+ data_json = self._download_json(f'{API_URL}{type}/{video_id}', display_id, headers=headers)['profile']
+
+ season, series = None, None
+ for dct in data_json.get('levelInfos', []):
+ if dct.get('type') == 'tvshow_season':
+ season = dct.get('name')
+ elif dct.get('type') == 'tvshow_show':
+ series = dct.get('name')
+ thumbnails = []
+ for thumb in data_json.get('poster', []):
+ thumbnails.append({
+ 'url': thumb.get('url'),
+ 'width': thumb.get('width'),
+ 'height': thumb.get('height'),
+ })
+
+ formats = []
+ subtitles = {}
+ for dct in data_json.get('playInfo', []):
+ if dct.get('extension') == 'mpd':
+ frmt, subs = self._extract_mpd_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False)
+ formats.extend(frmt)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif dct.get('extension') == 'm3u8':
+ frmt, subs = self._extract_m3u8_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False)
+ formats.extend(frmt)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': data_json.get('name') or display_id,
+ 'description': data_json.get('description'),
+ 'season_number': data_json.get('seasonNum'),
+ 'episode_number': data_json.get('episodeNum'),
+ 'duration': data_json.get('duration'),
+ 'season': season,
+ 'series': series,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class MxplayerShowIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417',
+ 'playlist_mincount': 440,
+ 'info_dict': {
+ 'id': 'a8f44e3cc0814b5601d17772cedf5417',
+ 'title': 'Watch Chakravartin Ashoka Samrat Series Online',
+ }
+ }]
+
+ _API_SHOW_URL = "https://api.mxplay.com/v1/web/detail/tab/tvshowseasons?type=tv_show&id={}&device-density=2&platform=com.mxplay.desktop&content-languages=hi,en"
+ _API_EPISODES_URL = "https://api.mxplay.com/v1/web/detail/tab/tvshowepisodes?type=season&id={}&device-density=1&platform=com.mxplay.desktop&content-languages=hi,en&{}"
+
+ def _entries(self, show_id):
+ show_json = self._download_json(
+ self._API_SHOW_URL.format(show_id),
+ video_id=show_id, headers={'Referer': 'https://mxplayer.in'})
+ page_num = 0
+ for season in show_json.get('items') or []:
+ season_id = try_get(season, lambda x: x['id'], compat_str)
+ next_url = ''
+ while next_url is not None:
+ page_num += 1
+ season_json = self._download_json(
+ self._API_EPISODES_URL.format(season_id, next_url),
+ video_id=season_id,
+ headers={'Referer': 'https://mxplayer.in'},
+ note='Downloading JSON metadata page %d' % page_num)
+ for episode in season_json.get('items') or []:
+ video_url = episode['webUrl']
+ yield self.url_result(
+ 'https://mxplayer.in%s' % video_url,
+ ie=MxplayerIE.ie_key(), video_id=video_url.split('-')[-1])
+ next_url = season_json.get('next')
+
+ def _real_extract(self, url):
+ display_id, show_id = self._match_valid_url(url).groups()
+ return self.playlist_result(
+ self._entries(show_id), playlist_id=show_id,
+ playlist_title=display_id.replace('-', ' ').title())
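
MxplayerShowIE._entries pages through each season with a cursor-style 'next' field: next_url starts as '' so the first page is fetched, each response's 'next' value is appended to the following request, and a missing key yields None and terminates the loop. The pattern in isolation (fetch is a hypothetical page-fetching callable):

    def walk_pages(fetch):
        next_url = ''
        while next_url is not None:  # '' still requests the first page
            page = fetch(next_url)
            for item in page.get('items') or []:
                yield item
            next_url = page.get('next')  # absent key -> None -> stop
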
diff --git a/hypervideo_dl/extractor/mychannels.py b/hypervideo_dl/extractor/mychannels.py
index b1ffe78..d820d4e 100644
--- a/hypervideo_dl/extractor/mychannels.py
+++ b/hypervideo_dl/extractor/mychannels.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
@@ -21,7 +20,7 @@ class MyChannelsIE(InfoExtractor):
}
def _real_extract(self, url):
- id_type, url_id = re.match(self._VALID_URL, url).groups()
+ id_type, url_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, url_id)
video_data = self._html_search_regex(r'<div([^>]+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data')
diff --git a/hypervideo_dl/extractor/myspace.py b/hypervideo_dl/extractor/myspace.py
index e164d59..4227d42 100644
--- a/hypervideo_dl/extractor/myspace.py
+++ b/hypervideo_dl/extractor/myspace.py
@@ -46,18 +46,6 @@ class MySpaceIE(InfoExtractor):
'uploader_id': 'killsorrow',
},
}, {
- 'add_ie': ['Youtube'],
- 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
- 'info_dict': {
- 'id': 'xqds0B_meys',
- 'ext': 'webm',
- 'title': 'Three Days Grace - Animal I Have Become',
- 'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
- 'uploader': 'ThreeDaysGraceVEVO',
- 'uploader_id': 'ThreeDaysGraceVEVO',
- 'upload_date': '20091002',
- },
- }, {
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
'only_matching': True,
}, {
@@ -66,7 +54,7 @@ class MySpaceIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('video_id') or mobj.group('song_id')
is_song = mobj.group('mediatype').startswith('music/song')
webpage = self._download_webpage(url, video_id)
@@ -191,7 +179,7 @@ class MySpaceAlbumIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
playlist_id = mobj.group('id')
display_id = mobj.group('title') + playlist_id
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/myvideoge.py b/hypervideo_dl/extractor/myvideoge.py
new file mode 100644
index 0000000..0a1d7d0
--- /dev/null
+++ b/hypervideo_dl/extractor/myvideoge.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class MyVideoGeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.myvideo.ge/v/3941048',
+ 'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
+ 'info_dict': {
+ 'id': '3941048',
+ 'ext': 'mp4',
+ 'title': 'The best prikol',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'md5:d72addd357b0dd914e704781f7f777d8',
+ 'description': 'md5:5c0371f540f5888d603ebfedd46b6df3'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
+ description = self._og_search_description(webpage)
+ thumbnail = self._html_search_meta(['og:image'], webpage)
+ uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+
+ jwplayer_sources = self._parse_json(
+ self._search_regex(
+ r"(?s)jwplayer\(\"mvplayer\"\).setup\(.*?sources: (.*?])", webpage, 'jwplayer sources'),
+ video_id, transform_source=js_to_json)
+
+ def _formats_key(f):
+ if f['label'] == 'SD':
+ return -1
+ elif f['label'] == 'HD':
+ return 1
+ else:
+ return 0
+
+ jwplayer_sources = sorted(jwplayer_sources, key=_formats_key)
+
+ formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'formats': formats,
+ 'thumbnail': thumbnail
+ }
diff --git a/hypervideo_dl/extractor/n1.py b/hypervideo_dl/extractor/n1.py
new file mode 100644
index 0000000..7a09c67
--- /dev/null
+++ b/hypervideo_dl/extractor/n1.py
@@ -0,0 +1,136 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .youtube import YoutubeIE
+from .reddit import RedditRIE
+from .common import InfoExtractor
+from ..utils import (
+ unified_timestamp,
+ extract_attributes,
+)
+
+
+class N1InfoAssetIE(InfoExtractor):
+ _VALID_URL = r'https?://best-vod\.umn\.cdn\.united\.cloud/stream\?asset=(?P<id>[^&]+)'
+ _TESTS = [{
+ 'url': 'https://best-vod.umn.cdn.united.cloud/stream?asset=ljsottomazilirija3060921-n1info-si-worldwide&stream=hp1400&t=0&player=m3u8v&sp=n1info&u=n1info&p=n1Sh4redSecre7iNf0',
+ 'md5': '28b08b32aeaff2b8562736ccd5a66fe7',
+ 'info_dict': {
+ 'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
+ 'ext': 'mp4',
+ 'title': 'ljsottomazilirija3060921-n1info-si-worldwide',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ formats = self._extract_m3u8_formats(
+ url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+
+class N1InfoIIE(InfoExtractor):
+ IE_NAME = 'N1Info:article'
+ _VALID_URL = r'https?://(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)/(?:[^/]+/){1,2}(?P<id>[^/]+)'
+ _TESTS = [{
+ # Youtube embedded
+ 'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
+ 'md5': '01ddb6646d0fd9c4c7d990aa77fe1c5a',
+ 'info_dict': {
+ 'id': 'L5Hd4hQVUpk',
+ 'ext': 'mp4',
+ 'upload_date': '20210913',
+ 'title': 'Ozmo i USO21, ep. 13: Novak Đoković – Danil Medvedev | Ključevi Poraza, Budućnost | SPORT KLUB TENIS',
+ 'description': 'md5:467f330af1effedd2e290f10dc31bb8e',
+ 'uploader': 'Sport Klub',
+ 'uploader_id': 'sportklub',
+ }
+ }, {
+ 'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/',
+ 'info_dict': {
+ 'id': 'bgmetrosot2409zta20210924174316682-n1info-rs-worldwide',
+ 'ext': 'mp4',
+ 'title': 'Đilas: Predlog izgradnje metroa besmislen; SNS odbacuje navode',
+ 'upload_date': '20210924',
+ 'timestamp': 1632481347,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://n1info.si/novice/slovenija/zadnji-dnevi-na-kopaliscu-ilirija-ilirija-ni-umrla-ubili-so-jo/',
+ 'info_dict': {
+ 'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
+ 'ext': 'mp4',
+ 'title': 'Zadnji dnevi na kopališču Ilirija: “Ilirija ni umrla, ubili so jo”',
+ 'timestamp': 1632567630,
+ 'upload_date': '20210925',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Reddit embedded
+ 'url': 'https://ba.n1info.com/lifestyle/vucic-bolji-od-tita-ako-izgubi-ja-cu-da-crknem-jugoslavija-je-gotova/',
+ 'info_dict': {
+ 'id': '2wmfee9eycp71',
+ 'ext': 'mp4',
+ 'title': '"Ako Vučić izgubi izbore, ja ću da crknem, Jugoslavija je gotova"',
+ 'upload_date': '20210924',
+ 'timestamp': 1632448649.0,
+ 'uploader': 'YouLotWhatDontStop',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
+ timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
+
+ videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
+ entries = []
+ for video in videos:
+ video_data = extract_attributes(video)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': video_data.get('data-url'),
+ 'id': video_data.get('id'),
+ 'title': title,
+ 'thumbnail': video_data.get('data-thumbnail'),
+ 'timestamp': timestamp,
+ 'ie_key': N1InfoAssetIE.ie_key()})
+
+ embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
+ for embedded_video in embedded_videos:
+ video_data = extract_attributes(embedded_video)
+ url = video_data.get('src')
+ if url.startswith('https://www.youtube.com'):
+ entries.append(self.url_result(url, ie=YoutubeIE.ie_key()))
+ elif url.startswith('https://www.redditmedia.com'):
+ entries.append(self.url_result(url, ie=RedditRIE.ie_key()))
+
+ return {
+ '_type': 'playlist',
+ 'id': video_id,
+ 'title': title,
+ 'timestamp': timestamp,
+ 'entries': entries,
+ }
diff --git a/hypervideo_dl/extractor/naver.py b/hypervideo_dl/extractor/naver.py
index 61fc591..acf53c1 100644
--- a/hypervideo_dl/extractor/naver.py
+++ b/hypervideo_dl/extractor/naver.py
@@ -164,3 +164,88 @@ class NaverIE(NaverBaseIE):
'age_limit': 19 if current_clip.get('adult') else None,
})
return info
+
+
+class NaverLiveIE(InfoExtractor):
+ IE_NAME = 'Naver:live'
+ _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _TESTS = [{
+ 'url': 'https://tv.naver.com/l/52010',
+ 'info_dict': {
+ 'id': '52010',
+ 'ext': 'm3u8',
+ 'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"',
+ 'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3',
+ 'channel_id': 'NTV-ytnnews24-0',
+ 'start_time': 1597026780000,
+ },
+ }, {
+ 'url': 'https://tv.naver.com/l/51549',
+ 'info_dict': {
+ 'id': '51549',
+ 'ext': 'm3u8',
+ 'title': '연합뉴스TV - 코로나19 뉴스특보',
+ 'description': 'md5:c655e82091bc21e413f549c0eaccc481',
+ 'channel_id': 'NTV-yonhapnewstv-0',
+ 'start_time': 1596406380000,
+ },
+ }, {
+ 'url': 'https://tv.naver.com/l/54887',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ page = self._download_webpage(url, video_id, 'Downloading Page', 'Unable to download Page')
+ secure_url = self._search_regex(r'sApiF:\s+(?:"|\')([^"\']+)', page, 'secureurl')
+
+ info = self._extract_video_info(video_id, secure_url)
+ info.update({
+ 'description': self._og_search_description(page)
+ })
+
+ return info
+
+ def _extract_video_info(self, video_id, url):
+ video_data = self._download_json(url, video_id, headers=self.geo_verification_headers())
+ meta = video_data.get('meta')
+ status = meta.get('status')
+
+ if status == 'CLOSED':
+ raise ExtractorError('Stream is offline.', expected=True)
+ elif status != 'OPENED':
+ raise ExtractorError('Unknown status %s' % status)
+
+ title = meta.get('title')
+ stream_list = video_data.get('streams')
+
+ if stream_list is None:
+ raise ExtractorError('Could not get stream data.', expected=True)
+
+ formats = []
+ for quality in stream_list:
+ if not quality.get('url'):
+ continue
+
+ prop = quality.get('property')
+ if prop.get('abr'): # this 'abr' flag does not mean average audio bitrate
+ continue
+
+ formats.extend(self._extract_m3u8_formats(
+ quality.get('url'), video_id, 'm3u8',
+ m3u8_id=quality.get('qualityId'), live=True
+ ))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'channel_id': meta.get('channelId'),
+ 'channel_url': meta.get('channelUrl'),
+ 'thumbnail': meta.get('imgUrl'),
+ 'start_time': meta.get('startTime'),
+ 'categories': [meta.get('categoryId')],
+ 'is_live': True
+ }
diff --git a/hypervideo_dl/extractor/nba.py b/hypervideo_dl/extractor/nba.py
index fbc7ada..7390ef8 100644
--- a/hypervideo_dl/extractor/nba.py
+++ b/hypervideo_dl/extractor/nba.py
@@ -5,10 +5,8 @@ import re
from .turner import TurnerBaseIE
from ..compat import (
- compat_parse_qs,
compat_str,
compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse,
)
from ..utils import (
int_or_none,
@@ -16,6 +14,7 @@ from ..utils import (
OnDemandPagedList,
parse_duration,
parse_iso8601,
+ parse_qs,
try_get,
update_url_query,
urljoin,
@@ -165,9 +164,9 @@ class NBAWatchIE(NBAWatchBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
- collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
+ collection_id = parse_qs(url).get('collection', [None])[0]
if collection_id:
- if self._downloader.params.get('noplaylist'):
+ if self.get_param('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
@@ -303,7 +302,7 @@ class NBABaseIE(NBACVPBaseIE):
formats.append({
'format_id': 'source',
'url': source_url,
- 'preference': 1,
+ 'quality': 1,
})
m3u8_url = video.get('m3u8')
@@ -337,7 +336,7 @@ class NBABaseIE(NBACVPBaseIE):
return info
def _real_extract(self, url):
- team, display_id = re.match(self._VALID_URL, url).groups()
+ team, display_id = self._match_valid_url(url).groups()
if '/play#/' in url:
display_id = compat_urllib_parse_unquote(display_id)
else:
@@ -359,7 +358,7 @@ class NBAEmbedIE(NBABaseIE):
}]
def _real_extract(self, url):
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs = parse_qs(url)
content_id = qs['contentId'][0]
team = qs.get('team', [None])[0]
if not team:
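
The compat_parse_qs(compat_urllib_parse_urlparse(url).query) -> parse_qs(url) rewrites in nba.py rely on the new convenience helper imported from ..utils above; it is presumably just the old two-step form wrapped up:

    from urllib.parse import parse_qs as _parse_qs, urlparse

    def parse_qs(url):
        # Split the URL first, then parse only its query string.
        return _parse_qs(urlparse(url).query)

    parse_qs('https://www.nba.com/watch/video/x?collection=abc')
    # -> {'collection': ['abc']}
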
diff --git a/hypervideo_dl/extractor/nbc.py b/hypervideo_dl/extractor/nbc.py
index 0d77648..f304f19 100644
--- a/hypervideo_dl/extractor/nbc.py
+++ b/hypervideo_dl/extractor/nbc.py
@@ -10,7 +10,9 @@ from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote
from ..utils import (
int_or_none,
+ parse_age_limit,
parse_duration,
+ RegexNotFoundError,
smuggle_url,
try_get,
unified_timestamp,
@@ -18,7 +20,7 @@ from ..utils import (
)
-class NBCIE(AdobePassIE):
+class NBCIE(ThePlatformIE):
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
@@ -84,7 +86,7 @@ class NBCIE(AdobePassIE):
]
def _real_extract(self, url):
- permalink, video_id = re.match(self._VALID_URL, url).groups()
+ permalink, video_id = self._match_valid_url(url).groups()
permalink = 'http' + compat_urllib_parse_unquote(permalink)
video_data = self._download_json(
'https://friendship.nbc.co/v2/graphql', video_id, query={
@@ -132,7 +134,9 @@ class NBCIE(AdobePassIE):
'manifest': 'm3u',
}
video_id = video_data['mpxGuid']
- title = video_data['secondaryTitle']
+ tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id)
+ tpm = self._download_theplatform_metadata(tp_path, video_id)
+ title = tpm.get('title') or video_data.get('secondaryTitle')
if video_data.get('locked'):
resource = self._get_mvpd_resource(
video_data.get('resourceId') or 'nbcentertainment',
@@ -142,18 +146,40 @@ class NBCIE(AdobePassIE):
theplatform_url = smuggle_url(update_url_query(
'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
query), {'force_smil_url': True})
+
+ # Empty string or 0 can be valid values for these. So the check must be `is None`
+ description = video_data.get('description')
+ if description is None:
+ description = tpm.get('description')
+ episode_number = int_or_none(video_data.get('episodeNumber'))
+ if episode_number is None:
+ episode_number = int_or_none(tpm.get('nbcu$airOrder'))
+ rating = video_data.get('rating')
+ if rating is None:
+ rating = try_get(tpm, lambda x: x['ratings'][0]['rating'])
+ season_number = int_or_none(video_data.get('seasonNumber'))
+ if season_number is None:
+ season_number = int_or_none(tpm.get('nbcu$seasonNumber'))
+ series = video_data.get('seriesShortTitle')
+ if series is None:
+ series = tpm.get('nbcu$seriesShortTitle')
+ tags = video_data.get('keywords')
+ if tags is None or len(tags) == 0:
+ tags = tpm.get('keywords')
+
return {
'_type': 'url_transparent',
+ 'age_limit': parse_age_limit(rating),
+ 'description': description,
+ 'episode': title,
+ 'episode_number': episode_number,
'id': video_id,
+ 'ie_key': 'ThePlatform',
+ 'season_number': season_number,
+ 'series': series,
+ 'tags': tags,
'title': title,
'url': theplatform_url,
- 'description': video_data.get('description'),
- 'tags': video_data.get('keywords'),
- 'season_number': int_or_none(video_data.get('seasonNumber')),
- 'episode_number': int_or_none(video_data.get('episodeNumber')),
- 'episode': title,
- 'series': video_data.get('seriesShortTitle'),
- 'ie_key': 'ThePlatform',
}
@@ -435,7 +461,7 @@ class NBCNewsIE(ThePlatformIE):
class NBCOlympicsIE(InfoExtractor):
IE_NAME = 'nbcolympics'
- _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
+ _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
_TEST = {
# Geo-restricted to US
@@ -458,13 +484,18 @@ class NBCOlympicsIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- drupal_settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings'), display_id)
+ try:
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
- iframe_url = drupal_settings['vod']['iframe_url']
- theplatform_url = iframe_url.replace(
- 'vplayer.nbcolympics.com', 'player.theplatform.com')
+ iframe_url = drupal_settings['vod']['iframe_url']
+ theplatform_url = iframe_url.replace(
+ 'vplayer.nbcolympics.com', 'player.theplatform.com')
+ except RegexNotFoundError:
+ theplatform_url = self._search_regex(
+ r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
+ webpage, 'embedding URL', group="embedUrl")
return {
'_type': 'url_transparent',
@@ -477,43 +508,79 @@ class NBCOlympicsIE(InfoExtractor):
class NBCOlympicsStreamIE(AdobePassIE):
IE_NAME = 'nbcolympics:stream'
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
- _TEST = {
- 'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
- 'info_dict': {
- 'id': '203493',
- 'ext': 'mp4',
- 'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
+ _TESTS = [
+ {
+ 'note': 'Tokenized m3u8 source URL',
+ 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
+ 'info_dict': {
+ 'id': '2019740',
+ 'ext': 'mp4',
+ 'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'note': 'Plain m3u8 source URL',
+ 'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
+ 'info_dict': {
+ 'id': '2021729',
+ 'ext': 'mp4',
+ 'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
},
- }
- _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
+ ]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
- resource = self._search_regex(
- r"resource\s*=\s*'(.+)';", webpage,
- 'resource').replace("' + pid + '", pid)
+
event_config = self._download_json(
- self._DATA_URL_TEMPLATE % ('event_config', pid),
- pid)['eventConfig']
- title = self._live_title(event_config['eventTitle'])
+ f'http://stream.nbcolympics.com/data/event_config_{pid}.json',
+ pid, 'Downloading event config')['eventConfig']
+
+ title = event_config['eventTitle']
+ is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus'))
+ if is_live:
+ title = self._live_title(title)
+
source_url = self._download_json(
- self._DATA_URL_TEMPLATE % ('live_sources', pid),
- pid)['videoSources'][0]['sourceUrl']
- media_token = self._extract_mvpd_auth(
- url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
- formats = self._extract_m3u8_formats(self._download_webpage(
- 'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
- 'cdn': 'akamai',
- 'mediaToken': base64.b64encode(media_token.encode()),
- 'resource': base64.b64encode(resource.encode()),
- 'url': source_url,
- }), pid, 'mp4')
+ f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging',
+ pid, 'Downloading leap config'
+ )['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl']
+
+ if event_config.get('cdnToken'):
+ ap_resource = self._get_mvpd_resource(
+ event_config.get('resourceId', 'NBCOlympics'),
+ re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid,
+ event_config.get('ratingId', 'NO VALUE'))
+ media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
+
+ source_url = self._download_json(
+ 'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL',
+ data=json.dumps({
+ 'application': 'NBCSports',
+ 'authentication-type': 'adobe-pass',
+ 'cdn': 'akamai',
+ 'pid': pid,
+ 'platform': 'desktop',
+ 'requestorId': 'NBCOlympics',
+ 'resourceId': base64.b64encode(ap_resource.encode()).decode(),
+ 'token': base64.b64encode(media_token.encode()).decode(),
+ 'url': source_url,
+ 'version': 'v1',
+ }).encode(),
+ )['akamai'][0]['tokenizedUrl']
+
+ formats = self._extract_m3u8_formats(source_url, pid, 'mp4', live=is_live)
+ for f in formats:
+ # -http_seekable requires ffmpeg 4.3+ but it doesn't seem possible to
+ # download with ffmpeg without this option
+ f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0']
self._sort_formats(formats)
return {
@@ -521,5 +588,5 @@ class NBCOlympicsStreamIE(AdobePassIE):
'display_id': display_id,
'title': title,
'formats': formats,
- 'is_live': True,
+ 'is_live': is_live,
}
diff --git a/hypervideo_dl/extractor/ndr.py b/hypervideo_dl/extractor/ndr.py
index ddd828d..f2bae2c 100644
--- a/hypervideo_dl/extractor/ndr.py
+++ b/hypervideo_dl/extractor/ndr.py
@@ -1,135 +1,136 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
- merge_dicts,
- parse_iso8601,
+ parse_duration,
qualities,
try_get,
+ unified_strdate,
urljoin,
)
class NDRBaseIE(InfoExtractor):
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = next(group for group in mobj.groups() if group)
+ id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
- return self._extract_embed(webpage, display_id)
+ return self._extract_embed(webpage, display_id, id)
class NDRIE(NDRBaseIE):
IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
- _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html'
_TESTS = [{
- # httpVideo, same content id
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
- 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
'info_dict': {
'id': 'hafengeburtstag988',
- 'display_id': 'Party-Poette-und-Parade',
'ext': 'mp4',
'title': 'Party, Pötte und Parade',
+ 'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg',
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
- 'uploader': 'ndrtv',
- 'timestamp': 1431108900,
- 'upload_date': '20150510',
+ 'series': None,
+ 'channel': 'NDR Fernsehen',
+ 'upload_date': '20150508',
'duration': 3498,
},
- 'params': {
- 'skip_download': True,
- },
}, {
- # httpVideo, different content id
- 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
- 'md5': '1043ff203eab307f0c51702ec49e9a71',
+ 'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html',
'info_dict': {
- 'id': 'osna272',
- 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
+ 'id': 'kommunalwahl1296',
'ext': 'mp4',
- 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
- 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
- 'uploader': 'ndrtv',
- 'timestamp': 1442059200,
- 'upload_date': '20150912',
- 'duration': 510,
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik',
+ 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg',
+ 'description': 'md5:5c6e2ad744cef499135735a1036d7aa7',
+ 'series': 'Hallo Niedersachsen',
+ 'channel': 'NDR Fernsehen',
+ 'upload_date': '20210913',
+ 'duration': 438,
},
}, {
- # httpAudio, same content id
+ 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ 'info_dict': {
+ 'id': 'sendung1091858',
+ 'ext': 'mp4',
+ 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
+ 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg',
+ 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
+ 'series': 'extra 3',
+ 'channel': 'NDR Fernsehen',
+ 'upload_date': '20201111',
+ 'duration': 1749,
+ }
+ }, {
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
- 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
'info_dict': {
'id': 'audio51535',
- 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
'ext': 'mp3',
'title': 'La Valette entgeht der Hinrichtung',
+ 'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg',
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
- 'uploader': 'ndrinfo',
- 'timestamp': 1290626100,
'upload_date': '20140729',
- 'duration': 884,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # with subtitles
- 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
- 'info_dict': {
- 'id': 'extra18674',
- 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
- 'ext': 'mp4',
- 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
- 'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
- 'uploader': 'ndrtv',
- 'upload_date': '20201113',
- 'duration': 1749,
- 'subtitles': {
- 'de': [{
- 'ext': 'ttml',
- 'url': r're:^https://www\.ndr\.de.+',
- }],
- },
+ 'duration': 884.0,
},
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Unable to download f4m manifest'],
- }, {
- 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
- 'only_matching': True,
+ 'expected_warnings': ['unable to extract json url'],
}]
- def _extract_embed(self, webpage, display_id):
- embed_url = self._html_search_meta(
- 'embedURL', webpage, 'embed URL',
- default=None) or self._search_regex(
- r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'embed URL', group='url')
- description = self._search_regex(
- r'<p[^>]+itemprop="description">([^<]+)</p>',
- webpage, 'description', default=None) or self._og_search_description(webpage)
- timestamp = parse_iso8601(
- self._search_regex(
- r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
- webpage, 'upload date', default=None))
- info = self._search_json_ld(webpage, display_id, default={})
- return merge_dicts({
- '_type': 'url_transparent',
- 'url': embed_url,
- 'display_id': display_id,
- 'description': description,
- 'timestamp': timestamp,
- }, info)
+ def _extract_embed(self, webpage, display_id, id):
+ formats = []
+ base_url = 'https://www.ndr.de'
+ json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
+ 'json url', fatal=False)
+ if json_url:
+ data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
+ id, fatal=False)
+ info_json = data_json.get('_info', {})
+ media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray'])
+ for media in media_json:
+ if media.get('_quality') == 'auto':
+ formats.extend(self._extract_m3u8_formats(media['_stream'], id))
+ subtitles = {}
+ sub_url = data_json.get('_subtitleUrl')
+ if sub_url:
+ subtitles.setdefault('de', []).append({
+ 'url': base_url + sub_url,
+ })
+ self._sort_formats(formats)
+ return {
+ 'id': id,
+ 'title': info_json.get('clipTitle'),
+ 'thumbnail': base_url + data_json.get('_previewImage'),
+ 'description': info_json.get('clipDescription'),
+ 'series': info_json.get('seriesTitle') or None,
+ 'channel': info_json.get('channelTitle'),
+ 'upload_date': unified_strdate(info_json.get('clipDate')),
+ 'duration': data_json.get('_duration'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ else:
+ json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
+ '_belongsToPodcast-', '')
+ data_json = self._download_json(json_url, id, fatal=False)
+ return {
+ 'id': id,
+ 'title': data_json.get('title'),
+ 'thumbnail': base_url + data_json.get('poster'),
+ 'description': data_json.get('summary'),
+ 'upload_date': unified_strdate(data_json.get('publicationDate')),
+ 'duration': parse_duration(data_json.get('duration')),
+ 'formats': [{
+ 'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ }],
+ }
class NJoyIE(NDRBaseIE):
@@ -175,7 +176,7 @@ class NJoyIE(NDRBaseIE):
'only_matching': True,
}]
- def _extract_embed(self, webpage, display_id):
+ def _extract_embed(self, webpage, display_id, id):
video_id = self._search_regex(
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
description = self._search_regex(
@@ -202,7 +203,7 @@ class NDREmbedBaseIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_s')
ppjson = self._download_json(
@@ -291,7 +292,7 @@ class NDREmbedBaseIE(InfoExtractor):
class NDREmbedIE(NDREmbedBaseIE):
IE_NAME = 'ndr:embed'
- _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
_TESTS = [{
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
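
A concrete trace may help with the rewritten NDR _extract_embed: the iframe src identifies the ARD player page, and the matching JSON manifest sits at the same path with ardplayer_image swapped for ardjson_image. With a hypothetical src value:

    base_url = 'https://www.ndr.de'
    # Hypothetical capture from the iframe regex (everything before _theme-ndrde...html):
    json_url = '/fernsehen/hafengeburtstag988-ardplayer_image-12345'
    manifest = base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json'
    # -> https://www.ndr.de/fernsehen/hafengeburtstag988-ardjson_image-12345.json
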
diff --git a/hypervideo_dl/extractor/nebula.py b/hypervideo_dl/extractor/nebula.py
new file mode 100644
index 0000000..9698a35
--- /dev/null
+++ b/hypervideo_dl/extractor/nebula.py
@@ -0,0 +1,238 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import time
+
+from urllib.error import HTTPError
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+ try_get,
+ urljoin,
+)
+
+
+class NebulaIE(InfoExtractor):
+
+ _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
+ _TESTS = [
+ {
+ 'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
+ 'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
+ 'info_dict': {
+ 'id': '5c271b40b13fd613090034fd',
+ 'ext': 'mp4',
+ 'title': 'That Time Disney Remade Beauty and the Beast',
+ 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
+ 'upload_date': '20180731',
+ 'timestamp': 1533009600,
+ 'channel': 'Lindsay Ellis',
+ 'uploader': 'Lindsay Ellis',
+ },
+ 'params': {
+ 'usenetrc': True,
+ },
+ 'skip': 'All Nebula content requires authentication',
+ },
+ {
+ 'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
+ 'md5': '6d4edd14ce65720fa63aba5c583fb328',
+ 'info_dict': {
+ 'id': '5e7e78171aaf320001fbd6be',
+ 'ext': 'mp4',
+ 'title': 'Landing Craft - How The Allies Got Ashore',
+ 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
+ 'upload_date': '20200327',
+ 'timestamp': 1585348140,
+ 'channel': 'The Logistics of D-Day',
+ 'uploader': 'The Logistics of D-Day',
+ },
+ 'params': {
+ 'usenetrc': True,
+ },
+ 'skip': 'All Nebula content requires authentication',
+ },
+ {
+ 'url': 'https://nebula.app/videos/money-episode-1-the-draw',
+ 'md5': '8c7d272910eea320f6f8e6d3084eecf5',
+ 'info_dict': {
+ 'id': '5e779ebdd157bc0001d1c75a',
+ 'ext': 'mp4',
+ 'title': 'Episode 1: The Draw',
+ 'description': r'contains:There’s free money on offer… if the players can all work together.',
+ 'upload_date': '20200323',
+ 'timestamp': 1584980400,
+ 'channel': 'Tom Scott Presents: Money',
+ 'uploader': 'Tom Scott Presents: Money',
+ },
+ 'params': {
+ 'usenetrc': True,
+ },
+ 'skip': 'All Nebula content requires authentication',
+ },
+ {
+ 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
+ 'only_matching': True,
+ },
+ ]
+ _NETRC_MACHINE = 'watchnebula'
+
+ _nebula_token = None
+
+ def _retrieve_nebula_auth(self):
+ """
+ Log in to Nebula and return the Nebula API token.
+ """
+
+ username, password = self._get_login_info()
+ if not (username and password):
+ self.raise_login_required()
+
+ self.report_login()
+ data = json.dumps({'email': username, 'password': password}).encode('utf8')
+ response = self._download_json(
+ 'https://api.watchnebula.com/api/v1/auth/login/',
+ data=data, fatal=False, video_id=None,
+ headers={
+ 'content-type': 'application/json',
+ # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
+ 'cookie': ''
+ },
+ note='Authenticating to Nebula with supplied credentials',
+ errnote='Authentication failed or rejected')
+ if not response or not response.get('key'):
+ self.raise_login_required()
+
+ # save nebula token as cookie
+ self._set_cookie(
+ 'nebula.app', 'nebula-auth',
+ compat_urllib_parse_quote(
+ json.dumps({
+ "apiToken": response["key"],
+ "isLoggingIn": False,
+ "isLoggingOut": False,
+ }, separators=(",", ":"))),
+ expire_time=int(time.time()) + 86400 * 365,
+ )
+
+ return response['key']
+
+ def _retrieve_zype_api_key(self, page_url, display_id):
+ """
+ Retrieves the Zype API key
+ """
+
+ # Find the js that has the API key from the webpage and download it
+ webpage = self._download_webpage(page_url, video_id=display_id)
+ main_script_relpath = self._search_regex(
+ r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage,
+ group='script_relpath', name='script relative path', fatal=True)
+ main_script_abspath = urljoin(page_url, main_script_relpath)
+ main_script = self._download_webpage(main_script_abspath, video_id=display_id,
+ note='Retrieving Zype API key')
+
+ api_key = self._search_regex(
+ r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
+ group='api_key', name='API key', fatal=True)
+
+ return api_key
+
+ def _call_zype_api(self, path, params, video_id, api_key, note):
+ """
+ A helper for making calls to the Zype API.
+ """
+ query = {'api_key': api_key, 'per_page': 1}
+ query.update(params)
+ return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)
+
+ def _call_nebula_api(self, path, video_id, access_token, note):
+ """
+ A helper for making calls to the Nebula API.
+ """
+ return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
+ 'Authorization': 'Token {access_token}'.format(access_token=access_token)
+ }, note=note)
+
+ def _fetch_zype_access_token(self, video_id):
+ try:
+ user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
+ except ExtractorError as exc:
+ # if 401, attempt credential auth and retry
+ if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401:
+ self._nebula_token = self._retrieve_nebula_auth()
+ user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
+ else:
+ raise
+
+ access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
+ if not access_token:
+ if try_get(user_object, lambda x: x['is_subscribed'], bool):
+ # TODO: Reimplement the same Zype token polling the Nebula frontend implements
+ # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
+ raise ExtractorError(
+ 'Unable to extract Zype access token from Nebula API authentication endpoint. '
+ 'Open an arbitrary video in a browser with this account to generate a token',
+ expected=True)
+ raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
+ return access_token
+
+ def _extract_channel_title(self, video_meta):
+ # TODO: Implement the API calls giving us the channel list,
+ # so that we can do the title lookup and then figure out the channel URL
+ categories = video_meta.get('categories', []) if video_meta else []
+ # the channel name is the value of the first category
+ for category in categories:
+ if category.get('value'):
+ return category['value'][0]
+
+ def _real_initialize(self):
+ # check cookie jar for valid token
+ nebula_cookies = self._get_cookies('https://nebula.app')
+ nebula_cookie = nebula_cookies.get('nebula-auth')
+ if nebula_cookie:
+ self.to_screen('Authenticating to Nebula with token from cookie jar')
+ nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value)
+ self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
+
+ # try to authenticate using credentials if no valid token has been found
+ if not self._nebula_token:
+ self._nebula_token = self._retrieve_nebula_auth()
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ api_key = self._retrieve_zype_api_key(url, display_id)
+
+ response = self._call_zype_api('/videos', {'friendly_title': display_id},
+ display_id, api_key, note='Retrieving metadata from Zype')
+ if len(response.get('response') or []) != 1:
+ raise ExtractorError('Unable to find video on Zype API')
+ video_meta = response['response'][0]
+
+ video_id = video_meta['_id']
+ zype_access_token = self._fetch_zype_access_token(display_id)
+
+ channel_title = self._extract_channel_title(video_meta)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ '_type': 'url_transparent',
+ 'ie_key': 'Zype',
+ 'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token),
+ 'title': video_meta.get('title'),
+ 'description': video_meta.get('description'),
+ 'timestamp': parse_iso8601(video_meta.get('published_at')),
+ 'thumbnails': [{
+ 'id': tn.get('name'), # this appears to be null
+ 'url': tn['url'],
+ 'width': tn.get('width'),
+ 'height': tn.get('height'),
+ } for tn in video_meta.get('thumbnails', [])],
+ 'duration': video_meta.get('duration'),
+ 'channel': channel_title,
+ 'uploader': channel_title, # we chose uploader = channel name
+ # TODO: uploader_url, channel_id, channel_url
+ }
diff --git a/hypervideo_dl/extractor/neteasemusic.py b/hypervideo_dl/extractor/neteasemusic.py
index 978a058..7652371 100644
--- a/hypervideo_dl/extractor/neteasemusic.py
+++ b/hypervideo_dl/extractor/neteasemusic.py
@@ -405,7 +405,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
name = info['name']
description = info['description']
- if not info['songs'] or self._downloader.params.get('noplaylist'):
+ if not info['songs'] or self.get_param('noplaylist'):
if info['songs']:
self.to_screen(
'Downloading just the main audio %s because of --no-playlist'
diff --git a/hypervideo_dl/extractor/netzkino.py b/hypervideo_dl/extractor/netzkino.py
index aec3026..4ad0d8e 100644
--- a/hypervideo_dl/extractor/netzkino.py
+++ b/hypervideo_dl/extractor/netzkino.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -13,17 +12,16 @@ from ..utils import (
class NetzkinoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'
- _TEST = {
- 'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
+ _TESTS = [{
+ 'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
'md5': '92a3f8b76f8d7220acce5377ea5d4873',
'info_dict': {
'id': 'rakete-zum-mond',
'ext': 'mp4',
- 'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
- 'comments': 'mincount:3',
- 'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
+ 'title': 'Rakete zum Mond \u2013 Jules Verne',
+ 'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
'upload_date': '20120813',
'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1344858571,
@@ -32,17 +30,30 @@ class NetzkinoIE(InfoExtractor):
'params': {
'skip_download': 'Download only works from Germany',
}
- }
+ }, {
+ 'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
+ 'md5': 'c7728b2dadd04ff6727814847a51ef03',
+ 'info_dict': {
+ 'id': 'dr-jekyll-mrs-hyde-2',
+ 'ext': 'mp4',
+ 'title': 'Dr. Jekyll & Mrs. Hyde 2',
+ 'description': 'md5:c2e9626ebd02de0a794b95407045d186',
+ 'upload_date': '20190130',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'timestamp': 1548849437,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': 'Download only works from Germany',
+ }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- category_id = mobj.group('category')
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
- api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
- api_info = self._download_json(api_url, video_id)
- info = next(
- p for p in api_info['posts'] if p['slug'] == video_id)
+ api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id
+ info = self._download_json(api_url, video_id)
custom_fields = info['custom_fields']
production_js = self._download_webpage(
@@ -67,23 +78,12 @@ class NetzkinoIE(InfoExtractor):
} for key, tpl in templates.items()]
self._sort_formats(formats)
- comments = [{
- 'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
- 'id': c['id'],
- 'author': c['name'],
- 'html': c['content'],
- 'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
- } for c in info.get('comments', [])]
-
return {
'id': video_id,
'formats': formats,
- 'comments': comments,
'title': info['title'],
'age_limit': int_or_none(custom_fields.get('FSK')[0]),
'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
'description': clean_html(info.get('content')),
'thumbnail': info.get('thumbnail'),
- 'playlist_title': api_info.get('title'),
- 'playlist_id': category_id,
}
diff --git a/hypervideo_dl/extractor/newgrounds.py b/hypervideo_dl/extractor/newgrounds.py
index 82e7cf5..bbbd9e8 100644
--- a/hypervideo_dl/extractor/newgrounds.py
+++ b/hypervideo_dl/extractor/newgrounds.py
@@ -1,19 +1,23 @@
+# coding: utf-8
from __future__ import unicode_literals
+import functools
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
+ parse_count,
parse_duration,
- parse_filesize,
unified_timestamp,
+ OnDemandPagedList,
+ try_get,
)
class NewgroundsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
_TESTS = [{
'url': 'https://www.newgrounds.com/audio/listen/549479',
'md5': 'fe6033d297591288fa1c1f780386f07a',
@@ -25,17 +29,20 @@ class NewgroundsIE(InfoExtractor):
'timestamp': 1378878540,
'upload_date': '20130911',
'duration': 143,
+ 'description': 'md5:6d885138814015dfd656c2ddb00dacfc',
},
}, {
- 'url': 'https://www.newgrounds.com/portal/view/673111',
- 'md5': '3394735822aab2478c31b1004fe5e5bc',
+ 'url': 'https://www.newgrounds.com/portal/view/1',
+ 'md5': 'fbfb40e2dc765a7e830cb251d370d981',
'info_dict': {
- 'id': '673111',
+ 'id': '1',
'ext': 'mp4',
- 'title': 'Dancin',
- 'uploader': 'Squirrelman82',
- 'timestamp': 1460256780,
- 'upload_date': '20160410',
+ 'title': 'Scrotum 1',
+ 'uploader': 'Brian-Beaton',
+ 'timestamp': 955064100,
+ 'upload_date': '20000406',
+ 'description': 'Scrotum plays "catch."',
+ 'age_limit': 17,
},
}, {
# source format unavailable, additional mp4 formats
@@ -44,70 +51,123 @@ class NewgroundsIE(InfoExtractor):
'id': '689400',
'ext': 'mp4',
'title': 'ZTV News Episode 8',
- 'uploader': 'BennettTheSage',
+ 'uploader': 'ZONE-SAMA',
'timestamp': 1487965140,
'upload_date': '20170224',
+ 'description': 'ZTV News Episode 8 (February 2017)',
+ 'age_limit': 17,
},
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.newgrounds.com/portal/view/297383',
+ 'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
+ 'info_dict': {
+ 'id': '297383',
+ 'ext': 'mp4',
+ 'title': 'Metal Gear Awesome',
+ 'uploader': 'Egoraptor',
+ 'timestamp': 1140663240,
+ 'upload_date': '20060223',
+ 'description': 'Metal Gear is awesome is so is this movie.',
+ 'age_limit': 13,
+ }
+ }, {
+ 'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
+ 'md5': '5d05585a9a0caca059f5abfbd3865524',
+ 'info_dict': {
+ 'id': '297383',
+ 'ext': 'swf',
+ 'title': 'Metal Gear Awesome',
+ 'description': 'Metal Gear is awesome is so is this movie.',
+ 'uploader': 'Egoraptor',
+ 'upload_date': '20060223',
+ 'timestamp': 1140663240,
+ 'age_limit': 13,
+ }
}]
+ _AGE_LIMIT = {
+ 'e': 0,
+ 't': 13,
+ 'm': 17,
+ 'a': 18,
+ }
def _real_extract(self, url):
media_id = self._match_id(url)
-
+ formats = []
+ uploader = None
webpage = self._download_webpage(url, media_id)
title = self._html_search_regex(
- r'<title>([^>]+)</title>', webpage, 'title')
+ r'<title>(.+?)</title>', webpage, 'title')
- media_url = self._parse_json(self._search_regex(
- r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
+ media_url_string = self._search_regex(
+ r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)
- formats = [{
- 'url': media_url,
- 'format_id': 'source',
- 'quality': 1,
- }]
+ if media_url_string:
+ media_url = self._parse_json(media_url_string, media_id)
+ formats = [{
+ 'url': media_url,
+ 'format_id': 'source',
+ 'quality': 1,
+ }]
+ else:
+ json_video = self._download_json('https://www.newgrounds.com/portal/video/' + media_id, media_id, headers={
+ 'Accept': 'application/json',
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest'
+ })
- max_resolution = int_or_none(self._search_regex(
- r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
- default=None))
- if max_resolution:
- url_base = media_url.rpartition('.')[0]
- for resolution in (360, 720, 1080):
- if resolution > max_resolution:
- break
- formats.append({
- 'url': '%s.%dp.mp4' % (url_base, resolution),
- 'format_id': '%dp' % resolution,
- 'height': resolution,
- })
+ uploader = json_video.get('author')
+ media_formats = json_video.get('sources', [])
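+ # 'sources' maps format keys such as '1080p' to lists of source URLs; the trailing 'p' is stripped below for the numeric quality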
+ for media_format in media_formats:
+ media_sources = media_formats[media_format]
+ for source in media_sources:
+ formats.append({
+ 'format_id': media_format,
+ 'quality': int_or_none(media_format[:-1]),
+ 'url': source.get('src')
+ })
- self._check_formats(formats, media_id)
- self._sort_formats(formats)
+ if not uploader:
+ uploader = self._html_search_regex(
+ (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
+ r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
+ fatal=False)
- uploader = self._html_search_regex(
- (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
- r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
- fatal=False)
+ age_limit = self._html_search_regex(
+ r'<h2\s*class=["\']rated-([^"\'])["\'][^>]+>', webpage, 'age_limit', default='e')
+ age_limit = self._AGE_LIMIT.get(age_limit)
timestamp = unified_timestamp(self._html_search_regex(
(r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
default=None))
- duration = parse_duration(self._search_regex(
- r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+ duration = parse_duration(self._html_search_regex(
+ r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage,
'duration', default=None))
- filesize_approx = parse_filesize(self._html_search_regex(
- r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
+ view_count = parse_count(self._html_search_regex(
+ r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage,
+ 'view count', default=None))
+
+ filesize = int_or_none(self._html_search_regex(
+ r'"filesize"\s*:\s*["\']?([\d]+)["\']?,', webpage, 'filesize',
default=None))
+
+ video_type_description = self._html_search_regex(
+ r'"description"\s*:\s*["\']?([^"\']+)["\']?,', webpage, 'filesize',
+ default=None)
+
if len(formats) == 1:
- formats[0]['filesize_approx'] = filesize_approx
+ formats[0]['filesize'] = filesize
- if '<dd>Song' in webpage:
+ if video_type_description == 'Audio File':
formats[0]['vcodec'] = 'none'
+ self._check_formats(formats, media_id)
+ self._sort_formats(formats)
return {
'id': media_id,
@@ -116,10 +176,15 @@ class NewgroundsIE(InfoExtractor):
'timestamp': timestamp,
'duration': duration,
'formats': formats,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'age_limit': age_limit,
+ 'view_count': view_count,
}
class NewgroundsPlaylistIE(InfoExtractor):
+ IE_NAME = 'Newgrounds:playlist'
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.newgrounds.com/collection/cats',
@@ -127,14 +192,14 @@ class NewgroundsPlaylistIE(InfoExtractor):
'id': 'cats',
'title': 'Cats',
},
- 'playlist_mincount': 46,
+ 'playlist_mincount': 45,
}, {
- 'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
+ 'url': 'https://www.newgrounds.com/collection/dogs',
'info_dict': {
- 'id': 'ZONE-SAMA',
- 'title': 'Portal Search: ZONE-SAMA',
+ 'id': 'dogs',
+ 'title': 'Dogs',
},
- 'playlist_mincount': 47,
+ 'playlist_mincount': 26,
}, {
'url': 'http://www.newgrounds.com/audio/search/title/cats',
'only_matching': True,
@@ -155,14 +220,64 @@ class NewgroundsPlaylistIE(InfoExtractor):
entries = []
for a, path, media_id in re.findall(
- r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
+ r'(<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>)',
webpage):
a_class = extract_attributes(a).get('class')
if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
continue
entries.append(
self.url_result(
- 'https://www.newgrounds.com/%s' % path,
+ f'https://www.newgrounds.com/{path}',
ie=NewgroundsIE.ie_key(), video_id=media_id))
return self.playlist_result(entries, playlist_id, title)
+
+
+class NewgroundsUserIE(InfoExtractor):
+ IE_NAME = 'Newgrounds:user'
+ _VALID_URL = r'https?://(?P<id>[^\.]+)\.newgrounds\.com/(?:movies|audio)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://burn7.newgrounds.com/audio',
+ 'info_dict': {
+ 'id': 'burn7',
+ },
+ 'playlist_mincount': 150,
+ }, {
+ 'url': 'https://burn7.newgrounds.com/movies',
+ 'info_dict': {
+ 'id': 'burn7',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://brian-beaton.newgrounds.com/movies',
+ 'info_dict': {
+ 'id': 'brian-beaton',
+ },
+ 'playlist_mincount': 10,
+ }]
+ _PAGE_SIZE = 30
+
+ def _fetch_page(self, channel_id, url, page):
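+ # OnDemandPagedList passes 0-based page numbers; the site API is 1-based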
+ page += 1
+ posts_info = self._download_json(
+ f'{url}/page/{page}', channel_id,
+ note=f'Downloading page {page}', headers={
+ 'Accept': 'application/json, text/javascript, */*; q = 0.01',
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+ sequence = posts_info.get('sequence', [])
+ for year in sequence:
+ posts = try_get(posts_info, lambda x: x['years'][str(year)]['items'])
+ for post in posts or []:
+ path, media_id = self._search_regex(
+ r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
+ post, 'url', group=(1, 2))
+ yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, channel_id, url), self._PAGE_SIZE)
+
+ return self.playlist_result(entries, channel_id)
diff --git a/hypervideo_dl/extractor/nexx.py b/hypervideo_dl/extractor/nexx.py
index 586c1b7..860d636 100644
--- a/hypervideo_dl/extractor/nexx.py
+++ b/hypervideo_dl/extractor/nexx.py
@@ -289,7 +289,7 @@ class NexxIE(InfoExtractor):
return formats
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
video_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/nfhsnetwork.py b/hypervideo_dl/extractor/nfhsnetwork.py
new file mode 100644
index 0000000..802f6ca
--- /dev/null
+++ b/hypervideo_dl/extractor/nfhsnetwork.py
@@ -0,0 +1,144 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+from ..utils import (
+ try_get,
+ unified_strdate,
+ unified_timestamp
+)
+
+
+class NFHSNetworkIE(InfoExtractor):
+ IE_NAME = 'NFHSNetwork'
+ _VALID_URL = r'https?://(?:www\.)?nfhsnetwork\.com/events/[\w-]+/(?P<id>(?:gam|evt|dd|)?[\w\d]{0,10})'
+ _TESTS = [{
+ # Auto-generated two-team sport (pixellot)
+ 'url': 'https://www.nfhsnetwork.com/events/rockford-high-school-rockford-mi/gamcf7e54cfbc',
+ 'info_dict': {
+ 'id': 'gamcf7e54cfbc',
+ 'ext': 'mp4',
+ 'title': 'Rockford vs Spring Lake - Girls Varsity Lacrosse 03/27/2021',
+ 'uploader': 'MHSAA - Michigan: Rockford High School, Rockford, MI',
+ 'uploader_id': 'cd2622cf76',
+ 'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi',
+ 'location': 'Rockford, Michigan',
+ 'timestamp': 1616859000,
+ 'upload_date': '20210327'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Non-sport activity with description
+ 'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c',
+ 'info_dict': {
+ 'id': 'evt4a30e3726c',
+ 'ext': 'mp4',
+ 'title': 'Drama Performance Limon High School vs. Limon High School - 12/13/2020',
+ 'description': 'Join the broadcast of the Limon High School Musical Performance at 2 PM.',
+ 'uploader': 'CHSAA: Limon High School, Limon, CO',
+ 'uploader_id': '7d2d121332',
+ 'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co',
+ 'location': 'Limon, Colorado',
+ 'timestamp': 1607893200,
+ 'upload_date': '20201213'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Postseason game
+ 'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45',
+ 'info_dict': {
+ 'id': 'dd8de71d45',
+ 'ext': 'mp4',
+ 'title': '2015 UA Holiday Classic Tournament: National Division - 12/26/2015',
+ 'uploader': 'SoCal Sports Productions',
+ 'uploader_id': '063dba0150',
+ 'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions',
+ 'location': 'San Diego, California',
+ 'timestamp': 1451187000,
+ 'upload_date': '20151226'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # Video with no broadcasts object
+ 'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82',
+ 'info_dict': {
+ 'id': '9aa2f92f82',
+ 'ext': 'mp4',
+ 'title': 'Competitive Equity - 01/21/2015',
+ 'description': 'Committee members discuss points of their research regarding a competitive equity plan',
+ 'uploader': 'WIAA - Wisconsin: Wisconsin Interscholastic Athletic Association',
+ 'uploader_id': 'a49f7d1002',
+ 'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi',
+ 'location': 'Stevens Point, Wisconsin',
+ 'timestamp': 1421856000,
+ 'upload_date': '20150121'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data = self._download_json(
+ 'https://cfunity.nfhsnetwork.com/v2/game_or_event/' + video_id,
+ video_id)
+ publisher = data.get('publishers')[0] # always exists
+ broadcast = (publisher.get('broadcasts') or publisher.get('vods'))[0] # some (older) videos don't have a broadcasts object
+ uploader = publisher.get('formatted_name') or publisher.get('name')
+ uploader_id = publisher.get('publisher_key')
+ pub_type = publisher.get('type')
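+ # map the publisher type to the matching section of the uploader URL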
+ uploader_prefix = (
+ 'schools' if pub_type == 'school'
+ else 'associations' if 'association' in pub_type
+ else 'affiliates' if pub_type in ('publisher', 'affiliate')
+ else 'schools')
+ uploader_page = 'https://www.nfhsnetwork.com/%s/%s' % (uploader_prefix, publisher.get('slug'))
+ location = '%s, %s' % (data.get('city'), data.get('state_name'))
+ description = broadcast.get('description')
+ is_live = bool(broadcast.get('on_air') or broadcast.get('status') == 'on_air')
+
+ timestamp = unified_timestamp(data.get('local_start_time'))
+ upload_date = unified_strdate(data.get('local_start_time'))
+
+ title = (
+ self._og_search_title(webpage)
+ or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title'))
+ title = title.split('|')[0].strip()
+
+ video_type = 'broadcasts' if is_live else 'vods'
+ key = broadcast.get('key') if is_live else try_get(publisher, lambda x: x['vods'][0]['key'])
+ m3u8_url = self._download_json(
+ 'https://cfunity.nfhsnetwork.com/v2/%s/%s/url' % (video_type, key),
+ video_id).get('video_url')
+
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=is_live)
+ self._sort_formats(formats, ['res', 'tbr'])
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_page,
+ 'location': location,
+ 'upload_date': upload_date,
+ 'is_live': is_live
+ }
diff --git a/hypervideo_dl/extractor/nhk.py b/hypervideo_dl/extractor/nhk.py
index 8a9331a..950a3d0 100644
--- a/hypervideo_dl/extractor/nhk.py
+++ b/hypervideo_dl/extractor/nhk.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import urljoin
@@ -22,7 +21,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
- lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
+ lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
if episode_id.isdigit():
episode_id = episode_id[:4] + '-' + episode_id[4:]
@@ -158,7 +157,7 @@ class NhkVodProgramIE(NhkBaseIE):
}]
def _real_extract(self, url):
- lang, m_type, program_id, episode_type = re.match(self._VALID_URL, url).groups()
+ lang, m_type, program_id, episode_type = self._match_valid_url(url).groups()
episodes = self._call_api(
program_id, lang, m_type == 'video', False, episode_type == 'clip')
diff --git a/hypervideo_dl/extractor/nhl.py b/hypervideo_dl/extractor/nhl.py
index eddfe1f..d3a5e17 100644
--- a/hypervideo_dl/extractor/nhl.py
+++ b/hypervideo_dl/extractor/nhl.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -14,7 +13,7 @@ from ..utils import (
class NHLBaseIE(InfoExtractor):
def _real_extract(self, url):
- site, tmp_id = re.match(self._VALID_URL, url).groups()
+ site, tmp_id = self._match_valid_url(url).groups()
video_data = self._download_json(
'https://%s/%s/%sid/v1/%s/details/web-v1.json'
% (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
diff --git a/hypervideo_dl/extractor/nick.py b/hypervideo_dl/extractor/nick.py
index 2e8b302..ba7da76 100644
--- a/hypervideo_dl/extractor/nick.py
+++ b/hypervideo_dl/extractor/nick.py
@@ -1,66 +1,73 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .mtv import MTVServicesInfoExtractor
from ..utils import update_url_query
class NickIE(MTVServicesInfoExtractor):
- # None of videos on the website are still alive?
IE_NAME = 'nick.com'
- _VALID_URL = r'https?://(?P<domain>(?:(?:www|beta)\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
+ _VALID_URL = r'https?://(?P<domain>(?:www\.)?nick(?:jr)?\.com)/(?:[^/]+/)?(?P<type>videos/clip|[^/]+/videos|episodes/[^/]+)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
_GEO_COUNTRIES = ['US']
_TESTS = [{
- 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
+ 'url': 'https://www.nick.com/episodes/sq47rw/spongebob-squarepants-a-place-for-pets-lockdown-for-love-season-13-ep-1',
+ 'info_dict': {
+ 'description': 'md5:0650a9eb88955609d5c1d1c79292e234',
+ 'title': 'A Place for Pets/Lockdown for Love',
+ },
'playlist': [
{
- 'md5': '6e5adc1e28253bbb1b28ab05403dd4d4',
+ 'md5': 'cb8a2afeafb7ae154aca5a64815ec9d6',
'info_dict': {
- 'id': 'be6a17b0-412d-11e5-8ff7-0026b9414f30',
+ 'id': '85ee8177-d6ce-48f8-9eee-a65364f8a6df',
'ext': 'mp4',
- 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S1',
- 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S1',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
}
},
{
- 'md5': 'd7be441fc53a1d4882fa9508a1e5b3ce',
+ 'md5': '839a04f49900a1fcbf517020d94e0737',
'info_dict': {
- 'id': 'be6b8f96-412d-11e5-8ff7-0026b9414f30',
+ 'id': '2e2a9960-8fd4-411d-868b-28eb1beb7fae',
'ext': 'mp4',
- 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S2',
- 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S2',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
}
},
{
- 'md5': 'efffe1728a234b2b0d2f2b343dd1946f',
+ 'md5': 'f1145699f199770e2919ee8646955d46',
'info_dict': {
- 'id': 'be6cf7e6-412d-11e5-8ff7-0026b9414f30',
+ 'id': 'dc91c304-6876-40f7-84a6-7aece7baa9d0',
'ext': 'mp4',
- 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S3',
- 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S3',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
+
}
},
{
- 'md5': '1ec6690733ab9f41709e274a1d5c7556',
+ 'md5': 'd463116875aee2585ee58de3b12caebd',
'info_dict': {
- 'id': 'be6e3354-412d-11e5-8ff7-0026b9414f30',
+ 'id': '5d929486-cf4c-42a1-889a-6e0d183a101a',
'ext': 'mp4',
- 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S4',
- 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.',
+ 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S4',
+ 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.',
+
}
},
],
}, {
- 'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
- 'only_matching': True,
- }, {
- 'url': 'http://beta.nick.com/nicky-ricky-dicky-and-dawn/videos/nicky-ricky-dicky-dawn-301-full-episode/',
- 'only_matching': True,
+ 'url': 'http://www.nickjr.com/blues-clues-and-you/videos/blues-clues-and-you-original-209-imagination-station/',
+ 'info_dict': {
+ 'id': '31631529-2fc5-430b-b2ef-6a74b4609abd',
+ 'ext': 'mp4',
+ 'description': 'md5:9d65a66df38e02254852794b2809d1cf',
+ 'title': 'Blue\'s Imagination Station',
+ },
+ 'skip': 'Not accessible?'
}]
def _get_feed_query(self, uri):
@@ -70,7 +77,9 @@ class NickIE(MTVServicesInfoExtractor):
}
def _real_extract(self, url):
- domain, display_id = re.match(self._VALID_URL, url).groups()
+ domain, video_type, display_id = self._match_valid_url(url).groups()
+ if video_type.startswith('episodes'):
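+ # 'episodes/...' pages go through the generic MTVServices extraction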
+ return super()._real_extract(url)
video_data = self._download_json(
'http://%s/data/video.endLevel.json' % domain,
display_id, query={
@@ -108,7 +117,7 @@ class NickBrIE(MTVServicesInfoExtractor):
}]
def _real_extract(self, url):
- domain, display_id = re.match(self._VALID_URL, url).groups()
+ domain, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
uri = self._search_regex(
r'data-(?:contenturi|mgid)="([^"]+)', webpage, 'mgid')
@@ -176,21 +185,11 @@ class NickDeIE(MTVServicesInfoExtractor):
'only_matching': True,
}]
- def _extract_mrss_url(self, webpage, host):
- return update_url_query(self._search_regex(
- r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
- {'siteKey': host})
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- host = mobj.group('host')
-
- webpage = self._download_webpage(url, video_id)
-
- mrss_url = self._extract_mrss_url(webpage, host)
-
- return self._get_videos_info_from_url(mrss_url, video_id)
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
class NickNightIE(NickDeIE):
@@ -245,5 +244,5 @@ class NickRuIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- mgid = self._extract_mgid(webpage)
+ mgid = self._extract_mgid(webpage, url)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
diff --git a/hypervideo_dl/extractor/niconico.py b/hypervideo_dl/extractor/niconico.py
index a85fc3d..76f0870 100644
--- a/hypervideo_dl/extractor/niconico.py
+++ b/hypervideo_dl/extractor/niconico.py
@@ -2,25 +2,28 @@
from __future__ import unicode_literals
import datetime
-import functools
+import itertools
import json
-import math
+import re
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..compat import (
+ compat_str,
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
- determine_ext,
- dict_get,
ExtractorError,
+ dict_get,
float_or_none,
- InAdvancePagedList,
int_or_none,
+ OnDemandPagedList,
parse_duration,
parse_iso8601,
+ PostProcessingError,
remove_start,
+ str_or_none,
try_get,
unified_timestamp,
urlencode_postdata,
@@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
- 'md5': 'd1a75c0823e2f629128c43e1212760f9',
+ 'md5': 'a5bad06f1347452102953f323c69da34s',
'info_dict': {
'id': 'sm22312215',
'ext': 'mp4',
@@ -162,6 +165,11 @@ class NiconicoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
+ _API_HEADERS = {
+ 'X-Frontend-ID': '6',
+ 'X-Frontend-Version': '0'
+ }
+
def _real_initialize(self):
self._login()
@@ -188,40 +196,92 @@ class NiconicoIE(InfoExtractor):
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
login_ok = False
if not login_ok:
- self._downloader.report_warning('unable to log in: bad username or password')
+ self.report_warning('unable to log in: bad username or password')
return login_ok
- def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
- def yesno(boolean):
- return 'yes' if boolean else 'no'
-
- session_api_data = api_data['video']['dmcInfo']['session_api']
- session_api_endpoint = session_api_data['urls'][0]
-
- format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
+ def _get_heartbeat_info(self, info_dict):
+
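+ # info_dict['url'] has the form 'niconico_dmc:<video_id>/<video_src_id>/<audio_src_id>' (built in _extract_format_for_quality)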
+ video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
+
+ api_data = (
+ info_dict.get('_api_data')
+ or self._parse_json(
+ self._html_search_regex(
+ 'data-api-data="([^"]+)"',
+ self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
+ 'API data', default='{}'),
+ video_id))
+
+ session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
+ session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
+
+ def ping():
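+ # notify the tracking endpoint; without this the server may refuse the download (see the warning below)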
+ status = try_get(
+ self._download_json(
+ 'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
+ query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
+ note='Acquiring permission for downloading video',
+ headers=self._API_HEADERS),
+ lambda x: x['meta']['status'])
+ if status != 200:
+ self.report_warning('Failed to acquire permission for playing video. The video may fail to download.')
+
+ yesno = lambda x: 'yes' if x else 'no'
+
+ # m3u8 (encryption)
+ if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
+ protocol = 'm3u8'
+ encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
+ session_api_http_parameters = {
+ 'parameters': {
+ 'hls_parameters': {
+ 'encryption': {
+ encryption: {
+ 'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
+ 'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
+ }
+ },
+ 'transfer_preset': '',
+ 'use_ssl': yesno(session_api_endpoint['isSsl']),
+ 'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
+ 'segment_duration': 6000,
+ }
+ }
+ }
+ # http
+ else:
+ protocol = 'http'
+ session_api_http_parameters = {
+ 'parameters': {
+ 'http_output_download_parameters': {
+ 'use_ssl': yesno(session_api_endpoint['isSsl']),
+ 'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
+ }
+ }
+ }
session_response = self._download_json(
session_api_endpoint['url'], video_id,
query={'_format': 'json'},
headers={'Content-Type': 'application/json'},
- note='Downloading JSON metadata for %s' % format_id,
+ note='Downloading JSON metadata for %s' % info_dict['format_id'],
data=json.dumps({
'session': {
'client_info': {
- 'player_id': session_api_data['player_id'],
+ 'player_id': session_api_data.get('playerId'),
},
'content_auth': {
- 'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
- 'content_key_timeout': session_api_data['content_key_timeout'],
+ 'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
+ 'content_key_timeout': session_api_data.get('contentKeyTimeout'),
'service_id': 'nicovideo',
- 'service_user_id': session_api_data['service_user_id']
+ 'service_user_id': session_api_data.get('serviceUserId')
},
- 'content_id': session_api_data['content_id'],
+ 'content_id': session_api_data.get('contentId'),
'content_src_id_sets': [{
'content_src_ids': [{
'src_id_to_mux': {
- 'audio_src_ids': [audio_quality['id']],
- 'video_src_ids': [video_quality['id']],
+ 'audio_src_ids': [audio_src_id],
+ 'video_src_ids': [video_src_id],
}
}]
}],
@@ -229,52 +289,81 @@ class NiconicoIE(InfoExtractor):
'content_uri': '',
'keep_method': {
'heartbeat': {
- 'lifetime': session_api_data['heartbeat_lifetime']
+ 'lifetime': session_api_data.get('heartbeatLifetime')
}
},
- 'priority': session_api_data['priority'],
+ 'priority': session_api_data.get('priority'),
'protocol': {
'name': 'http',
'parameters': {
- 'http_parameters': {
- 'parameters': {
- 'http_output_download_parameters': {
- 'use_ssl': yesno(session_api_endpoint['is_ssl']),
- 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
- }
- }
- }
+ 'http_parameters': session_api_http_parameters
}
},
- 'recipe_id': session_api_data['recipe_id'],
+ 'recipe_id': session_api_data.get('recipeId'),
'session_operation_auth': {
'session_operation_auth_by_signature': {
- 'signature': session_api_data['signature'],
- 'token': session_api_data['token'],
+ 'signature': session_api_data.get('signature'),
+ 'token': session_api_data.get('token'),
}
},
'timing_constraint': 'unlimited'
}
}).encode())
- resolution = video_quality.get('resolution', {})
+ info_dict['url'] = session_response['data']['session']['content_uri']
+ info_dict['protocol'] = protocol
+
+ # get heartbeat info
+ heartbeat_info_dict = {
+ 'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
+ 'data': json.dumps(session_response['data']),
+ # interval: the lifetime is in milliseconds; scale=3000 converts it to seconds and keeps a third as a safety buffer.
+ 'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
+ 'ping': ping
+ }
+
+ return info_dict, heartbeat_info_dict
+
+ def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
+ def parse_format_id(id_code):
+ mobj = re.match(r'''(?x)
+ (?:archive_)?
+ (?:(?P<codec>[^_]+)_)?
+ (?:(?P<br>\d+)kbps_)?
+ (?:(?P<res>\d+)p_)?
+ ''', '%s_' % id_code)
+ return mobj.groupdict() if mobj else {}
+
+ protocol = 'niconico_dmc'
+ format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
+ vdict = parse_format_id(video_quality['id'])
+ adict = parse_format_id(audio_quality['id'])
+ resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
+ vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
return {
- 'url': session_response['data']['session']['content_uri'],
+ 'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
'format_id': format_id,
+ 'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
- 'abr': float_or_none(audio_quality.get('bitrate'), 1000),
- 'vbr': float_or_none(video_quality.get('bitrate'), 1000),
- 'height': resolution.get('height'),
- 'width': resolution.get('width'),
+ 'vcodec': vdict.get('codec'),
+ 'acodec': adict.get('codec'),
+ 'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
+ 'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
+ 'height': int_or_none(resolution.get('height', vdict.get('res'))),
+ 'width': int_or_none(resolution.get('width')),
+ 'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
+ 'protocol': protocol,
+ 'http_headers': {
+ 'Origin': 'https://www.nicovideo.jp',
+ 'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
+ }
}
def _real_extract(self, url):
video_id = self._match_id(url)
- # Get video webpage. We are not actually interested in it for normal
- # cases, but need the cookies in order to be able to download the
- # info webpage
+ # Get video webpage for API data.
webpage, handle = self._download_webpage_handle(
'http://www.nicovideo.jp/watch/' + video_id, video_id)
if video_id.startswith('so'):
@@ -284,86 +373,136 @@ class NiconicoIE(InfoExtractor):
'data-api-data="([^"]+)"', webpage,
'API data', default='{}'), video_id)
- def _format_id_from_url(video_url):
- return 'economy' if video_real_url.endswith('low') else 'normal'
-
- try:
- video_real_url = api_data['video']['smileInfo']['url']
- except KeyError: # Flash videos
- # Get flv info
- flv_info_webpage = self._download_webpage(
- 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
- video_id, 'Downloading flv info')
-
- flv_info = compat_parse_qs(flv_info_webpage)
- if 'url' not in flv_info:
- if 'deleted' in flv_info:
- raise ExtractorError('The video has been deleted.',
- expected=True)
- elif 'closed' in flv_info:
- raise ExtractorError('Niconico videos now require logging in',
- expected=True)
- elif 'error' in flv_info:
- raise ExtractorError('%s reports error: %s' % (
- self.IE_NAME, flv_info['error'][0]), expected=True)
- else:
- raise ExtractorError('Unable to find video URL')
-
- video_info_xml = self._download_xml(
- 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
- video_id, note='Downloading video info page')
-
- def get_video_info(items):
- if not isinstance(items, list):
- items = [items]
- for item in items:
- ret = xpath_text(video_info_xml, './/' + item)
- if ret:
- return ret
-
- video_real_url = flv_info['url'][0]
-
- extension = get_video_info('movie_type')
- if not extension:
- extension = determine_ext(video_real_url)
-
- formats = [{
- 'url': video_real_url,
- 'ext': extension,
- 'format_id': _format_id_from_url(video_real_url),
- }]
- else:
- formats = []
-
- dmc_info = api_data['video'].get('dmcInfo')
- if dmc_info: # "New" HTML5 videos
- quality_info = dmc_info['quality']
- for audio_quality in quality_info['audios']:
- for video_quality in quality_info['videos']:
- if not audio_quality['available'] or not video_quality['available']:
- continue
- formats.append(self._extract_format_for_quality(
- api_data, video_id, audio_quality, video_quality))
-
- self._sort_formats(formats)
- else: # "Old" HTML5 videos
- formats = [{
+ def get_video_info_web(items):
+ return dict_get(api_data['video'], items)
+
+ # Get video info
+ video_info_xml = self._download_xml(
+ 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
+ video_id, note='Downloading video info page')
+
+ def get_video_info_xml(items):
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ ret = xpath_text(video_info_xml, './/' + item)
+ if ret:
+ return ret
+
+ if get_video_info_xml('error'):
+ error_code = get_video_info_xml('code')
+
+ if error_code == 'DELETED':
+ raise ExtractorError('The video has been deleted.',
+ expected=True)
+ elif error_code == 'NOT_FOUND':
+ raise ExtractorError('The video is not found.',
+ expected=True)
+ elif error_code == 'COMMUNITY':
+ self.to_screen('%s: The video is community members only.' % video_id)
+ else:
+ raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
+
+ # Start extracting video formats
+ formats = []
+
+ # Get HTML5 videos info
+ quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
+ if not quality_info:
+ raise ExtractorError('The video can\'t be downloaded', expected=True)
+
+ for audio_quality in quality_info.get('audios') or {}:
+ for video_quality in quality_info.get('videos') or {}:
+ if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
+ continue
+ formats.append(self._extract_format_for_quality(
+ api_data, video_id, audio_quality, video_quality))
+
+ # Get flv/swf info
+ timestamp = None
+ video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
+ if video_real_url:
+ is_economy = video_real_url.endswith('low')
+
+ if is_economy:
+ self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
+
+ # Invoking ffprobe to determine resolution
+ pp = FFmpegPostProcessor(self._downloader)
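+ # serialize the cookie jar into 'name=value; path=/; domain=...' lines for ffprobe's -cookies option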
+ cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
+
+ self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
+
+ try:
+ metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
+ except PostProcessingError as err:
+ raise ExtractorError(err.msg, expected=True)
+
+ v_stream = a_stream = {}
+
+ # Some complex swf files don't have a video stream (e.g. nm4809023)
+ for stream in metadata['streams']:
+ if stream['codec_type'] == 'video':
+ v_stream = stream
+ elif stream['codec_type'] == 'audio':
+ a_stream = stream
+
+ # Community restricted videos seem to have issues with the thumb API not returning anything at all
+ filesize = int(
+ (get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
+ or metadata['format']['size']
+ )
+ extension = (
+ (get_video_info_xml('movie_type') or 'mp4')
+ if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
+ )
+
+ # The 'creation_time' tag on the video stream of re-encoded SMILEVIDEO mp4 files is '1970-01-01T00:00:00.000000Z'.
+ timestamp = (
+ parse_iso8601(get_video_info_web('first_retrieve'))
+ or unified_timestamp(get_video_info_web('postedDateTime'))
+ )
+ metadata_timestamp = (
+ (parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time'])) or timestamp)
+ if extension != 'mp4' else 0
+ )
+
+ # According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
+ smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
+
+ is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
+
+ # If the movie file size is unstable, the old server movie is not the source movie.
+ if filesize > 1:
+ formats.append({
'url': video_real_url,
- 'ext': 'mp4',
- 'format_id': _format_id_from_url(video_real_url),
- }]
-
- def get_video_info(items):
- return dict_get(api_data['video'], items)
+ 'format_id': 'smile' if not is_economy else 'smile_low',
+ 'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
+ 'ext': extension,
+ 'container': extension,
+ 'vcodec': v_stream.get('codec_name'),
+ 'acodec': a_stream.get('codec_name'),
+ # Some complex swf files don't have total bit rate metadata (e.g. nm6049209)
+ 'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
+ 'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
+ 'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
+ 'height': int_or_none(v_stream.get('height')),
+ 'width': int_or_none(v_stream.get('width')),
+ 'source_preference': 5 if not is_economy else -2,
+ 'quality': 5 if is_source and not is_economy else None,
+ 'filesize': filesize
+ })
+
+ self._sort_formats(formats)
# Start extracting information
- title = get_video_info('title')
- if not title:
- title = self._og_search_title(webpage, default=None)
- if not title:
- title = self._html_search_regex(
+ title = (
+ get_video_info_xml('title') # prefer to get the untranslated original title
+ or get_video_info_web(['originalTitle', 'title'])
+ or self._og_search_title(webpage, default=None)
+ or self._html_search_regex(
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
- webpage, 'video title')
+ webpage, 'video title'))
watch_api_data_string = self._html_search_regex(
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
@@ -372,14 +511,15 @@ class NiconicoIE(InfoExtractor):
video_detail = watch_api_data.get('videoDetail', {})
thumbnail = (
- get_video_info(['thumbnail_url', 'thumbnailURL'])
+ self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
+ or dict_get( # choose highest from 720p to 240p
+ get_video_info_web('thumbnail'),
+ ['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
or video_detail.get('thumbnail'))
- description = get_video_info('description')
+ description = get_video_info_web('description')
- timestamp = (parse_iso8601(get_video_info('first_retrieve'))
- or unified_timestamp(get_video_info('postedDateTime')))
if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match:
@@ -388,19 +528,25 @@ class NiconicoIE(InfoExtractor):
timestamp = parse_iso8601(
video_detail['postedAt'].replace('/', '-'),
delimiter=' ', timezone=datetime.timedelta(hours=9))
+ timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
- view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
+ view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
if not view_count:
match = self._html_search_regex(
r'>Views: <strong[^>]*>([^<]+)</strong>',
webpage, 'view count', default=None)
if match:
view_count = int_or_none(match.replace(',', ''))
- view_count = view_count or video_detail.get('viewCount')
+ view_count = (
+ view_count
+ or video_detail.get('viewCount')
+ or try_get(api_data, lambda x: x['video']['count']['view']))
+
+ comment_count = (
+ int_or_none(get_video_info_web('comment_num'))
+ or video_detail.get('commentCount')
+ or try_get(api_data, lambda x: x['video']['count']['comment']))
- comment_count = (int_or_none(get_video_info('comment_num'))
- or video_detail.get('commentCount')
- or try_get(api_data, lambda x: x['thread']['commentCount']))
if not comment_count:
match = self._html_search_regex(
r'>Comments: <strong[^>]*>([^<]+)</strong>',
@@ -409,22 +555,41 @@ class NiconicoIE(InfoExtractor):
comment_count = int_or_none(match.replace(',', ''))
duration = (parse_duration(
- get_video_info('length')
+ get_video_info_web('length')
or self._html_search_meta(
'video:duration', webpage, 'video duration', default=None))
or video_detail.get('length')
- or get_video_info('duration'))
+ or get_video_info_web('duration'))
+
+ webpage_url = get_video_info_web('watch_url') or url
- webpage_url = get_video_info('watch_url') or url
+ # for channel movie and community movie
+ channel_id = try_get(
+ api_data,
+ (lambda x: x['channel']['globalId'],
+ lambda x: x['community']['globalId']))
+ channel = try_get(
+ api_data,
+ (lambda x: x['channel']['name'],
+ lambda x: x['community']['name']))
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
# in the JSON, which will cause None to be returned instead of {}.
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
- uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
- uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
+ uploader_id = str_or_none(
+ get_video_info_web(['ch_id', 'user_id'])
+ or owner.get('id')
+ or channel_id
+ )
+ uploader = (
+ get_video_info_web(['ch_name', 'user_nickname'])
+ or owner.get('nickname')
+ or channel
+ )
return {
'id': video_id,
+ '_api_data': api_data,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
@@ -432,6 +597,8 @@ class NiconicoIE(InfoExtractor):
'uploader': uploader,
'timestamp': timestamp,
'uploader_id': uploader_id,
+ 'channel': channel,
+ 'channel_id': channel_id,
'view_count': view_count,
'comment_count': comment_count,
'duration': duration,
@@ -440,7 +607,7 @@ class NiconicoIE(InfoExtractor):
class NiconicoPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.nicovideo.jp/mylist/27411728',
@@ -456,60 +623,175 @@ class NiconicoPlaylistIE(InfoExtractor):
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
'only_matching': True,
}]
- _PAGE_SIZE = 100
- def _call_api(self, list_id, resource, query):
- return self._download_json(
- 'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
- 'Downloading %s JSON metatdata' % resource, query=query,
- headers={'X-Frontend-Id': 6})['data']['mylist']
-
- def _parse_owner(self, item):
- owner = item.get('owner') or {}
- if owner:
- return {
- 'uploader': owner.get('name'),
- 'uploader_id': owner.get('id'),
- }
- return {}
-
- def _fetch_page(self, list_id, page):
- page += 1
- items = self._call_api(list_id, 'page %d' % page, {
- 'page': page,
- 'pageSize': self._PAGE_SIZE,
- })['items']
- for item in items:
- video = item.get('video') or {}
- video_id = video.get('id')
- if not video_id:
- continue
- count = video.get('count') or {}
- get_count = lambda x: int_or_none(count.get(x))
- info = {
+ _API_HEADERS = {
+ 'X-Frontend-ID': '6',
+ 'X-Frontend-Version': '0'
+ }
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ def get_page_data(pagenum, pagesize):
+ return self._download_json(
+ 'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
+ query={'page': 1 + pagenum, 'pageSize': pagesize},
+ headers=self._API_HEADERS).get('data').get('mylist')
+
+ data = get_page_data(0, 1)
+ title = data.get('name')
+ description = data.get('description')
+ uploader = try_get(data, lambda x: x['owner']['name'])
+ uploader_id = try_get(data, lambda x: x['owner']['id'])
+
+ def pagefunc(pagenum):
+ data = get_page_data(pagenum, 25)
+ return ({
'_type': 'url',
- 'id': video_id,
- 'title': video.get('title'),
- 'url': 'https://www.nicovideo.jp/watch/' + video_id,
- 'description': video.get('shortDescription'),
- 'duration': int_or_none(video.get('duration')),
- 'view_count': get_count('view'),
- 'comment_count': get_count('comment'),
- 'ie_key': NiconicoIE.ie_key(),
- }
- info.update(self._parse_owner(video))
- yield info
+ 'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
+ } for item in data.get('items'))
+
+ return {
+ '_type': 'playlist',
+ 'id': list_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'entries': OnDemandPagedList(pagefunc, 25),
+ }
+
+
+NicovideoSearchIE_NAME = 'nicovideo:search'
+
+
+class NicovideoSearchURLIE(InfoExtractor):
+ IE_NAME = f'{NicovideoSearchIE_NAME}_url'
+ IE_DESC = 'Nico video search URLs'
+ _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
+ _TESTS = [{
+ 'url': 'http://www.nicovideo.jp/search/sm9',
+ 'info_dict': {
+ 'id': 'sm9',
+ 'title': 'sm9'
+ },
+ 'playlist_mincount': 40,
+ }, {
+ 'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
+ 'info_dict': {
+ 'id': 'sm9',
+ 'title': 'sm9'
+ },
+ 'playlist_count': 31,
+ }]
+
+ def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
+ query = query or {}
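+ # honor an explicit page number from the query; otherwise walk pages until one comes back empty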
+ pages = [query['page']] if 'page' in query else itertools.count(1)
+ for page_num in pages:
+ query['page'] = str(page_num)
+ webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
+ results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
+ for item in results:
+ yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
+ if not results:
+ break
+
+ def _real_extract(self, url):
+ query = self._match_id(url)
+ return self.playlist_result(self._entries(url, query), query, query)
+
+
+class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
+ IE_DESC = 'Nico video searches'
+ _MAX_RESULTS = float('inf')
+ IE_NAME = NicovideoSearchIE_NAME
+ _SEARCH_KEY = 'nicosearch'
+ _TESTS = []
+
+ def _search_results(self, query):
+ return self._entries(
+ self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
+
+
+class NicovideoSearchDateIE(NicovideoSearchIE):
+ IE_DESC = 'Nico video searches, newest first'
+ IE_NAME = f'{NicovideoSearchIE_NAME}:date'
+ _SEARCH_KEY = 'nicosearchdate'
+ _TESTS = [{
+ 'url': 'nicosearchdateall:a',
+ 'info_dict': {
+ 'id': 'a',
+ 'title': 'a'
+ },
+ 'playlist_mincount': 1610,
+ }]
+
+ _START_DATE = datetime.date(2007, 1, 1)
+ _RESULTS_PER_PAGE = 32
+ _MAX_PAGES = 50
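+    # a single query is served at most _MAX_PAGES pages of _RESULTS_PER_PAGE
+    # results, so _entries bisects the date interval to retrieve everything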
+
+ def _entries(self, url, item_id, start_date=None, end_date=None):
+ start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
+
+ # If the last page has a full page of videos, we need to break down the query interval further
+ last_page_len = len(list(self._get_entries_for_date(
+ url, item_id, start_date, end_date, self._MAX_PAGES,
+ note=f'Checking number of videos from {start_date} to {end_date}')))
+ if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date):
+ midpoint = start_date + ((end_date - start_date) // 2)
+ yield from self._entries(url, item_id, midpoint, end_date)
+ yield from self._entries(url, item_id, start_date, midpoint)
+ else:
+ self.to_screen(f'{item_id}: Downloading results from {start_date} to {end_date}')
+ yield from self._get_entries_for_date(
+ url, item_id, start_date, end_date, note=' Downloading page %(page)s')
+
+ def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
+ query = {
+ 'start': str(start_date),
+ 'end': str(end_date or start_date),
+ 'sort': 'f',
+ 'order': 'd',
+ }
+ if page_num:
+ query['page'] = str(page_num)
+
+ yield from NicovideoSearchURLIE._entries(self, url, item_id, query=query, note=note)
+
+
+class NiconicoUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
+ _TEST = {
+ 'url': 'https://www.nicovideo.jp/user/419948',
+ 'info_dict': {
+ 'id': '419948',
+ },
+ 'playlist_mincount': 101,
+ }
+ _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
+ _PAGE_SIZE = 100
+
+ _API_HEADERS = {
+ 'X-Frontend-ID': '6',
+ 'X-Frontend-Version': '0'
+ }
+
+ def _entries(self, list_id):
+ total_count = 1
+ count = page_num = 0
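+        # total_count starts as a placeholder; the first response's totalCount overwrites it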
+ while count < total_count:
+ json_parsed = self._download_json(
+ self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
+ headers=self._API_HEADERS,
+ note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
+ if not page_num:
+ total_count = int_or_none(json_parsed['data'].get('totalCount'))
+ for entry in json_parsed["data"]["items"]:
+ count += 1
+ yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
+ page_num += 1
def _real_extract(self, url):
list_id = self._match_id(url)
- mylist = self._call_api(list_id, 'list', {
- 'pageSize': 1,
- })
- entries = InAdvancePagedList(
- functools.partial(self._fetch_page, list_id),
- math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
- self._PAGE_SIZE)
- result = self.playlist_result(
- entries, list_id, mylist.get('name'), mylist.get('description'))
- result.update(self._parse_owner(mylist))
- return result
+ return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
diff --git a/hypervideo_dl/extractor/ninecninemedia.py b/hypervideo_dl/extractor/ninecninemedia.py
index cfc2203..4aaf21a 100644
--- a/hypervideo_dl/extractor/ninecninemedia.py
+++ b/hypervideo_dl/extractor/ninecninemedia.py
@@ -1,11 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
@@ -20,7 +18,7 @@ class NineCNineMediaIE(InfoExtractor):
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
def _real_extract(self, url):
- destination_code, content_id = re.match(self._VALID_URL, url).groups()
+ destination_code, content_id = self._match_valid_url(url).groups()
api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
content = self._download_json(api_base_url, content_id, query={
'$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
@@ -34,8 +32,9 @@ class NineCNineMediaIE(InfoExtractor):
'$include': '[HasClosedCaptions]',
})
- if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if (not self.get_param('allow_unplayable_formats')
+ and try_get(content_package, lambda x: x['Constraints']['Security']['Type'])):
+ self.report_drm(content_id)
manifest_base_url = content_package_url + 'manifest.'
formats = []
diff --git a/hypervideo_dl/extractor/ninenow.py b/hypervideo_dl/extractor/ninenow.py
index 6157dc7..6043674 100644
--- a/hypervideo_dl/extractor/ninenow.py
+++ b/hypervideo_dl/extractor/ninenow.py
@@ -8,6 +8,10 @@ from ..utils import (
int_or_none,
float_or_none,
smuggle_url,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
)
@@ -37,6 +41,24 @@ class NineNowIE(InfoExtractor):
# DRM protected
'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1',
'only_matching': True,
+ }, {
+ # episode of series
+ 'url': 'https://www.9now.com.au/lego-masters/season-3/episode-3',
+ 'info_dict': {
+ 'id': '6249614030001',
+ 'title': 'Episode 3',
+ 'ext': 'mp4',
+ 'season_number': 3,
+ 'episode_number': 3,
+ 'description': 'In the first elimination of the competition, teams will have 10 hours to build a world inside a snow globe.',
+ 'uploader_id': '4460760524001',
+ 'timestamp': 1619002200,
+ 'upload_date': '20210421',
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks'],
+        'params': {
+ 'skip_download': True,
+ }
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s'
@@ -59,25 +81,31 @@ class NineNowIE(InfoExtractor):
cache = page_data.get(kind, {}).get('%sCache' % kind, {})
if not cache:
continue
- common_data = (cache.get(current_key) or list(cache.values())[0])[kind]
+ common_data = {
+ 'episode': (cache.get(current_key) or list(cache.values())[0])[kind],
+                'season': (cache.get(current_key) or list(cache.values())[0]).get('season')
+ }
break
else:
raise ExtractorError('Unable to find video data')
- video_data = common_data['video']
-
- if video_data.get('drm'):
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']
- video_id = compat_str(video_data.get('id') or brightcove_id)
- title = common_data['name']
+ if not self.get_param('allow_unplayable_formats') and try_get(common_data, lambda x: x['episode']['video']['drm'], bool):
+ self.report_drm(display_id)
+ brightcove_id = try_get(
+ common_data, lambda x: x['episode']['video']['brightcoveId'], compat_str) or 'ref:%s' % common_data['episode']['video']['referenceId']
+ video_id = str_or_none(try_get(common_data, lambda x: x['episode']['video']['id'])) or brightcove_id
+ title = try_get(common_data, lambda x: x['episode']['name'], compat_str)
+ season_number = try_get(common_data, lambda x: x['season']['seasonNumber'], int)
+ episode_number = try_get(common_data, lambda x: x['episode']['episodeNumber'], int)
+ timestamp = unified_timestamp(try_get(common_data, lambda x: x['episode']['airDate'], compat_str))
+ release_date = unified_strdate(try_get(common_data, lambda x: x['episode']['availability'], compat_str))
+ thumbnails_data = try_get(common_data, lambda x: x['episode']['image']['sizes'], dict) or {}
thumbnails = [{
'id': thumbnail_id,
'url': thumbnail_url,
- 'width': int_or_none(thumbnail_id[1:])
- } for thumbnail_id, thumbnail_url in common_data.get('image', {}).get('sizes', {}).items()]
+ 'width': int_or_none(thumbnail_id[1:]),
+ } for thumbnail_id, thumbnail_url in thumbnails_data.items()]
return {
'_type': 'url_transparent',
@@ -86,8 +114,12 @@ class NineNowIE(InfoExtractor):
{'geo_countries': self._GEO_COUNTRIES}),
'id': video_id,
'title': title,
- 'description': common_data.get('description'),
- 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'description': try_get(common_data, lambda x: x['episode']['description'], compat_str),
+ 'duration': float_or_none(try_get(common_data, lambda x: x['episode']['video']['duration'], float), 1000),
'thumbnails': thumbnails,
'ie_key': 'BrightcoveNew',
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'timestamp': timestamp,
+ 'release_date': release_date,
}
diff --git a/hypervideo_dl/extractor/nitter.py b/hypervideo_dl/extractor/nitter.py
new file mode 100644
index 0000000..a0546cd
--- /dev/null
+++ b/hypervideo_dl/extractor/nitter.py
@@ -0,0 +1,228 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ parse_count,
+ unified_strdate,
+ unified_timestamp,
+ remove_end,
+ determine_ext,
+)
+import re
+import random
+
+
+class NitterIE(InfoExtractor):
+ # Taken from https://github.com/zedeus/nitter/wiki/Instances
+
+ NON_HTTP_INSTANCES = (
+ '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
+ 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
+ 'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
+ 'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
+ 'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
+ 'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
+ '26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
+
+ 'nitter.i2p',
+ 'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
+
+ 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
+ )
+
+ HTTP_INSTANCES = (
+ 'nitter.42l.fr',
+ 'nitter.pussthecat.org',
+ 'nitter.nixnet.services',
+ 'nitter.mastodont.cat',
+ 'nitter.tedomum.net',
+ 'nitter.fdn.fr',
+ 'nitter.1d4.us',
+ 'nitter.kavin.rocks',
+ 'tweet.lambda.dance',
+ 'nitter.cc',
+ 'nitter.vxempire.xyz',
+ 'nitter.unixfox.eu',
+ 'nitter.domain.glass',
+ 'nitter.himiko.cloud',
+ 'nitter.eu',
+ 'nitter.namazso.eu',
+ 'nitter.mailstation.de',
+ 'nitter.actionsack.com',
+ 'nitter.cattube.org',
+ 'nitter.dark.fail',
+ 'birdsite.xanny.family',
+ 'nitter.40two.app',
+ 'nitter.skrep.in',
+
+ # not in the list anymore
+ 'nitter.snopyta.org',
+ )
+
+ DEAD_INSTANCES = (
+ # maintenance
+ 'nitter.ethibox.fr',
+
+ # official, rate limited
+ 'nitter.net',
+ # offline
+ 'nitter.13ad.de',
+ 'nitter.weaponizedhumiliation.com',
+ )
+
+ INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
+
+ _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')'
+ _VALID_URL = r'https?://%(instance)s/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?' % {'instance': _INSTANCES_RE}
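+    # pick one live instance at random; the test URLs below are built against it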
+ current_instance = random.choice(HTTP_INSTANCES)
+
+ _TESTS = [
+ {
+ # GIF (wrapped in mp4)
+ 'url': 'https://%s/firefox/status/1314279897502629888#m' % current_instance,
+ 'info_dict': {
+ 'id': '1314279897502629888',
+ 'ext': 'mp4',
+ 'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
+ 'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Firefox 🔥',
+ 'uploader_id': 'firefox',
+ 'uploader_url': 'https://%s/firefox' % current_instance,
+ 'upload_date': '20201008',
+ 'timestamp': 1602183720,
+ },
+ }, { # normal video
+ 'url': 'https://%s/Le___Doc/status/1299715685392756737#m' % current_instance,
+ 'info_dict': {
+ 'id': '1299715685392756737',
+ 'ext': 'mp4',
+ 'title': 'Le Doc - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
+ 'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Le Doc',
+ 'uploader_id': 'Le___Doc',
+ 'uploader_url': 'https://%s/Le___Doc' % current_instance,
+ 'upload_date': '20200829',
+ 'timestamp': 1598711341,
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ }, { # video embed in a "Streaming Political Ads" box
+ 'url': 'https://%s/mozilla/status/1321147074491092994#m' % current_instance,
+ 'info_dict': {
+ 'id': '1321147074491092994',
+ 'ext': 'mp4',
+ 'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
+ 'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mozilla',
+ 'uploader_id': 'mozilla',
+ 'uploader_url': 'https://%s/mozilla' % current_instance,
+ 'upload_date': '20201027',
+ 'timestamp': 1603820982
+ },
+ }, { # not the first tweet but main-tweet
+ 'url': 'https://%s/TheNaturalNu/status/1379050895539724290#m' % current_instance,
+ 'info_dict': {
+ 'id': '1379050895539724290',
+ 'ext': 'mp4',
+ 'title': 'Dorothy Zbornak - This had me hollering!!',
+ 'description': 'This had me hollering!!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Dorothy Zbornak',
+ 'uploader_id': 'TheNaturalNu',
+ 'uploader_url': 'https://%s/TheNaturalNu' % current_instance,
+ 'timestamp': 1617626329,
+ 'upload_date': '20210405'
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ parsed_url = compat_urlparse.urlparse(url)
+ base_url = '%s://%s' % (parsed_url.scheme, parsed_url.netloc)
+
+ self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
+ full_webpage = self._download_webpage(url, video_id)
+
+        # narrow parsing to the main tweet when the page renders a whole thread
+        webpage = full_webpage
+        main_tweet_start = full_webpage.find('class="main-tweet"')
+        if main_tweet_start > 0:
+            webpage = full_webpage[main_tweet_start:]
+
+ video_url = '%s%s' % (base_url, self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
+ ext = determine_ext(video_url)
+
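+        # HLS manifest URLs here carry no recognizable file extension, so
+        # determine_ext falls through to 'unknown_video'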
+ if ext == 'unknown_video':
+ formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+ else:
+ formats = [{
+ 'url': video_url,
+ 'ext': ext
+ }]
+
+ title = self._og_search_description(full_webpage)
+ if not title:
+ title = self._html_search_regex(r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title')
+ description = title
+
+ mobj = self._match_valid_url(url)
+ uploader_id = (
+ mobj.group('uploader_id')
+ or self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+ )
+
+        uploader_url = '%s/%s' % (base_url, uploader_id) if uploader_id else None
+
+ uploader = self._html_search_regex(r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
+
+ if uploader:
+ title = '%s - %s' % (uploader, title)
+
+ view_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-play[^>]*></span>\s([^<]+)</div>', webpage, 'view count', fatal=False))
+ like_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-heart[^>]*></span>\s([^<]+)</div>', webpage, 'like count', fatal=False))
+ repost_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-retweet[^>]*></span>\s([^<]+)</div>', webpage, 'repost count', fatal=False))
+        comment_count = parse_count(self._html_search_regex(r'<span[^>]+class="icon-comment[^>]*></span>\s([^<]+)</div>', webpage, 'comment count', fatal=False))
+
+ thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url')
+        if not thumbnail:
+            thumbnail = self._html_search_regex(r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
+            thumbnail = '%s%s' % (base_url, thumbnail) if thumbnail else None
+ thumbnail = remove_end(thumbnail, '%3Asmall')
+
+        thumbnails = []
+        if thumbnail:
+            for thumbnail_id in ('thumb', 'small', 'large', 'medium', 'orig'):
+                thumbnails.append({
+                    'id': thumbnail_id,
+                    'url': thumbnail + '%3A' + thumbnail_id,
+                })
+
+ date = self._html_search_regex(r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)
+ upload_date = unified_strdate(date)
+ timestamp = unified_timestamp(date)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'repost_count': repost_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }
diff --git a/hypervideo_dl/extractor/noco.py b/hypervideo_dl/extractor/noco.py
new file mode 100644
index 0000000..78c4952
--- /dev/null
+++ b/hypervideo_dl/extractor/noco.py
@@ -0,0 +1,235 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+import hashlib
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ parse_iso8601,
+ parse_qs,
+ sanitized_Request,
+ urlencode_postdata,
+)
+
+
+class NocoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
+ _LOGIN_URL = 'https://noco.tv/do.php'
+ _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
+ _SUB_LANG_TEMPLATE = '&sub_lang=%s'
+ _NETRC_MACHINE = 'noco'
+
+ _TESTS = [
+ {
+ 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
+ 'md5': '0a993f0058ddbcd902630b2047ef710e',
+ 'info_dict': {
+ 'id': '11538',
+ 'ext': 'mp4',
+ 'title': 'Ami Ami Idol - Hello! France',
+ 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
+ 'upload_date': '20140412',
+ 'uploader': 'Nolife',
+ 'uploader_id': 'NOL',
+ 'duration': 2851.2,
+ },
+ 'skip': 'Requires noco account',
+ },
+ {
+ 'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
+ 'md5': 'c190f1f48e313c55838f1f412225934d',
+ 'info_dict': {
+ 'id': '12610',
+ 'ext': 'mp4',
+ 'title': 'The Guild #1 - Wake-Up Call',
+ 'timestamp': 1403863200,
+ 'upload_date': '20140627',
+ 'uploader': 'LBL42',
+ 'uploader_id': 'LBL',
+ 'duration': 233.023,
+ },
+ 'skip': 'Requires noco account',
+ }
+ ]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login = self._download_json(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata({
+ 'a': 'login',
+ 'cookie': '1',
+ 'username': username,
+ 'password': password,
+ }),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ })
+
+ if 'erreur' in login:
+ raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
+
+ @staticmethod
+ def _ts():
+ return int(time.time() * 1000)
+
+ def _call_api(self, path, video_id, note, sub_lang=None):
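+        # tk = md5(md5(ts) + static salt), where ts is the local clock shifted
+        # by the server offset measured in _real_extract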
+ ts = compat_str(self._ts() + self._ts_offset)
+ tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
+ url = self._API_URL_TEMPLATE % (path, ts, tk)
+ if sub_lang:
+ url += self._SUB_LANG_TEMPLATE % sub_lang
+
+ request = sanitized_Request(url)
+ request.add_header('Referer', self._referer)
+
+ resp = self._download_json(request, video_id, note)
+
+ if isinstance(resp, dict) and resp.get('error'):
+ self._raise_error(resp['error'], resp['description'])
+
+ return resp
+
+ def _raise_error(self, error, description):
+ raise ExtractorError(
+ '%s returned error: %s - %s' % (self.IE_NAME, error, description),
+ expected=True)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ # Timestamp adjustment offset between server time and local time
+ # must be calculated in order to use timestamps closest to server's
+ # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864)
+ webpage = self._download_webpage(url, video_id)
+
+ player_url = self._search_regex(
+ r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
+ webpage, 'noco player', group='player',
+ default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
+
+ qs = parse_qs(player_url)
+ ts = int_or_none(qs.get('ts', [None])[0])
+ self._ts_offset = ts - self._ts() if ts else 0
+ self._referer = player_url
+
+ medias = self._call_api(
+ 'shows/%s/medias' % video_id,
+ video_id, 'Downloading video JSON')
+
+ show = self._call_api(
+ 'shows/by_id/%s' % video_id,
+ video_id, 'Downloading show JSON')[0]
+
+ options = self._call_api(
+ 'users/init', video_id,
+ 'Downloading user options JSON')['options']
+ audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
+
+ if audio_lang_pref == 'original':
+ audio_lang_pref = show['original_lang']
+ if len(medias) == 1:
+ audio_lang_pref = list(medias.keys())[0]
+ elif audio_lang_pref not in medias:
+ audio_lang_pref = 'fr'
+
+ qualities = self._call_api(
+ 'qualities',
+ video_id, 'Downloading qualities JSON')
+
+ formats = []
+
+ for audio_lang, audio_lang_dict in medias.items():
+ preference = 1 if audio_lang == audio_lang_pref else 0
+ for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
+ for format_id, fmt in lang_dict['quality_list'].items():
+ format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
+
+ video = self._call_api(
+ 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
+ video_id, 'Downloading %s video JSON' % format_id_extended,
+ sub_lang if sub_lang != 'none' else None)
+
+ file_url = video['file']
+ if not file_url:
+ continue
+
+ if file_url in ['forbidden', 'not found']:
+ popmessage = video['popmessage']
+ self._raise_error(popmessage['title'], popmessage['message'])
+
+ formats.append({
+ 'url': file_url,
+ 'format_id': format_id_extended,
+ 'width': int_or_none(fmt.get('res_width')),
+ 'height': int_or_none(fmt.get('res_lines')),
+ 'abr': int_or_none(fmt.get('audiobitrate'), 1000),
+ 'vbr': int_or_none(fmt.get('videobitrate'), 1000),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'format_note': qualities[format_id].get('quality_name'),
+ 'quality': qualities[format_id].get('priority'),
+ 'language_preference': preference,
+ })
+
+ self._sort_formats(formats)
+
+ timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
+
+ if timestamp is not None and timestamp < 0:
+ timestamp = None
+
+ uploader = show.get('partner_name')
+ uploader_id = show.get('partner_key')
+ duration = float_or_none(show.get('duration_ms'), 1000)
+
+ thumbnails = []
+ for thumbnail_key, thumbnail_url in show.items():
+ m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
+ if not m:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+
+ episode = show.get('show_TT') or show.get('show_OT')
+ family = show.get('family_TT') or show.get('family_OT')
+ episode_number = show.get('episode_number')
+
+ title = ''
+ if family:
+ title += family
+ if episode_number:
+ title += ' #' + compat_str(episode_number)
+ if episode:
+ title += ' - ' + compat_str(episode)
+
+ description = show.get('show_resume') or show.get('family_resume')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/nova.py b/hypervideo_dl/extractor/nova.py
index 47b9748..3acb881 100644
--- a/hypervideo_dl/extractor/nova.py
+++ b/hypervideo_dl/extractor/nova.py
@@ -39,7 +39,7 @@ class NovaEmbedIE(InfoExtractor):
player = self._parse_json(
self._search_regex(
- r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
+ r'Player\.init\s*\([^,]+,\s*(?:\w+\s*\?\s*{.+?}\s*:\s*)?({.+})\s*,\s*{.+?}\s*\)\s*;',
webpage, 'player', default='{}'), video_id, fatal=False)
if player:
for format_id, format_list in player['tracks'].items():
@@ -190,7 +190,7 @@ class NovaIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('id')
site = mobj.group('site')
diff --git a/hypervideo_dl/extractor/novaplay.py b/hypervideo_dl/extractor/novaplay.py
new file mode 100644
index 0000000..724986a
--- /dev/null
+++ b/hypervideo_dl/extractor/novaplay.py
@@ -0,0 +1,63 @@
+# coding: utf-8
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_duration, parse_iso8601
+
+
+class NovaPlayIE(InfoExtractor):
+    _VALID_URL = r'https://play\.nova\.bg/video/.*/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://play.nova.bg/video/bratya/season-3/bratq-2021-10-08/548677',
+ 'md5': 'b1127a84e61bed1632b7c2ca9cbb4153',
+ 'info_dict': {
+ 'id': '548677',
+ 'ext': 'mp4',
+ 'title': 'Братя',
+ 'alt_title': 'bratya/season-3/bratq-2021-10-08',
+ 'duration': 1603.0,
+ 'timestamp': 1633724150,
+ 'upload_date': '20211008',
+ 'thumbnail': 'https://nbg-img.fite.tv/img/548677_460x260.jpg',
+ 'description': 'Сезон 3 Епизод 25'
+ },
+ },
+ {
+ 'url': 'https://play.nova.bg/video/igri-na-volqta/season-3/igri-na-volqta-2021-09-20-1/548227',
+ 'md5': '5fd61b8ecbe582fc021019d570965d58',
+ 'info_dict': {
+ 'id': '548227',
+ 'ext': 'mp4',
+ 'title': 'Игри на волята: България (20.09.2021) - част 1',
+ 'alt_title': 'gri-na-volqta/season-3/igri-na-volqta-2021-09-20-1',
+ 'duration': 4060.0,
+ 'timestamp': 1632167564,
+ 'upload_date': '20210920',
+ 'thumbnail': 'https://nbg-img.fite.tv/img/548227_460x260.jpg',
+ 'description': 'Сезон 3 Епизод 13'
+ },
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
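+        # the video metadata is embedded in the page's Next.js __NEXT_DATA__ blob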
+ video_props = self._parse_json(self._search_regex(
+ r'<script\s?id=\"__NEXT_DATA__\"\s?type=\"application/json\">({.+})</script>',
+ webpage, 'video_props'), video_id)['props']['pageProps']['video']
+ m3u8_url = self._download_json(
+ f'https://nbg-api.fite.tv/api/v2/videos/{video_id}/streams',
+ video_id, headers={'x-flipps-user-agent': 'Flipps/75/9.7'})[0]['url']
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_props['title'],
+ 'alt_title': video_props.get('slug'),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'formats': formats,
+ 'duration': parse_duration(video_props['duration']),
+ 'timestamp': parse_iso8601(video_props['published_at']),
+ 'view_count': int_or_none(video_props['view_count']),
+ }
diff --git a/hypervideo_dl/extractor/npo.py b/hypervideo_dl/extractor/npo.py
index e525ad9..ed547d0 100644
--- a/hypervideo_dl/extractor/npo.py
+++ b/hypervideo_dl/extractor/npo.py
@@ -246,9 +246,8 @@ class NPOIE(NPOBaseIE):
})
if not formats:
- if drm:
- raise ExtractorError('This video is DRM protected.', expected=True)
- return
+ if not self.get_param('allow_unplayable_formats') and drm:
+ self.report_drm(video_id)
self._sort_formats(formats)
@@ -425,7 +424,7 @@ class NPOIE(NPOBaseIE):
stream_url, video_id, fatal=False)
# f4m downloader downloads only piece of live stream
for f4m_format in f4m_formats:
- f4m_format['preference'] = -1
+ f4m_format['preference'] = -5
formats.extend(f4m_formats)
elif stream_type == 'hls':
formats.extend(self._extract_m3u8_formats(
diff --git a/hypervideo_dl/extractor/nrk.py b/hypervideo_dl/extractor/nrk.py
index 40dee21..b556bc6 100644
--- a/hypervideo_dl/extractor/nrk.py
+++ b/hypervideo_dl/extractor/nrk.py
@@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
return self._download_json(
- urljoin('http://psapi.nrk.no/', path),
+ urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'})
@@ -452,7 +452,7 @@ class NRKTVEpisodeIE(InfoExtractor):
}]
def _real_extract(self, url):
- display_id, season_number, episode_number = re.match(self._VALID_URL, url).groups()
+ display_id, season_number, episode_number = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
@@ -594,7 +594,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
else super(NRKTVSeasonIE, cls).suitable(url))
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
domain = mobj.group('domain')
serie_kind = mobj.group('serie_kind')
serie = mobj.group('serie')
@@ -692,7 +692,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
else super(NRKTVSeriesIE, cls).suitable(url))
def _real_extract(self, url):
- site, serie_kind, series_id = re.match(self._VALID_URL, url).groups()
+ site, serie_kind, series_id = self._match_valid_url(url).groups()
is_radio = site == 'radio.nrk'
domain = 'radio' if is_radio else 'tv'
diff --git a/hypervideo_dl/extractor/ntvde.py b/hypervideo_dl/extractor/ntvde.py
index 101a537..035582e 100644
--- a/hypervideo_dl/extractor/ntvde.py
+++ b/hypervideo_dl/extractor/ntvde.py
@@ -62,7 +62,7 @@ class NTVDeIE(InfoExtractor):
m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
- preference=0, m3u8_id='hls', fatal=False))
+ quality=1, m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
diff --git a/hypervideo_dl/extractor/nuvid.py b/hypervideo_dl/extractor/nuvid.py
index ab6bfcd..7487824 100644
--- a/hypervideo_dl/extractor/nuvid.py
+++ b/hypervideo_dl/extractor/nuvid.py
@@ -1,71 +1,73 @@
+# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
+ int_or_none,
+ try_get,
)
class NuvidIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://m.nuvid.com/video/1310741/',
- 'md5': 'eab207b7ac4fccfb4e23c86201f11277',
+ _TESTS = [{
+ 'url': 'https://www.nuvid.com/video/6513023/italian-babe',
+ 'md5': '772d2f8288f3d3c5c45f7a41761c7844',
+ 'info_dict': {
+ 'id': '6513023',
+ 'ext': 'mp4',
+ 'title': 'italian babe',
+ 'duration': 321.0,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'https://m.nuvid.com/video/6523263',
'info_dict': {
- 'id': '1310741',
+ 'id': '6523263',
'ext': 'mp4',
- 'title': 'Horny babes show their awesome bodeis and',
- 'duration': 129,
'age_limit': 18,
+ 'title': 'Slut brunette college student anal dorm',
}
- }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- page_url = 'http://m.nuvid.com/video/%s' % video_id
- webpage = self._download_webpage(
- page_url, video_id, 'Downloading video page')
- # When dwnld_speed exists and has a value larger than the MP4 file's
- # bitrate, Nuvid returns the MP4 URL
- # It's unit is 100bytes/millisecond, see mobile-nuvid-min.js for the algorithm
- self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
- mp4_webpage = self._download_webpage(
- page_url, video_id, 'Downloading video page for MP4 format')
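+        # map the API's lq/hq quality labels to conventional resolution names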
+ qualities = {
+ 'lq': '360p',
+ 'hq': '720p',
+ }
+
+ json_url = f'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'
+ video_data = self._download_json(
+ json_url, video_id, headers={
+ 'Accept': 'application/json, text/javascript, */*; q = 0.01',
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+ })
- html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
- video_url = self._html_search_regex(html5_video_re, webpage, video_id)
- mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
formats = [{
- 'url': video_url,
- }]
- if mp4_video_url != video_url:
- formats.append({
- 'url': mp4_video_url,
- })
+ 'url': source,
+ 'format_id': qualities.get(quality),
+            'height': int_or_none(qualities.get(quality, '')[:-1] or None),
+        } for quality, source in (video_data.get('files') or {}).items() if source]
- title = self._html_search_regex(
- [r'<span title="([^"]+)">',
- r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
- r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
- thumbnails = [
- {
- 'url': thumb_url,
- } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
- ]
- thumbnail = thumbnails[0]['url'] if thumbnails else None
- duration = parse_duration(self._html_search_regex(
- [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
- r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
+ self._check_formats(formats, video_id)
+ self._sort_formats(formats)
+
+ title = video_data.get('title')
+ thumbnail_base_url = try_get(video_data, lambda x: x['thumbs']['url'])
+ thumbnail_extension = try_get(video_data, lambda x: x['thumbs']['extension'])
+ thumbnail_id = self._search_regex(
+            r'/media/videos/tmb/%s/preview/(\d+)' % video_id, video_data.get('poster', ''), 'thumbnail id', default='19')
+        thumbnail = f'{thumbnail_base_url}player/{thumbnail_id}{thumbnail_extension}' if thumbnail_base_url else None
+ duration = parse_duration(video_data.get('duration') or video_data.get('duration_format'))
return {
'id': video_id,
+ 'formats': formats,
'title': title,
- 'thumbnails': thumbnails,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': 18,
- 'formats': formats,
}
diff --git a/hypervideo_dl/extractor/nytimes.py b/hypervideo_dl/extractor/nytimes.py
index 976b1c6..9996473 100644
--- a/hypervideo_dl/extractor/nytimes.py
+++ b/hypervideo_dl/extractor/nytimes.py
@@ -46,6 +46,7 @@ class NYTimesBaseIE(InfoExtractor):
urls = []
formats = []
+ subtitles = {}
for video in video_data.get('renditions', []):
video_url = video.get('url')
format_id = video.get('type')
@@ -54,9 +55,11 @@ class NYTimesBaseIE(InfoExtractor):
urls.append(video_url)
ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id or 'hls', fatal=False))
+ m3u8_id=format_id or 'hls', fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
elif ext == 'mpd':
continue
# formats.extend(self._extract_mpd_formats(
@@ -72,7 +75,7 @@ class NYTimesBaseIE(InfoExtractor):
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
'ext': ext,
})
- self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id'))
+ self._sort_formats(formats)
thumbnails = []
for image in video_data.get('images', []):
@@ -96,6 +99,7 @@ class NYTimesBaseIE(InfoExtractor):
'uploader': video_data.get('byline'),
'duration': float_or_none(video_data.get('duration'), 1000),
'formats': formats,
+ 'subtitles': subtitles,
'thumbnails': thumbnails,
}
diff --git a/hypervideo_dl/extractor/nzherald.py b/hypervideo_dl/extractor/nzherald.py
new file mode 100644
index 0000000..e5601b4
--- /dev/null
+++ b/hypervideo_dl/extractor/nzherald.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ traverse_obj
+)
+
+
+class NZHeraldIE(InfoExtractor):
+ IE_NAME = 'nzherald'
+ _VALID_URL = r'https?://(?:www\.)?nzherald\.co\.nz/[\w\/-]+\/(?P<id>[A-Z0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.nzherald.co.nz/nz/weather-heavy-rain-gales-across-nz-most-days-this-week/PTG7QWY4E2225YHZ5NAIRBTYTQ/',
+ 'info_dict': {
+ 'id': '6271084466001',
+ 'ext': 'mp4',
+ 'title': 'MetService severe weather warning: September 6th - 7th',
+ 'timestamp': 1630891576,
+ 'upload_date': '20210906',
+ 'uploader_id': '1308227299001',
+ 'description': 'md5:db6ca335a22e2cdf37ab9d2bcda52902'
+ }
+
+ }, {
+ # Webpage has brightcove embed player url
+ 'url': 'https://www.nzherald.co.nz/travel/pencarrow-coastal-trail/HDVTPJEPP46HJ2UEMK4EGD2DFI/',
+ 'info_dict': {
+ 'id': '6261791733001',
+ 'ext': 'mp4',
+ 'title': 'Pencarrow Coastal Trail',
+ 'timestamp': 1625102897,
+ 'upload_date': '20210701',
+ 'uploader_id': '1308227299001',
+ 'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4'
+ }
+
+ }, {
+ # two video embeds of the same video
+ 'url': 'https://www.nzherald.co.nz/nz/truck-driver-captured-cutting-off-motorist-on-state-highway-1-in-canterbury/FIHNJB7PLLPHWQPK4S7ZBDUC4I/',
+ 'info_dict': {
+ 'id': '6251114530001',
+ 'ext': 'mp4',
+ 'title': 'Truck travelling north from Rakaia runs car off road',
+ 'timestamp': 1619730509,
+ 'upload_date': '20210429',
+ 'uploader_id': '1308227299001',
+ 'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7'
+ }
+ }, {
+ 'url': 'https://www.nzherald.co.nz/kahu/kaupapa-companies-my-taiao-supporting-maori-in-study-and-business/PQBO2J25WCG77VGRX7W7BVYEAI/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://nzherald.co.nz/the-country/video/focus-nzs-first-mass-covid-19-vaccination-event/N5I7IL3BRFLZSD33TLDLYJDGK4/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.nzherald.co.nz/the-vision-is-clear/news/tvic-damian-roper-planting-trees-an-addiction/AN2AAEPNRK5VLISDWQAJZB6ATQ',
+ 'only_matching': True
+ }
+ ]
+
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1308227299001/S1BXZn8t_default/index.html?videoId=%s'
+
+ def _extract_bc_embed_url(self, webpage):
+ """The initial webpage may include the brightcove player embed url"""
+ bc_url = BrightcoveNewIE._extract_url(self, webpage)
+ return bc_url or self._search_regex(
+ r'(?:embedUrl)\"\s*:\s*\"(?P<embed_url>%s)' % BrightcoveNewIE._VALID_URL,
+ webpage, 'embed url', default=None, group='embed_url')
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ bc_url = self._extract_bc_embed_url(webpage)
+
+ if not bc_url:
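+            # no Brightcove embed found: fall back to the Fusion page-state JSON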
+ fusion_metadata = self._parse_json(
+ self._search_regex(r'Fusion\.globalContent\s*=\s*({.+?})\s*;', webpage, 'fusion metadata'), article_id)
+
+ video_metadata = fusion_metadata.get('video')
+ bc_video_id = traverse_obj(
+ video_metadata or fusion_metadata, # fusion metadata is the video metadata for video-only pages
+ 'brightcoveId', ('content_elements', ..., 'referent', 'id'),
+ get_all=False, expected_type=compat_str)
+
+ if not bc_video_id:
+ if isinstance(video_metadata, dict) and len(video_metadata) == 0:
+ raise ExtractorError('This article does not have a video.', expected=True)
+ else:
+ raise ExtractorError('Failed to extract brightcove video id')
+ bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_video_id
+
+ return self.url_result(bc_url, 'BrightcoveNew')
diff --git a/hypervideo_dl/extractor/odnoklassniki.py b/hypervideo_dl/extractor/odnoklassniki.py
index 7ed9fac..9cacd38 100644
--- a/hypervideo_dl/extractor/odnoklassniki.py
+++ b/hypervideo_dl/extractor/odnoklassniki.py
@@ -247,8 +247,7 @@ class OdnoklassnikiIE(InfoExtractor):
m3u8_url = metadata.get('hlsMasterPlaylistUrl')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
- m3u8_id='hls', fatal=False))
+ m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
rtmp_url = metadata.get('rtmpUrl')
if rtmp_url:
formats.append({
@@ -260,7 +259,7 @@ class OdnoklassnikiIE(InfoExtractor):
if not formats:
payment_info = metadata.get('paymentInfo')
if payment_info:
- raise ExtractorError('This video is paid, subscribe to download it', expected=True)
+ self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/olympics.py b/hypervideo_dl/extractor/olympics.py
new file mode 100644
index 0000000..0bc9206
--- /dev/null
+++ b/hypervideo_dl/extractor/olympics.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class OlympicsReplayIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?olympics\.com/tokyo-2020/(?:[a-z]{2}/)?replay/(?P<id>[^/#&?]+)'
+ _TESTS = [{
+ 'url': 'https://olympics.com/tokyo-2020/en/replay/300622eb-abc0-43ea-b03b-c5f2d429ec7b/jumping-team-qualifier',
+ 'info_dict': {
+ 'id': '300622eb-abc0-43ea-b03b-c5f2d429ec7b',
+ 'ext': 'mp4',
+ 'title': 'Jumping Team Qualifier',
+ 'release_date': '20210806',
+ 'upload_date': '20210713',
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+        # The token parameters are hardcoded in the webpage, so there is no need
+        # to download the page just for them. If the webpage is ever needed for
+        # other purposes as well, extract these parameters from it instead.
+ token_url = 'https://appovptok.ovpobs.tv/api/identity/app/token?api_key=OTk5NDcxOjpvY3N3LWFwaXVzZXI%3D&api_secret=ODY4ODM2MjE3ODMwYmVjNTAxMWZlMDJiMTYxZmY0MjFiMjMwMjllMjJmNDA1YWRiYzA5ODcxYTZjZTljZDkxOTo6NTM2NWIzNjRlMTM1ZmI2YWNjNmYzMGMzOGM3NzZhZTY%3D'
+ token = self._download_webpage(token_url, id)
+ headers = {'x-obs-app-token': token}
+ data_json = self._download_json(f'https://appocswtok.ovpobs.tv/api/schedule-sessions/{id}?include=stream',
+ id, headers=headers)
+ meta_data = data_json['data']['attributes']
+ for t_dict in data_json['included']:
+ if t_dict.get('type') == 'Stream':
+ stream_data = t_dict['attributes']
+ m3u8_url = self._download_json(
+ 'https://meteringtok.ovpobs.tv/api/playback-sessions', id, headers=headers, query={
+ 'alias': stream_data['alias'],
+ 'stream': stream_data['stream'],
+ 'type': 'vod'
+ })['data']['attributes']['url']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
+ self._sort_formats(formats)
+
+ return {
+ 'id': id,
+ 'title': meta_data['title'],
+ 'release_date': unified_strdate(meta_data.get('start') or meta_data.get('broadcastPublished')),
+ 'upload_date': unified_strdate(meta_data.get('publishedAt')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/hypervideo_dl/extractor/on24.py b/hypervideo_dl/extractor/on24.py
new file mode 100644
index 0000000..d4d8244
--- /dev/null
+++ b/hypervideo_dl/extractor/on24.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ strip_or_none,
+ try_get,
+ urljoin,
+)
+
+
+class On24IE(InfoExtractor):
+ IE_NAME = 'on24'
+ IE_DESC = 'ON24'
+
+ _VALID_URL = r'''(?x)
+ https?://event\.on24\.com/(?:
+ wcc/r/(?P<id_1>\d{7})/(?P<key_1>[0-9A-F]{32})|
+ eventRegistration/(?:console/EventConsoleApollo|EventLobbyServlet\?target=lobby30)
+ \.jsp\?(?:[^/#?]*&)?eventid=(?P<id_2>\d{7})[^/#?]*&key=(?P<key_2>[0-9A-F]{32})
+ )'''
+
+ _TESTS = [{
+ 'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?uimode=nextgeneration&eventid=2197467&sessionid=1&key=5DF57BE53237F36A43B478DD36277A84&contenttype=A&eventuserid=305999&playerwidth=1000&playerheight=650&caller=previewLobby&text_language_id=en&format=fhaudio&newConsole=false',
+ 'info_dict': {
+ 'id': '2197467',
+ 'ext': 'wav',
+ 'title': 'Pearson Test of English General/Pearson English International Certificate Teacher Training Guide',
+ 'upload_date': '20200219',
+ 'timestamp': 1582149600.0,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://event.on24.com/wcc/r/2639291/82829018E813065A122363877975752E?mode=login&email=johnsmith@gmail.com',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?&eventid=2639291&sessionid=1&username=&partnerref=&format=fhvideo1&mobile=&flashsupportedmobiledevice=&helpcenter=&key=82829018E813065A122363877975752E&newConsole=true&nxChe=true&newTabCon=true&text_language_id=en&playerwidth=748&playerheight=526&eventuserid=338788762&contenttype=A&mediametricsessionid=384764716&mediametricid=3558192&usercd=369267058&mode=launch',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ event_id = mobj.group('id_1') or mobj.group('id_2')
+ event_key = mobj.group('key_1') or mobj.group('key_2')
+
+ event_data = self._download_json(
+ 'https://event.on24.com/apic/utilApp/EventConsoleCachedServlet',
+ event_id, query={
+ 'eventId': event_id,
+ 'displayProfile': 'player',
+ 'key': event_key,
+ 'contentType': 'A'
+ })
+        event_id = str(try_get(event_data, lambda x: x['presentationLogInfo']['eventid']) or event_id)
+ language = event_data.get('localelanguagecode')
+
+ formats = []
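+        # each mediaUrlInfo entry is keyed by a code; only fhvideo1 (MP4 video)
+        # and audio (WAV) are mapped to formats below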
+ for media in event_data.get('mediaUrlInfo', []):
+            media_url = media.get('url')
+            if not media_url:
+                continue
+            media_url = urljoin('https://event.on24.com/media/news/corporatevideo/events/', str(media_url))
+ media_type = media.get('code')
+ if media_type == 'fhvideo1':
+ formats.append({
+ 'format_id': 'video',
+ 'url': media_url,
+ 'language': language,
+ 'ext': 'mp4',
+ 'vcodec': 'avc1.640020',
+ 'acodec': 'mp4a.40.2',
+ })
+ elif media_type == 'audio':
+ formats.append({
+ 'format_id': 'audio',
+ 'url': media_url,
+ 'language': language,
+ 'ext': 'wav',
+ 'vcodec': 'none',
+ 'acodec': 'wav'
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': event_id,
+ 'title': strip_or_none(event_data.get('description')),
+ 'timestamp': int_or_none(try_get(event_data, lambda x: x['session']['startdate']), 1000),
+ 'webpage_url': f'https://event.on24.com/wcc/r/{event_id}/{event_key}',
+ 'view_count': event_data.get('registrantcount'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/ondemandkorea.py b/hypervideo_dl/extractor/ondemandkorea.py
index df1ce3c..cc3c587 100644
--- a/hypervideo_dl/extractor/ondemandkorea.py
+++ b/hypervideo_dl/extractor/ondemandkorea.py
@@ -11,18 +11,34 @@ from ..utils import (
class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
_GEO_COUNTRIES = ['US', 'CA']
- _TEST = {
- 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
+ _TESTS = [{
+ 'url': 'https://www.ondemandkorea.com/ask-us-anything-e43.html',
'info_dict': {
'id': 'ask-us-anything-e43',
'ext': 'mp4',
- 'title': 'Ask Us Anything : E43',
+ 'title': 'Ask Us Anything : Gain, Ji Soo - 09/24/2016',
+ 'description': 'A talk show/game show with a school theme where celebrity guests appear as “transfer students.”',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': 'm3u8 download'
}
- }
+ }, {
+ 'url': 'https://www.ondemandkorea.com/confession-e01-1.html',
+ 'info_dict': {
+ 'id': 'confession-e01-1',
+ 'ext': 'mp4',
+ 'title': 'Confession : E01',
+ 'description': 'Choi Do-hyun, a criminal attorney, is the son of a death row convict. Ever since Choi Pil-su got arrested for murder, Do-hyun has wanted to solve his ',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'subtitles': {
+ 'English': 'mincount:1',
+ },
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download'
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -44,11 +60,18 @@ class OnDemandKoreaIE(InfoExtractor):
'This video is only available to ODK PLUS members.',
expected=True)
- title = self._og_search_title(webpage)
+ if 'ODK PREMIUM Members Only' in webpage:
+ raise ExtractorError(
+ 'This video is only available to ODK PREMIUM members.',
+ expected=True)
+
+ title = self._search_regex(
+ r'class=["\']episode_title["\'][^>]*>([^<]+)',
+ webpage, 'episode_title', fatal=False) or self._og_search_title(webpage)
jw_config = self._parse_json(
self._search_regex(
- r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P<options>.+?)\);',
+ r'(?s)odkPlayer\.init.*?(?P<options>{[^;]+}).*?;',
webpage, 'jw config', group='options'),
video_id, transform_source=js_to_json)
info = self._parse_jwplayer_data(
@@ -57,6 +80,7 @@ class OnDemandKoreaIE(InfoExtractor):
info.update({
'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage)
})
return info
diff --git a/hypervideo_dl/extractor/onet.py b/hypervideo_dl/extractor/onet.py
index e55b2ac..bf53ea0 100644
--- a/hypervideo_dl/extractor/onet.py
+++ b/hypervideo_dl/extractor/onet.py
@@ -138,7 +138,7 @@ class OnetIE(OnetBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id, video_id = mobj.group('display_id', 'id')
webpage = self._download_webpage(url, display_id)
@@ -182,7 +182,7 @@ class OnetChannelIE(OnetBaseIE):
video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
video_name = url_basename(current_clip_info['url'])
- if self._downloader.params.get('noplaylist'):
+ if self.get_param('noplaylist'):
self.to_screen(
'Downloading just video %s because of --no-playlist' % video_name)
return self._extract_from_id(video_id, webpage)
diff --git a/hypervideo_dl/extractor/ooyala.py b/hypervideo_dl/extractor/ooyala.py
index eb957b8..20cfa0a 100644
--- a/hypervideo_dl/extractor/ooyala.py
+++ b/hypervideo_dl/extractor/ooyala.py
@@ -10,7 +10,6 @@ from ..compat import (
)
from ..utils import (
determine_ext,
- ExtractorError,
float_or_none,
int_or_none,
try_get,
@@ -85,7 +84,7 @@ class OoyalaBaseIE(InfoExtractor):
'fps': float_or_none(stream.get('framerate')),
})
if not formats and not auth_data.get('authorized'):
- raise ExtractorError('%s said: %s' % (
+ self.raise_no_formats('%s said: %s' % (
self.IE_NAME, auth_data['message']), expected=True)
self._sort_formats(formats)
@@ -205,6 +204,6 @@ class OoyalaExternalIE(OoyalaBaseIE):
}
def _real_extract(self, url):
- partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
+ partner_id, video_id, pcode = self._match_valid_url(url).groups()
content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id)
return self._extract(content_tree_url, video_id)
diff --git a/hypervideo_dl/extractor/openload.py b/hypervideo_dl/extractor/openload.py
index 0c20d01..dfdd0e5 100644
--- a/hypervideo_dl/extractor/openload.py
+++ b/hypervideo_dl/extractor/openload.py
@@ -17,6 +17,7 @@ from ..utils import (
get_exe_version,
is_outdated_version,
std_headers,
+ process_communicate_or_kill,
)
@@ -226,7 +227,7 @@ class PhantomJSwrapper(object):
self.exe, '--ssl-protocol=any',
self._TMP_FILES['script'].name
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = p.communicate()
+ out, err = process_communicate_or_kill(p)
if p.returncode != 0:
raise ExtractorError(
'Executing JS failed\n:' + encodeArgument(err))
diff --git a/hypervideo_dl/extractor/openrec.py b/hypervideo_dl/extractor/openrec.py
new file mode 100644
index 0000000..d7073ab
--- /dev/null
+++ b/hypervideo_dl/extractor/openrec.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ try_get,
+ unified_strdate
+)
+from ..compat import compat_str
+
+
+class OpenRecIE(InfoExtractor):
+ IE_NAME = 'openrec'
+ _VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.openrec.tv/live/2p8v31qe4zy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.openrec.tv/live/wez93eqvjzl',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id)
+
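+        # live metadata is embedded in the page as a window.pageStore JSON assignment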
+ window_stores = self._parse_json(
+ self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
+ movie_store = traverse_obj(
+ window_stores,
+ ('v8', 'state', 'movie'),
+ ('v8', 'movie'),
+ expected_type=dict)
+ if not movie_store:
+ raise ExtractorError('Failed to extract live info')
+
+ title = movie_store.get('title')
+ description = movie_store.get('introduction')
+ thumbnail = movie_store.get('thumbnailUrl')
+
+ channel_user = movie_store.get('channel', {}).get('user')
+ uploader = try_get(channel_user, lambda x: x['name'], compat_str)
+ uploader_id = try_get(channel_user, lambda x: x['id'], compat_str)
+
+ timestamp = traverse_obj(movie_store, ('startedAt', 'time'), expected_type=int)
+
+        m3u8_playlists = movie_store.get('media') or {}
+ formats = []
+ for (name, m3u8_url) in m3u8_playlists.items():
+ if not m3u8_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8',
+ m3u8_id='hls-%s' % name, live=True))
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': timestamp,
+ 'is_live': True,
+ }
+
+
+class OpenRecCaptureIE(InfoExtractor):
+ IE_NAME = 'openrec:capture'
+ _VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.openrec.tv/capture/l9nk2x4gn14',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.openrec.tv/capture/mldjr82p7qk',
+ 'info_dict': {
+ 'id': 'mldjr82p7qk',
+ 'title': 'たいじの恥ずかしい英語力',
+ 'uploader': 'たいちゃんねる',
+ 'uploader_id': 'Yaritaiji',
+ 'upload_date': '20210803',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id)
+
+ window_stores = self._parse_json(
+ self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
+ movie_store = window_stores.get('movie')
+
+ capture_data = window_stores.get('capture')
+ if not capture_data:
+ raise ExtractorError('Cannot extract title')
+ title = capture_data.get('title')
+ thumbnail = capture_data.get('thumbnailUrl')
+ upload_date = unified_strdate(capture_data.get('createdAt'))
+
+ channel_info = movie_store.get('channel') or {}
+ uploader = channel_info.get('name')
+ uploader_id = channel_info.get('id')
+
+ m3u8_url = capture_data.get('source')
+ if not m3u8_url:
+ raise ExtractorError('Cannot extract m3u8 url')
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'upload_date': upload_date,
+ }
diff --git a/hypervideo_dl/extractor/ora.py b/hypervideo_dl/extractor/ora.py
index 1d42be3..422d0b3 100644
--- a/hypervideo_dl/extractor/ora.py
+++ b/hypervideo_dl/extractor/ora.py
@@ -55,7 +55,7 @@ class OraTVIE(InfoExtractor):
formats.append({
'url': http_template % q,
'format_id': q,
- 'preference': preference(q),
+ 'quality': preference(q),
})
self._sort_formats(formats)
else:
diff --git a/hypervideo_dl/extractor/orf.py b/hypervideo_dl/extractor/orf.py
index ed8a9a8..428ec97 100644
--- a/hypervideo_dl/extractor/orf.py
+++ b/hypervideo_dl/extractor/orf.py
@@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id=format_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -180,7 +183,7 @@ class ORFTVthekIE(InfoExtractor):
class ORFRadioIE(InfoExtractor):
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
show_date = mobj.group('date')
show_id = mobj.group('show')
diff --git a/hypervideo_dl/extractor/packtpub.py b/hypervideo_dl/extractor/packtpub.py
index 11ad3b3..c06fca7 100644
--- a/hypervideo_dl/extractor/packtpub.py
+++ b/hypervideo_dl/extractor/packtpub.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import (
@@ -66,7 +65,7 @@ class PacktPubIE(PacktPubBaseIE):
raise
def _real_extract(self, url):
- course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups()
+ course_id, chapter_id, video_id, display_id = self._match_valid_url(url).groups()
headers = {}
if self._TOKEN:
@@ -123,7 +122,7 @@ class PacktPubCourseIE(PacktPubBaseIE):
PacktPubCourseIE, cls).suitable(url)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
url, course_id = mobj.group('url', 'id')
course = self._download_json(
diff --git a/hypervideo_dl/extractor/palcomp3.py b/hypervideo_dl/extractor/palcomp3.py
index fb29d83..d0a62fb 100644
--- a/hypervideo_dl/extractor/palcomp3.py
+++ b/hypervideo_dl/extractor/palcomp3.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -64,7 +63,7 @@ class PalcoMP3BaseIE(InfoExtractor):
self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
def _real_extract(self, url):
- artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
+ artist_slug, music_slug = self._match_valid_url(url).groups()
artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
music = self._call_api(artist_slug, artist_fields)['artist']['music']
return self._parse_music(music)
@@ -109,9 +108,9 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
}
name'''
- @ classmethod
+ @classmethod
def suitable(cls, url):
- return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
+ return False if PalcoMP3IE._match_valid_url(url) else super(PalcoMP3ArtistIE, cls).suitable(url)
def _real_extract(self, url):
artist_slug = self._match_id(url)
diff --git a/hypervideo_dl/extractor/pandoratv.py b/hypervideo_dl/extractor/pandoratv.py
index 538738c..6230053 100644
--- a/hypervideo_dl/extractor/pandoratv.py
+++ b/hypervideo_dl/extractor/pandoratv.py
@@ -1,17 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_urlparse,
)
from ..utils import (
ExtractorError,
float_or_none,
parse_duration,
+ parse_qs,
str_to_int,
urlencode_postdata,
)
@@ -71,12 +70,12 @@ class PandoraTVIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
user_id = mobj.group('user_id')
video_id = mobj.group('id')
if not user_id or not video_id:
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
video_id = qs.get('prgid', [None])[0]
user_id = qs.get('ch_userid', [None])[0]
if any(not f for f in (video_id, user_id,)):
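parse_qs was promoted to ..utils precisely for this idiom; it is, to a first approximation, the composition it replaces:

    from urllib.parse import parse_qs as _parse_qs, urlparse

    def parse_qs(url):
        # query-string dict mapping each key to a list of values
        return _parse_qs(urlparse(url).query)

    parse_qs('https://example.invalid/view?prgid=123&ch_userid=abc')
    # {'prgid': ['123'], 'ch_userid': ['abc']}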
diff --git a/hypervideo_dl/extractor/paramountplus.py b/hypervideo_dl/extractor/paramountplus.py
new file mode 100644
index 0000000..338b84d
--- /dev/null
+++ b/hypervideo_dl/extractor/paramountplus.py
@@ -0,0 +1,145 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .cbs import CBSBaseIE
+from ..utils import (
+ int_or_none,
+ url_or_none,
+)
+
+
+class ParamountPlusIE(CBSBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ paramountplus:|
+ https?://(?:www\.)?(?:
+ paramountplus\.com/(?:shows/[^/]+/video|movies/[^/]+)/
+ )(?P<id>[\w-]+))'''
+
+ # All tests are blocked outside US
+ _TESTS = [{
+ 'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
+ 'info_dict': {
+ 'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
+ 'ext': 'mp4',
+ 'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
+ 'description': 'md5:7ac835000645a69933df226940e3c859',
+ 'duration': 1418,
+ 'timestamp': 920264400,
+ 'upload_date': '19990301',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
+ 'info_dict': {
+ 'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
+ 'ext': 'mp4',
+ 'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
+ 'description': 'md5:f4adcea3e8b106192022e121f1565bae',
+ 'duration': 2506,
+ 'timestamp': 1627063200,
+ 'upload_date': '20210723',
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/daddys-home/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
+ 'info_dict': {
+ 'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
+ 'ext': 'mp4',
+ 'title': 'Daddy\'s Home',
+ 'upload_date': '20151225',
+ 'description': 'md5:a0beaf24e8d3b0e81b2ee41d47c06f33',
+ 'uploader': 'CBSI-NEW',
+ 'timestamp': 1451030400,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ 'format': 'bestvideo',
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks'], # TODO: Investigate this
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/sonic-the-hedgehog/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
+ 'info_dict': {
+ 'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
+ 'ext': 'mp4',
+ 'uploader': 'CBSI-NEW',
+ 'description': 'md5:bc7b6fea84ba631ef77a9bda9f2ff911',
+ 'timestamp': 1577865600,
+ 'title': 'Sonic the Hedgehog',
+ 'upload_date': '20200101',
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ 'format': 'bestvideo',
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks'],
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/movies/million-dollar-american-princesses-meghan-and-harry/C0LpgNwXYeB8txxycdWdR9TjxpJOsdCq',
+ 'only_matching': True,
+ }]
+
+ def _extract_video_info(self, content_id, mpx_acc=2198311517):
+ items_data = self._download_json(
+ 'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/%s.json' % content_id,
+ content_id, query={'locale': 'en-us', 'at': 'ABCqWNNSwhIqINWIIAG+DFzcFUvF8/vcN6cNyXFFfNzWAIvXuoVgX+fK4naOC7V8MLI='}, headers=self.geo_verification_headers())
+
+ asset_types = {
+ item.get('assetType'): {
+ 'format': 'SMIL',
+ 'formats': 'MPEG4,M3U',
+ } for item in items_data['itemList']
+ }
+ item = items_data['itemList'][-1]
+ return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={
+ 'title': item.get('title'),
+ 'series': item.get('seriesTitle'),
+ 'season_number': int_or_none(item.get('seasonNum')),
+ 'episode_number': int_or_none(item.get('episodeNum')),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': url_or_none(item.get('thumbnail')),
+ })
+
+
+class ParamountPlusSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P<id>[a-zA-Z0-9-_]+)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://www.paramountplus.com/shows/drake-josh',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': 'drake-josh',
+ }
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/hawaii_five_0/',
+ 'playlist_mincount': 240,
+ 'info_dict': {
+ 'id': 'hawaii_five_0',
+ }
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/',
+ 'playlist_mincount': 248,
+ 'info_dict': {
+ 'id': 'spongebob-squarepants',
+ }
+ }]
+ _API_URL = 'https://www.paramountplus.com/shows/{}/xhr/episodes/page/0/size/100000/xs/0/season/0/'
+
+ def _entries(self, show_name):
+ show_json = self._download_json(self._API_URL.format(show_name), video_id=show_name)
+ if show_json.get('success'):
+ for episode in show_json['result']['data']:
+ yield self.url_result(
+ 'https://www.paramountplus.com%s' % episode['url'],
+ ie=ParamountPlusIE.ie_key(), video_id=episode['content_id'])
+
+ def _real_extract(self, url):
+ show_name = self._match_id(url)
+ return self.playlist_result(self._entries(show_name), playlist_id=show_name)
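_entries is a generator, and playlist_result passes it through untouched, so episode pages are only resolved when the playlist is actually consumed; each url_result is a transparent pointer that hands the episode URL back to ParamountPlusIE. Roughly, the two helpers build dictionaries of this shape (a sketch; the real methods fill in more fields):

    def url_result(url, ie=None, video_id=None):
        return {'_type': 'url', 'url': url, 'ie_key': ie, 'id': video_id}

    def playlist_result(entries, playlist_id=None, playlist_title=None):
        return {'_type': 'playlist', 'entries': entries,
                'id': playlist_id, 'title': playlist_title}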
diff --git a/hypervideo_dl/extractor/parliamentliveuk.py b/hypervideo_dl/extractor/parliamentliveuk.py
index bdd5ff5..869ebd8 100644
--- a/hypervideo_dl/extractor/parliamentliveuk.py
+++ b/hypervideo_dl/extractor/parliamentliveuk.py
@@ -1,6 +1,14 @@
+# coding: utf-8
from __future__ import unicode_literals
+import json
+import uuid
+
from .common import InfoExtractor
+from ..utils import (
+ unified_timestamp,
+ try_get,
+)
class ParliamentLiveUKIE(InfoExtractor):
@@ -11,12 +19,14 @@ class ParliamentLiveUKIE(InfoExtractor):
_TESTS = [{
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'info_dict': {
- 'id': '1_af9nv9ym',
+ 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'ext': 'mp4',
'title': 'Home Affairs Committee',
- 'uploader_id': 'FFMPEG-01',
- 'timestamp': 1422696664,
- 'upload_date': '20150131',
+ 'timestamp': 1395153872,
+ 'upload_date': '20140318',
+ },
+ 'params': {
+ 'format': 'bestvideo',
},
}, {
'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
@@ -25,19 +35,49 @@ class ParliamentLiveUKIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
- widget_config = self._parse_json(self._search_regex(
- r'(?s)kWidgetConfig\s*=\s*({.+});',
- webpage, 'kaltura widget config'), video_id)
- kaltura_url = 'kaltura:%s:%s' % (
- widget_config['wid'][1:], widget_config['entry_id'])
- event_title = self._download_json(
- 'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
+ video_info = self._download_json(f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id)
+ _DEVICE_ID = str(uuid.uuid4())
+ auth = 'Bearer ' + self._download_json(
+ 'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/auth/anonymous',
+ video_id, headers={
+ 'Origin': 'https://videoplayback.parliamentlive.tv',
+ 'Accept': 'application/json, text/plain, */*',
+ 'Content-Type': 'application/json;charset=utf-8'
+ }, data=json.dumps({
+ 'deviceId': _DEVICE_ID,
+ 'device': {
+ 'deviceId': _DEVICE_ID,
+ 'width': 653,
+ 'height': 368,
+ 'type': 'WEB',
+ 'name': 'Mozilla Firefox 91'
+ }
+ }).encode('utf-8'))['sessionToken']
+
+ video_urls = self._download_json(
+ f'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/entitlement/{video_id}/play',
+ video_id, headers={'Authorization': auth, 'Accept': 'application/json, text/plain, */*'})['formats']
+
+ formats = []
+ for format in video_urls:
+ if not format.get('mediaLocator'):
+ continue
+ if format.get('format') == 'DASH':
+ formats.extend(self._extract_mpd_formats(
+ format['mediaLocator'], video_id, mpd_id='dash', fatal=False))
+ elif format.get('format') == 'SMOOTHSTREAMING':
+ formats.extend(self._extract_ism_formats(
+ format['mediaLocator'], video_id, ism_id='ism', fatal=False))
+ elif format.get('format') == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ format['mediaLocator'], video_id, m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats)
+
return {
- '_type': 'url_transparent',
- 'title': event_title,
- 'description': '',
- 'url': kaltura_url,
- 'ie_key': 'Kaltura',
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_info['event']['title'],
+ 'timestamp': unified_timestamp(try_get(video_info, lambda x: x['event']['publishedStartTime'])),
+ 'thumbnail': video_info.get('thumbnailUrl'),
}
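The rewritten extractor is two round-trips against Red Bee Media's exposure API: an anonymous device registration that yields a session token, then an entitlement call that lists the available manifest locators. A standalone sketch of the same flow (endpoints taken from the diff above; response shapes assumed from the code; the video id is the one from the test case):

    import json
    import uuid
    from urllib.request import Request, urlopen

    BASE = 'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive'
    device_id = str(uuid.uuid4())
    auth = Request(BASE + '/auth/anonymous', data=json.dumps({
        'deviceId': device_id,
        'device': {'deviceId': device_id, 'type': 'WEB', 'name': 'Mozilla Firefox 91'},
    }).encode(), headers={'Content-Type': 'application/json'})
    token = json.load(urlopen(auth))['sessionToken']

    video_id = 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b'
    play = Request('%s/entitlement/%s/play' % (BASE, video_id),
                   headers={'Authorization': 'Bearer ' + token})
    for fmt in json.load(urlopen(play))['formats']:
        print(fmt.get('format'), fmt.get('mediaLocator'))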
diff --git a/hypervideo_dl/extractor/parlview.py b/hypervideo_dl/extractor/parlview.py
new file mode 100644
index 0000000..c85eaa7
--- /dev/null
+++ b/hypervideo_dl/extractor/parlview.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class ParlviewIE(InfoExtractor):
+
+ _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
+ _TESTS = [{
+ 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
+ 'info_dict': {
+ 'id': '542661',
+ 'ext': 'mp4',
+ 'title': "Australia's Family Law System [Part 2]",
+ 'duration': 5799,
+ 'description': 'md5:7099883b391619dbae435891ca871a62',
+ 'timestamp': 1621430700,
+ 'upload_date': '20210519',
+ 'uploader': 'Joint Committee',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
+ 'only_matching': True,
+ }]
+ _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
+ _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ media = self._download_json(self._API_URL % video_id, video_id).get('media')
+ timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], compat_str) or '/'
+
+ stream = try_get(media, lambda x: x['renditions'][0], dict)
+ if not stream:
+ self.raise_no_formats('No streams were detected')
+ elif stream.get('streamType') != 'VOD':
+ self.raise_no_formats('Unknown type of stream was detected: "%s"' % str(stream.get('streamType')))
+ formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+
+ media_info = self._download_webpage(
+ self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': url,
+ 'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
+ 'formats': formats,
+ 'duration': int_or_none(media.get('duration')),
+ 'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')),
+ 'description': self._html_search_regex(
+ r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)',
+ webpage, 'description', fatal=False),
+ 'uploader': self._html_search_regex(
+ r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
+ 'thumbnail': media.get('staticImage'),
+ }
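try_get does the defensive traversal here: it applies the lambda and swallows the usual lookup errors, optionally type-checking the result. In essence (the utils version also accepts a list of getters):

    def try_get(src, getter, expected_type=None):
        try:
            v = getter(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            return None
        return v if expected_type is None or isinstance(v, expected_type) else None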
diff --git a/hypervideo_dl/extractor/patreon.py b/hypervideo_dl/extractor/patreon.py
index 761a4b1..a189c02 100644
--- a/hypervideo_dl/extractor/patreon.py
+++ b/hypervideo_dl/extractor/patreon.py
@@ -1,7 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
+
from .common import InfoExtractor
+from .vimeo import VimeoIE
+
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
clean_html,
determine_ext,
@@ -11,6 +16,7 @@ from ..utils import (
parse_iso8601,
str_or_none,
try_get,
+ url_or_none,
)
@@ -63,6 +69,20 @@ class PatreonIE(InfoExtractor):
}, {
'url': 'https://www.patreon.com/posts/743933',
'only_matching': True,
+ }, {
+ 'url': 'https://www.patreon.com/posts/kitchen-as-seen-51706779',
+ 'md5': '96656690071f6d64895866008484251b',
+ 'info_dict': {
+ 'id': '555089736',
+ 'ext': 'mp4',
+ 'title': 'KITCHEN AS SEEN ON DEEZ NUTS EXTENDED!',
+ 'uploader': 'Cold Ones',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20210526',
+ 'description': 'md5:557a409bd79d3898689419094934ba79',
+ 'uploader_id': '14936315',
+ },
+ 'skip': 'Patron-only content'
}]
# Currently Patreon exposes download URL via hidden CSS, so login is not
@@ -137,6 +157,19 @@ class PatreonIE(InfoExtractor):
})
if not info.get('url'):
+ # handle Vimeo embeds
+ if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
+ embed_html = try_get(attributes, lambda x: x['embed']['html'])
+ v_url = url_or_none(compat_urllib_parse_unquote(
+ self._search_regex(r'src=(https%3A%2F%2Fplayer\.vimeo\.com.+)%3F', embed_html, 'vimeo url', fatal=False)))
+ if v_url:
+ info.update({
+ '_type': 'url_transparent',
+ 'url': VimeoIE._smuggle_referrer(v_url, 'https://patreon.com'),
+ 'ie_key': 'Vimeo',
+ })
+
+ if not info.get('url'):
embed_url = try_get(attributes, lambda x: x['embed']['url'])
if embed_url:
info.update({
@@ -154,3 +187,56 @@ class PatreonIE(InfoExtractor):
})
return info
+
+
+class PatreonUserIE(InfoExtractor):
+
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>[-\w]+)/?(?:posts/?)?'
+
+ _TESTS = [{
+ 'url': 'https://www.patreon.com/dissonancepod/',
+ 'info_dict': {
+ 'title': 'dissonancepod',
+ },
+ 'playlist_mincount': 68,
+ 'expected_warnings': ['Post not viewable by current user! Skipping!'],
+ }, {
+ 'url': 'https://www.patreon.com/dissonancepod/posts',
+ 'only_matching': True
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PatreonIE.suitable(url) else super(PatreonUserIE, cls).suitable(url)
+
+ def _entries(self, campaign_id, user_id):
+ cursor = None
+ params = {
+ 'fields[campaign]': 'show_audio_post_download_links,name,url',
+ 'fields[post]': 'current_user_can_view,embed,image,is_paid,post_file,published_at,patreon_url,url,post_type,thumbnail_url,title',
+ 'filter[campaign_id]': campaign_id,
+ 'filter[is_draft]': 'false',
+ 'sort': '-published_at',
+ 'json-api-version': 1.0,
+ 'json-api-use-default-includes': 'false',
+ }
+
+ for page in itertools.count(1):
+
+ params.update({'page[cursor]': cursor} if cursor else {})
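+ # the bare Cookie header appears to be required for this
+ # unauthenticated API request to succeed (an observation from
+ # the code, not documented behaviour)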
+ posts_json = self._download_json('https://www.patreon.com/api/posts', user_id, note='Downloading posts page %d' % page, query=params, headers={'Cookie': '.'})
+
+ cursor = try_get(posts_json, lambda x: x['meta']['pagination']['cursors']['next'])
+
+ for post in posts_json.get('data') or []:
+ yield self.url_result(url_or_none(try_get(post, lambda x: x['attributes']['patreon_url'])), 'Patreon')
+
+ if cursor is None:
+ break
+
+ def _real_extract(self, url):
+
+ user_id = self._match_id(url)
+ webpage = self._download_webpage(url, user_id, headers={'Cookie': '.'})
+ campaign_id = self._search_regex(r'https://www\.patreon\.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')
+ return self.playlist_result(self._entries(campaign_id, user_id), playlist_title=user_id)
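_entries is the standard cursor-pagination idiom: fetch a page, yield its posts, then continue while the API keeps returning a next cursor. Stripped of the extractor plumbing (a sketch; fetch stands in for the JSON call):

    import itertools

    def paged(fetch):
        # fetch(cursor) -> (items, next_cursor)
        cursor = None
        for page in itertools.count(1):
            items, cursor = fetch(cursor)
            yield from items
            if cursor is None:
                break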
diff --git a/hypervideo_dl/extractor/pbs.py b/hypervideo_dl/extractor/pbs.py
index d4baa16..0eabf9b 100644
--- a/hypervideo_dl/extractor/pbs.py
+++ b/hypervideo_dl/extractor/pbs.py
@@ -436,7 +436,7 @@ class PBSIE(InfoExtractor):
self._set_cookie('.pbs.org', 'pbsol.station', station)
def _extract_webpage(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
description = None
@@ -600,6 +600,7 @@ class PBSIE(InfoExtractor):
formats = []
http_url = None
+ hls_subs = {}
for num, redirect in enumerate(redirects):
redirect_id = redirect.get('eeid')
@@ -622,8 +623,9 @@ class PBSIE(InfoExtractor):
continue
if determine_ext(format_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
+ hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
+ formats.extend(hls_formats)
else:
formats.append({
'url': format_url,
@@ -666,25 +668,12 @@ class PBSIE(InfoExtractor):
age_limit = US_RATINGS.get(rating_str)
subtitles = {}
- closed_captions_url = info.get('closed_captions_url')
- if closed_captions_url:
- subtitles['en'] = [{
- 'ext': 'ttml',
- 'url': closed_captions_url,
- }]
- mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url)
- if mobj:
- ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1)
- ttml_caption_id = int(ttml_caption_id)
- subtitles['en'].extend([{
- 'url': closed_captions_url.replace(
- ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)),
- 'ext': 'srt',
- }, {
- 'url': closed_captions_url.replace(
- ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)),
- 'ext': 'vtt',
- }])
+ captions = info.get('cc') or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({
+ 'url': caption_url
+ })
+ subtitles = self._merge_subtitles(subtitles, hls_subs)
# info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
# Try turning it to 'program - title' naming scheme if possible
diff --git a/hypervideo_dl/extractor/peertube.py b/hypervideo_dl/extractor/peertube.py
index d9b13ad..1e22f24 100644
--- a/hypervideo_dl/extractor/peertube.py
+++ b/hypervideo_dl/extractor/peertube.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import functools
import re
from .common import InfoExtractor
@@ -13,12 +14,644 @@ from ..utils import (
unified_timestamp,
url_or_none,
urljoin,
+ OnDemandPagedList,
)
class PeerTubeIE(InfoExtractor):
_INSTANCES_RE = r'''(?:
# Taken from https://instances.joinpeertube.org/instances
+ 40two\.tube|
+ a\.metube\.ch|
+ advtv\.ml|
+ algorithmic\.tv|
+ alimulama\.com|
+ arcana\.fun|
+ archive\.vidicon\.org|
+ artefac-paris\.tv|
+ auf1\.eu|
+ battlepenguin\.video|
+ beertube\.epgn\.ch|
+ befree\.nohost\.me|
+ bideoak\.argia\.eus|
+ birkeundnymphe\.de|
+ bitcointv\.com|
+ cattube\.org|
+ clap\.nerv-project\.eu|
+ climatejustice\.video|
+ comf\.tube|
+ conspiracydistillery\.com|
+ darkvapor\.nohost\.me|
+ daschauher\.aksel\.rocks|
+ digitalcourage\.video|
+ dreiecksnebel\.alex-detsch\.de|
+ eduvid\.org|
+ evangelisch\.video|
+ exo\.tube|
+ fair\.tube|
+ fediverse\.tv|
+ film\.k-prod\.fr|
+ flim\.txmn\.tk|
+ fotogramas\.politicaconciencia\.org|
+ ftsi\.ru|
+ gary\.vger\.cloud|
+ graeber\.video|
+ greatview\.video|
+ grypstube\.uni-greifswald\.de|
+ highvoltage\.tv|
+ hpstube\.fr|
+ htp\.live|
+ hyperreal\.tube|
+ juggling\.digital|
+ kino\.kompot\.si|
+ kino\.schuerz\.at|
+ kinowolnosc\.pl|
+ kirche\.peertube-host\.de|
+ kodcast\.com|
+ kolektiva\.media|
+ kraut\.zone|
+ kumi\.tube|
+ lastbreach\.tv|
+ lepetitmayennais\.fr\.nf|
+ lexx\.impa\.me|
+ libertynode\.tv|
+ libra\.syntazia\.org|
+ libremedia\.video|
+ live\.libratoi\.org|
+ live\.nanao\.moe|
+ live\.toobnix\.org|
+ livegram\.net|
+ lolitube\.freedomchan\.moe|
+ lucarne\.balsamine\.be|
+ maindreieck-tv\.de|
+ mani\.tube|
+ manicphase\.me|
+ media\.gzevd\.de|
+ media\.inno3\.cricket|
+ media\.kaitaia\.life|
+ media\.krashboyz\.org|
+ media\.over-world\.org|
+ media\.skewed\.de|
+ media\.undeadnetwork\.de|
+ medias\.pingbase\.net|
+ melsungen\.peertube-host\.de|
+ mirametube\.fr|
+ mojotube\.net|
+ monplaisirtube\.ddns\.net|
+ mountaintown\.video|
+ my\.bunny\.cafe|
+ myfreetube\.de|
+ mytube\.kn-cloud\.de|
+ mytube\.madzel\.de|
+ myworkoutarenapeertube\.cf|
+ nanawel-peertube\.dyndns\.org|
+ nastub\.cz|
+ offenes\.tv|
+ orgdup\.media|
+ ovaltube\.codinglab\.ch|
+ p2ptv\.ru|
+ p\.eertu\.be|
+ p\.lu|
+ peer\.azurs\.fr|
+ peertube1\.zeteo\.me|
+ peertube\.020\.pl|
+ peertube\.0x5e\.eu|
+ peertube\.alpharius\.io|
+ peertube\.am-networks\.fr|
+ peertube\.anduin\.net|
+ peertube\.anzui\.dev|
+ peertube\.arbleizez\.bzh|
+ peertube\.art3mis\.de|
+ peertube\.atilla\.org|
+ peertube\.atsuchan\.page|
+ peertube\.aukfood\.net|
+ peertube\.aventer\.biz|
+ peertube\.b38\.rural-it\.org|
+ peertube\.beeldengeluid\.nl|
+ peertube\.be|
+ peertube\.bgzashtita\.es|
+ peertube\.bitsandlinux\.com|
+ peertube\.biz|
+ peertube\.boba\.best|
+ peertube\.br0\.fr|
+ peertube\.bridaahost\.ynh\.fr|
+ peertube\.bubbletea\.dev|
+ peertube\.bubuit\.net|
+ peertube\.cabaal\.net|
+ peertube\.cats-home\.net|
+ peertube\.chemnitz\.freifunk\.net|
+ peertube\.chevro\.fr|
+ peertube\.chrisspiegl\.com|
+ peertube\.chtisurel\.net|
+ peertube\.cipherbliss\.com|
+ peertube\.cloud\.sans\.pub|
+ peertube\.cpge-brizeux\.fr|
+ peertube\.ctseuro\.com|
+ peertube\.cuatrolibertades\.org|
+ peertube\.cybercirujas\.club|
+ peertube\.cythin\.com|
+ peertube\.davigge\.com|
+ peertube\.dc\.pini\.fr|
+ peertube\.debian\.social|
+ peertube\.demonix\.fr|
+ peertube\.designersethiques\.org|
+ peertube\.desmu\.fr|
+ peertube\.devloprog\.org|
+ peertube\.devol\.it|
+ peertube\.dtmf\.ca|
+ peertube\.ecologie\.bzh|
+ peertube\.eu\.org|
+ peertube\.european-pirates\.eu|
+ peertube\.euskarabildua\.eus|
+ peertube\.fenarinarsa\.com|
+ peertube\.fomin\.site|
+ peertube\.forsud\.be|
+ peertube\.francoispelletier\.org|
+ peertube\.freenet\.ru|
+ peertube\.freetalklive\.com|
+ peertube\.functional\.cafe|
+ peertube\.gardeludwig\.fr|
+ peertube\.gargantia\.fr|
+ peertube\.gcfamily\.fr|
+ peertube\.genma\.fr|
+ peertube\.get-racing\.de|
+ peertube\.gidikroon\.eu|
+ peertube\.gruezishop\.ch|
+ peertube\.habets\.house|
+ peertube\.hackerfraternity\.org|
+ peertube\.ichigo\.everydayimshuflin\.com|
+ peertube\.ignifi\.me|
+ peertube\.inapurna\.org|
+ peertube\.informaction\.info|
+ peertube\.interhop\.org|
+ peertube\.iselfhost\.com|
+ peertube\.it|
+ peertube\.jensdiemer\.de|
+ peertube\.joffreyverd\.fr|
+ peertube\.kalua\.im|
+ peertube\.kathryl\.fr|
+ peertube\.keazilla\.net|
+ peertube\.klaewyss\.fr|
+ peertube\.kodcast\.com|
+ peertube\.kx\.studio|
+ peertube\.lagvoid\.com|
+ peertube\.lavallee\.tech|
+ peertube\.le5emeaxe\.fr|
+ peertube\.lestutosdeprocessus\.fr|
+ peertube\.librenet\.co\.za|
+ peertube\.logilab\.fr|
+ peertube\.louisematic\.site|
+ peertube\.luckow\.org|
+ peertube\.luga\.at|
+ peertube\.lyceeconnecte\.fr|
+ peertube\.manalejandro\.com|
+ peertube\.marud\.fr|
+ peertube\.mattone\.net|
+ peertube\.maxweiss\.io|
+ peertube\.monlycee\.net|
+ peertube\.mxinfo\.fr|
+ peertube\.myrasp\.eu|
+ peertube\.nebelcloud\.de|
+ peertube\.netzbegruenung\.de|
+ peertube\.newsocial\.tech|
+ peertube\.nicolastissot\.fr|
+ peertube\.nz|
+ peertube\.offerman\.com|
+ peertube\.opencloud\.lu|
+ peertube\.orthus\.link|
+ peertube\.patapouf\.xyz|
+ peertube\.pi2\.dev|
+ peertube\.plataformess\.org|
+ peertube\.pl|
+ peertube\.portaesgnos\.org|
+ peertube\.r2\.enst\.fr|
+ peertube\.r5c3\.fr|
+ peertube\.radres\.xyz|
+ peertube\.red|
+ peertube\.robonomics\.network|
+ peertube\.rtnkv\.cloud|
+ peertube\.runfox\.tk|
+ peertube\.satoshishop\.de|
+ peertube\.scic-tetris\.org|
+ peertube\.securitymadein\.lu|
+ peertube\.semweb\.pro|
+ peertube\.social\.my-wan\.de|
+ peertube\.soykaf\.org|
+ peertube\.stefofficiel\.me|
+ peertube\.stream|
+ peertube\.su|
+ peertube\.swrs\.net|
+ peertube\.takeko\.cyou|
+ peertube\.tangentfox\.com|
+ peertube\.taxinachtegel\.de|
+ peertube\.thenewoil\.xyz|
+ peertube\.ti-fr\.com|
+ peertube\.tiennot\.net|
+ peertube\.troback\.com|
+ peertube\.tspu\.edu\.ru|
+ peertube\.tux\.ovh|
+ peertube\.tv|
+ peertube\.tweb\.tv|
+ peertube\.ucy\.de|
+ peertube\.underworld\.fr|
+ peertube\.us\.to|
+ peertube\.ventresmous\.fr|
+ peertube\.vlaki\.cz|
+ peertube\.w\.utnw\.de|
+ peertube\.westring\.digital|
+ peertube\.xwiki\.com|
+ peertube\.zoz-serv\.org|
+ peervideo\.ru|
+ periscope\.numenaute\.org|
+ perron-tube\.de|
+ petitlutinartube\.fr|
+ phijkchu\.com|
+ pierre\.tube|
+ piraten\.space|
+ play\.rosano\.ca|
+ player\.ojamajo\.moe|
+ plextube\.nl|
+ pocketnetpeertube1\.nohost\.me|
+ pocketnetpeertube3\.nohost\.me|
+ pocketnetpeertube4\.nohost\.me|
+ pocketnetpeertube5\.nohost\.me|
+ pocketnetpeertube6\.nohost\.me|
+ pt\.24-7\.ro|
+ pt\.apathy\.top|
+ pt\.diaspodon\.fr|
+ pt\.fedi\.tech|
+ pt\.maciej\.website|
+ ptb\.lunarviews\.net|
+ ptmir1\.inter21\.net|
+ ptmir2\.inter21\.net|
+ ptmir3\.inter21\.net|
+ ptmir4\.inter21\.net|
+ ptmir5\.inter21\.net|
+ ptube\.horsentiers\.fr|
+ ptube\.xmanifesto\.club|
+ queermotion\.org|
+ re-wizja\.re-medium\.com|
+ regarder\.sans\.pub|
+ ruraletv\.ovh|
+ s1\.gegenstimme\.tv|
+ s2\.veezee\.tube|
+ sdmtube\.fr|
+ sender-fm\.veezee\.tube|
+ serv1\.wiki-tube\.de|
+ serv3\.wiki-tube\.de|
+ sickstream\.net|
+ sleepy\.tube|
+ sovran\.video|
+ spectra\.video|
+ stream\.elven\.pw|
+ stream\.k-prod\.fr|
+ stream\.shahab\.nohost\.me|
+ streamsource\.video|
+ studios\.racer159\.com|
+ testtube\.florimond\.eu|
+ tgi\.hosted\.spacebear\.ee|
+ thaitube\.in\.th|
+ the\.jokertv\.eu|
+ theater\.ethernia\.net|
+ thecool\.tube|
+ tilvids\.com|
+ toob\.bub\.org|
+ tpaw\.video|
+ truetube\.media|
+ tuba\.lhub\.pl|
+ tube-aix-marseille\.beta\.education\.fr|
+ tube-amiens\.beta\.education\.fr|
+ tube-besancon\.beta\.education\.fr|
+ tube-bordeaux\.beta\.education\.fr|
+ tube-clermont-ferrand\.beta\.education\.fr|
+ tube-corse\.beta\.education\.fr|
+ tube-creteil\.beta\.education\.fr|
+ tube-dijon\.beta\.education\.fr|
+ tube-education\.beta\.education\.fr|
+ tube-grenoble\.beta\.education\.fr|
+ tube-lille\.beta\.education\.fr|
+ tube-limoges\.beta\.education\.fr|
+ tube-montpellier\.beta\.education\.fr|
+ tube-nancy\.beta\.education\.fr|
+ tube-nantes\.beta\.education\.fr|
+ tube-nice\.beta\.education\.fr|
+ tube-normandie\.beta\.education\.fr|
+ tube-orleans-tours\.beta\.education\.fr|
+ tube-outremer\.beta\.education\.fr|
+ tube-paris\.beta\.education\.fr|
+ tube-poitiers\.beta\.education\.fr|
+ tube-reims\.beta\.education\.fr|
+ tube-rennes\.beta\.education\.fr|
+ tube-strasbourg\.beta\.education\.fr|
+ tube-toulouse\.beta\.education\.fr|
+ tube-versailles\.beta\.education\.fr|
+ tube1\.it\.tuwien\.ac\.at|
+ tube\.abolivier\.bzh|
+ tube\.ac-amiens\.fr|
+ tube\.aerztefueraufklaerung\.de|
+ tube\.alexx\.ml|
+ tube\.amic37\.fr|
+ tube\.anufrij\.de|
+ tube\.apolut\.net|
+ tube\.arkhalabs\.io|
+ tube\.arthack\.nz|
+ tube\.as211696\.net|
+ tube\.avensio\.de|
+ tube\.azbyka\.ru|
+ tube\.azkware\.net|
+ tube\.bachaner\.fr|
+ tube\.bmesh\.org|
+ tube\.borked\.host|
+ tube\.bstly\.de|
+ tube\.chaoszone\.tv|
+ tube\.chatelet\.ovh|
+ tube\.cloud-libre\.eu|
+ tube\.cms\.garden|
+ tube\.cowfee\.moe|
+ tube\.cryptography\.dog|
+ tube\.darknight-coffee\.org|
+ tube\.dev\.lhub\.pl|
+ tube\.distrilab\.fr|
+ tube\.dsocialize\.net|
+ tube\.ebin\.club|
+ tube\.fdn\.fr|
+ tube\.florimond\.eu|
+ tube\.foxarmy\.ml|
+ tube\.foxden\.party|
+ tube\.frischesicht\.de|
+ tube\.futuretic\.fr|
+ tube\.gnous\.eu|
+ tube\.grap\.coop|
+ tube\.graz\.social|
+ tube\.grin\.hu|
+ tube\.hackerscop\.org|
+ tube\.hordearii\.fr|
+ tube\.jeena\.net|
+ tube\.kai-stuht\.com|
+ tube\.kockatoo\.org|
+ tube\.kotur\.org|
+ tube\.lacaveatonton\.ovh|
+ tube\.linkse\.media|
+ tube\.lokad\.com|
+ tube\.lucie-philou\.com|
+ tube\.melonbread\.xyz|
+ tube\.mfraters\.net|
+ tube\.motuhake\.xyz|
+ tube\.mrbesen\.de|
+ tube\.nah\.re|
+ tube\.nchoco\.net|
+ tube\.novg\.net|
+ tube\.nox-rhea\.org|
+ tube\.nuagelibre\.fr|
+ tube\.nx12\.net|
+ tube\.octaplex\.net|
+ tube\.odat\.xyz|
+ tube\.oisux\.org|
+ tube\.opportunis\.me|
+ tube\.org\.il|
+ tube\.ortion\.xyz|
+ tube\.others\.social|
+ tube\.picasoft\.net|
+ tube\.plomlompom\.com|
+ tube\.pmj\.rocks|
+ tube\.portes-imaginaire\.org|
+ tube\.pyngu\.com|
+ tube\.rebellion\.global|
+ tube\.rhythms-of-resistance\.org|
+ tube\.rita\.moe|
+ tube\.rsi\.cnr\.it|
+ tube\.s1gm4\.eu|
+ tube\.saumon\.io|
+ tube\.schleuss\.online|
+ tube\.schule\.social|
+ tube\.seditio\.fr|
+ tube\.shanti\.cafe|
+ tube\.shela\.nu|
+ tube\.skrep\.in|
+ tube\.sp-codes\.de|
+ tube\.sp4ke\.com|
+ tube\.superseriousbusiness\.org|
+ tube\.systest\.eu|
+ tube\.tappret\.fr|
+ tube\.tardis\.world|
+ tube\.toontoet\.nl|
+ tube\.tpshd\.de|
+ tube\.troopers\.agency|
+ tube\.tylerdavis\.xyz|
+ tube\.undernet\.uy|
+ tube\.vigilian-consulting\.nl|
+ tube\.vraphim\.com|
+ tube\.wehost\.lgbt|
+ tube\.wien\.rocks|
+ tube\.wolfe\.casa|
+ tube\.xd0\.de|
+ tube\.xy-space\.de|
+ tube\.yapbreak\.fr|
+ tubedu\.org|
+ tubes\.jodh\.us|
+ tuktube\.com|
+ turkum\.me|
+ tututu\.tube|
+ tuvideo\.encanarias\.info|
+ tv1\.cocu\.cc|
+ tv1\.gomntu\.space|
+ tv2\.cocu\.cc|
+ tv\.adn\.life|
+ tv\.atmx\.ca|
+ tv\.bitma\.st|
+ tv\.generallyrubbish\.net\.au|
+ tv\.lumbung\.space|
+ tv\.mattchristiansenmedia\.com|
+ tv\.netwhood\.online|
+ tv\.neue\.city|
+ tv\.piejacker\.net|
+ tv\.pirateradio\.social|
+ tv\.undersco\.re|
+ tvox\.ru|
+ twctube\.twc-zone\.eu|
+ unfilter\.tube|
+ v\.basspistol\.org|
+ v\.kisombrella\.top|
+ v\.lastorder\.xyz|
+ v\.lor\.sh|
+ v\.phreedom\.club|
+ v\.sil\.sh|
+ v\.szy\.io|
+ v\.xxxapex\.com|
+ veezee\.tube|
+ vid\.dascoyote\.xyz|
+ vid\.garwood\.io|
+ vid\.ncrypt\.at|
+ vid\.pravdastalina\.info|
+ vid\.qorg11\.net|
+ vid\.rajeshtaylor\.com|
+ vid\.samtripoli\.com|
+ vid\.werefox\.dev|
+ vid\.wildeboer\.net|
+ video-cave-v2\.de|
+ video\.076\.ne\.jp|
+ video\.1146\.nohost\.me|
+ video\.altertek\.org|
+ video\.anartist\.org|
+ video\.apps\.thedoodleproject\.net|
+ video\.artist\.cx|
+ video\.asgardius\.company|
+ video\.balsillie\.net|
+ video\.bards\.online|
+ video\.binarydad\.com|
+ video\.blast-info\.fr|
+ video\.catgirl\.biz|
+ video\.cigliola\.com|
+ video\.cm-en-transition\.fr|
+ video\.cnt\.social|
+ video\.coales\.co|
+ video\.codingfield\.com|
+ video\.comptoir\.net|
+ video\.comune\.trento\.it|
+ video\.cpn\.so|
+ video\.csc49\.fr|
+ video\.cybre\.town|
+ video\.demokratischer-sommer\.de|
+ video\.discord-insoumis\.fr|
+ video\.dolphincastle\.com|
+ video\.dresden\.network|
+ video\.ecole-89\.com|
+ video\.elgrillolibertario\.org|
+ video\.emergeheart\.info|
+ video\.eradicatinglove\.xyz|
+ video\.ethantheenigma\.me|
+ video\.exodus-privacy\.eu\.org|
+ video\.fbxl\.net|
+ video\.fhtagn\.org|
+ video\.greenmycity\.eu|
+ video\.guerredeclasse\.fr|
+ video\.gyt\.is|
+ video\.hackers\.town|
+ video\.hardlimit\.com|
+ video\.hooli\.co|
+ video\.igem\.org|
+ video\.internet-czas-dzialac\.pl|
+ video\.islameye\.com|
+ video\.kicik\.fr|
+ video\.kuba-orlik\.name|
+ video\.kyushojitsu\.ca|
+ video\.lavolte\.net|
+ video\.lespoesiesdheloise\.fr|
+ video\.liberta\.vip|
+ video\.liege\.bike|
+ video\.linc\.systems|
+ video\.linux\.it|
+ video\.linuxtrent\.it|
+ video\.lokal\.social|
+ video\.lono\.space|
+ video\.lunasqu\.ee|
+ video\.lundi\.am|
+ video\.marcorennmaus\.de|
+ video\.mass-trespass\.uk|
+ video\.mugoreve\.fr|
+ video\.mundodesconocido\.com|
+ video\.mycrowd\.ca|
+ video\.nogafam\.es|
+ video\.odayacres\.farm|
+ video\.ozgurkon\.org|
+ video\.p1ng0ut\.social|
+ video\.p3x\.de|
+ video\.pcf\.fr|
+ video\.pony\.gallery|
+ video\.potate\.space|
+ video\.pourpenser\.pro|
+ video\.progressiv\.dev|
+ video\.resolutions\.it|
+ video\.rw501\.de|
+ video\.screamer\.wiki|
+ video\.sdm-tools\.net|
+ video\.sftblw\.moe|
+ video\.shitposter\.club|
+ video\.skyn3t\.in|
+ video\.soi\.ch|
+ video\.stuartbrand\.co\.uk|
+ video\.thinkof\.name|
+ video\.toot\.pt|
+ video\.triplea\.fr|
+ video\.turbo\.chat|
+ video\.vaku\.org\.ua|
+ video\.veloma\.org|
+ video\.violoncello\.ch|
+ video\.wilkie\.how|
+ video\.wsf2021\.info|
+ videorelay\.co|
+ videos-passages\.huma-num\.fr|
+ videos\.3d-wolf\.com|
+ videos\.ahp-numerique\.fr|
+ videos\.alexandrebadalo\.pt|
+ videos\.archigny\.net|
+ videos\.benjaminbrady\.ie|
+ videos\.buceoluegoexisto\.com|
+ videos\.capas\.se|
+ videos\.casually\.cat|
+ videos\.cloudron\.io|
+ videos\.coletivos\.org|
+ videos\.danksquad\.org|
+ videos\.denshi\.live|
+ videos\.fromouter\.space|
+ videos\.fsci\.in|
+ videos\.globenet\.org|
+ videos\.hauspie\.fr|
+ videos\.hush\.is|
+ videos\.john-livingston\.fr|
+ videos\.jordanwarne\.xyz|
+ videos\.lavoixdessansvoix\.org|
+ videos\.leslionsfloorball\.fr|
+ videos\.lucero\.top|
+ videos\.martyn\.berlin|
+ videos\.mastodont\.cat|
+ videos\.monstro1\.com|
+ videos\.npo\.city|
+ videos\.optoutpod\.com|
+ videos\.petch\.rocks|
+ videos\.pzelawski\.xyz|
+ videos\.rampin\.org|
+ videos\.scanlines\.xyz|
+ videos\.shmalls\.pw|
+ videos\.sibear\.fr|
+ videos\.stadtfabrikanten\.org|
+ videos\.tankernn\.eu|
+ videos\.testimonia\.org|
+ videos\.thisishowidontdisappear\.com|
+ videos\.traumaheilung\.net|
+ videos\.trom\.tf|
+ videos\.wakkerewereld\.nu|
+ videos\.weblib\.re|
+ videos\.yesil\.club|
+ vids\.roshless\.me|
+ vids\.tekdmn\.me|
+ vidz\.dou\.bet|
+ vod\.lumikko\.dev|
+ vs\.uniter\.network|
+ vulgarisation-informatique\.fr|
+ watch\.breadtube\.tv|
+ watch\.deranalyst\.ch|
+ watch\.ignorance\.eu|
+ watch\.krazy\.party|
+ watch\.libertaria\.space|
+ watch\.rt4mn\.org|
+ watch\.softinio\.com|
+ watch\.tubelab\.video|
+ web-fellow\.de|
+ webtv\.vandoeuvre\.net|
+ wechill\.space|
+ wikileaks\.video|
+ wiwi\.video|
+ worldofvids\.com|
+ wwtube\.net|
+ www4\.mir\.inter21\.net|
+ www\.birkeundnymphe\.de|
+ www\.captain-german\.com|
+ www\.wiki-tube\.de|
+ xxivproduction\.video|
+ xxx\.noho\.st|
+
+ # from youtube-dl
peertube\.rainbowswingers\.net|
tube\.stanisic\.nl|
peer\.suiri\.us|
@@ -410,24 +1043,24 @@ class PeerTubeIE(InfoExtractor):
video\.colibris-outilslibres\.org|
tube\.svnet\.fr|
peertube\.video|
- peertube3\.cpy\.re|
peertube2\.cpy\.re|
+ peertube3\.cpy\.re|
videos\.tcit\.fr|
peertube\.cpy\.re|
canard\.tube
)'''
- _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+ _UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
_VALID_URL = r'''(?x)
(?:
peertube:(?P<host>[^:]+):|
- https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
+ https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/
)
(?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE)
_TESTS = [{
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
- 'md5': '9bed8c0137913e17b86334e5885aacff',
+ 'md5': '8563064d245a4be5705bddb22bb00a28',
'info_dict': {
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
'ext': 'mp4',
@@ -439,9 +1072,9 @@ class PeerTubeIE(InfoExtractor):
'uploader': 'Framasoft',
'uploader_id': '3',
'uploader_url': 'https://framatube.org/accounts/framasoft',
- 'channel': 'Les vidéos de Framasoft',
- 'channel_id': '2',
- 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+ 'channel': 'A propos de PeerTube',
+ 'channel_id': '2215',
+ 'channel_url': 'https://framatube.org/video-channels/joinpeertube',
'language': 'en',
'license': 'Attribution - Share Alike',
'duration': 113,
@@ -452,6 +1085,39 @@ class PeerTubeIE(InfoExtractor):
'categories': ['Science & Technology'],
}
}, {
+ 'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
+ 'info_dict': {
+ 'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
+ 'ext': 'mp4',
+ 'title': 'E2E tests',
+ 'uploader_id': '37855',
+ 'timestamp': 1589276219,
+ 'upload_date': '20200512',
+ 'uploader': 'chocobozzz',
+ }
+ }, {
+ 'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
+ 'info_dict': {
+ 'id': '3fbif9S3WmtTP8gGsC5HBd',
+ 'ext': 'mp4',
+ 'title': 'E2E tests',
+ 'uploader_id': '37855',
+ 'timestamp': 1589276219,
+ 'upload_date': '20200512',
+ 'uploader': 'chocobozzz',
+ },
+ }, {
+ 'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
+ 'info_dict': {
+ 'id': '3fbif9S3WmtTP8gGsC5HBd',
+ 'ext': 'mp4',
+ 'title': 'E2E tests',
+ 'uploader_id': '37855',
+ 'timestamp': 1589276219,
+ 'upload_date': '20200512',
+ 'uploader': 'chocobozzz',
+ },
+ }, {
# Issue #26002
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
'info_dict': {
@@ -464,29 +1130,30 @@ class PeerTubeIE(InfoExtractor):
'uploader': 'Drew DeVault',
}
}, {
- 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
+ 'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True,
}, {
# nsfw
- 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
+ 'url': 'https://vod.ksite.de/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
'only_matching': True,
}, {
- 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+ 'url': 'https://vod.ksite.de/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
'only_matching': True,
}, {
- 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+ 'url': 'https://peertube.tv/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
'only_matching': True,
}, {
- 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+ 'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
'only_matching': True,
}]
@staticmethod
def _extract_peertube_url(webpage, source_url):
mobj = re.match(
- r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
+ r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|w)/(?P<id>%s)'
% PeerTubeIE._UUID_RE, source_url)
if mobj and any(p in webpage for p in (
+ 'meta property="og:platform" content="PeerTube"',
'<title>PeerTube<',
'There will be other non JS-based clients to access PeerTube',
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
@@ -529,7 +1196,7 @@ class PeerTubeIE(InfoExtractor):
return subtitles
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host') or mobj.group('host_2')
video_id = mobj.group('id')
@@ -569,15 +1236,15 @@ class PeerTubeIE(InfoExtractor):
formats.append(f)
self._sort_formats(formats)
- full_description = self._call_api(
- host, video_id, 'description', note='Downloading description JSON',
- fatal=False)
+ description = video.get('description')
+ if description and len(description) >= 250:
+ # description is shortened
+ full_description = self._call_api(
+ host, video_id, 'description', note='Downloading description JSON',
+ fatal=False)
- description = None
- if isinstance(full_description, dict):
- description = str_or_none(full_description.get('description'))
- if not description:
- description = video.get('description')
+ if isinstance(full_description, dict):
+ description = str_or_none(full_description.get('description')) or description
subtitles = self.extract_subtitles(host, video_id)
@@ -626,3 +1293,110 @@ class PeerTubeIE(InfoExtractor):
'subtitles': subtitles,
'webpage_url': webpage_url,
}
+
+
+class PeerTubePlaylistIE(InfoExtractor):
+ IE_NAME = 'PeerTube:Playlist'
+ _TYPES = {
+ 'a': 'accounts',
+ 'c': 'video-channels',
+ 'w/p': 'video-playlists',
+ }
+ _VALID_URL = r'''(?x)
+ https?://(?P<host>%s)/(?P<type>(?:%s))/
+ (?P<id>[^/]+)
+ ''' % (PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys()))
+ _TESTS = [{
+ 'url': 'https://peertube.tux.ovh/w/p/3af94cba-95e8-4b74-b37a-807ab6d82526',
+ 'info_dict': {
+ 'id': '3af94cba-95e8-4b74-b37a-807ab6d82526',
+ 'description': 'playlist',
+ 'timestamp': 1611171863,
+ 'title': 'playlist',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://peertube.tux.ovh/w/p/wkyqcQBnsvFxtUB2pkYc1e',
+ 'info_dict': {
+ 'id': 'wkyqcQBnsvFxtUB2pkYc1e',
+ 'description': 'Cette liste de vidéos contient uniquement les jeux qui peuvent être terminés en une seule vidéo.',
+ 'title': 'Let\'s Play',
+ 'timestamp': 1604147331,
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12',
+ 'info_dict': {
+ 'id': 'hFdJoTuyhNJVa1cDWd1d12',
+ 'description': 'Diversas palestras do Richard Stallman no Brasil.',
+ 'title': 'Richard Stallman no Brasil',
+ 'timestamp': 1599676222,
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'https://peertube2.cpy.re/a/chocobozzz/videos',
+ 'info_dict': {
+ 'id': 'chocobozzz',
+ 'timestamp': 1553874564,
+ 'title': 'chocobozzz',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos',
+ 'info_dict': {
+ 'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8',
+ 'timestamp': 1519917377,
+ 'title': 'Les vidéos de Framasoft',
+ },
+ 'playlist_mincount': 345,
+ }, {
+ 'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos',
+ 'info_dict': {
+ 'id': 'blender_open_movies@video.blender.org',
+ 'timestamp': 1542287810,
+ 'title': 'Official Blender Open Movies',
+ },
+ 'playlist_mincount': 11,
+ }]
+ _API_BASE = 'https://%s/api/v1/%s/%s%s'
+ _PAGE_SIZE = 30
+
+ def call_api(self, host, name, path, base, **kwargs):
+ return self._download_json(
+ self._API_BASE % (host, base, name, path), name, **kwargs)
+
+ def fetch_page(self, host, id, type, page):
+ page += 1
+ video_data = self.call_api(
+ host, id,
+ f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}&nsfw=both',
+ type, note=f'Downloading page {page}').get('data', [])
+ for video in video_data:
+ shortUUID = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID'])
+ video_title = video.get('name') or try_get(video, lambda x: x['video']['name'])
+ yield self.url_result(
+ f'https://{host}/w/{shortUUID}', PeerTubeIE.ie_key(),
+ video_id=shortUUID, video_title=video_title)
+
+ def _extract_playlist(self, host, type, id):
+ info = self.call_api(host, id, '', type, note='Downloading playlist information', fatal=False)
+
+ playlist_title = info.get('displayName')
+ playlist_description = info.get('description')
+ playlist_timestamp = unified_timestamp(info.get('createdAt'))
+ channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName')
+ channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id')
+ thumbnail = info.get('thumbnailPath')
+ thumbnail = f'https://{host}{thumbnail}' if thumbnail else None
+
+ entries = OnDemandPagedList(functools.partial(
+ self.fetch_page, host, id, type), self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, id, playlist_title, playlist_description,
+ timestamp=playlist_timestamp, channel=channel, channel_id=channel_id, thumbnail=thumbnail)
+
+ def _real_extract(self, url):
+ type, host, id = self._match_valid_url(url).group('type', 'host', 'id')
+ type = self._TYPES[type]
+ return self._extract_playlist(host, type, id)
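OnDemandPagedList wraps fetch_page into a lazily-evaluated, sliceable sequence, so playlist-item selection only downloads the pages it touches. In spirit (a sketch; the real class in ..utils also caches fetched pages and trims to exact slice boundaries):

    class OnDemandPagedList:
        def __init__(self, pagefunc, pagesize):
            self._pagefunc, self._pagesize = pagefunc, pagesize

        def getslice(self, start=0, end=None):
            first = start // self._pagesize
            last = first if end is None else (end - 1) // self._pagesize
            for pagenum in range(first, last + 1):
                yield from self._pagefunc(pagenum)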
diff --git a/hypervideo_dl/extractor/peloton.py b/hypervideo_dl/extractor/peloton.py
new file mode 100644
index 0000000..287d341
--- /dev/null
+++ b/hypervideo_dl/extractor/peloton.py
@@ -0,0 +1,222 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_urllib_parse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class PelotonIE(InfoExtractor):
+ IE_NAME = 'peloton'
+ _NETRC_MACHINE = 'peloton'
+ _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)'
+ _TESTS = [{
+ 'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86',
+ 'info_dict': {
+ 'id': '0e9653eb53544eeb881298c8d7a87b86',
+ 'title': '20 min Chest & Back Strength',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'description': 'md5:fcd5be9b9eda0194b470e13219050a66',
+ 'creator': 'Chase Tucker',
+ 'release_timestamp': 1556141400,
+ 'timestamp': 1556141400,
+ 'upload_date': '20190424',
+ 'duration': 1389,
+ 'categories': ['Strength'],
+ 'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'],
+ 'is_live': False,
+ 'chapters': 'count:1',
+ 'subtitles': {'en': [{
+ 'url': r're:^https?://.+',
+ 'ext': 'vtt'
+ }]},
+ }, 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Account needed'
+ }, {
+ 'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
+ 'info_dict': {
+ 'id': '26603d53d6bb4de1b340514864a6a6a8',
+ 'title': '30 min Earth Day Run',
+ 'ext': 'm4a',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'description': 'md5:adc065a073934d7ee0475d217afe0c3d',
+ 'creator': 'Selena Samuela',
+ 'release_timestamp': 1587567600,
+ 'timestamp': 1587567600,
+ 'upload_date': '20200422',
+ 'duration': 1802,
+ 'categories': ['Running'],
+ 'is_live': False,
+ 'chapters': 'count:3'
+ }, 'params': {
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Account needed'
+ }]
+
+ _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'
+
+ def _start_session(self, video_id):
+ self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session')
+
+ def _login(self, video_id):
+ username, password = self._get_login_info()
+ if not (username and password):
+ self.raise_login_required()
+ try:
+ self._download_json(
+ 'https://api.onepeloton.com/auth/login', video_id, note='Logging in',
+ data=json.dumps({
+ 'username_or_email': username,
+ 'password': password,
+ 'with_pubsub': False
+ }).encode(),
+ headers={'Content-Type': 'application/json', 'User-Agent': 'web'})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ json_string = self._webpage_read_content(e.cause, None, video_id)
+ res = self._parse_json(json_string, video_id)
+ raise ExtractorError(res['message'], expected=res['message'] == 'Login failed')
+ else:
+ raise
+
+ def _get_token(self, video_id):
+ try:
+ subscription = self._download_json(
+ 'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token',
+ data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ json_string = self._webpage_read_content(e.cause, None, video_id)
+ res = self._parse_json(json_string, video_id)
+ raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached')
+ else:
+ raise
+ return subscription['token']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ try:
+ self._start_session(video_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self._login(video_id)
+ self._start_session(video_id)
+ else:
+ raise
+
+ metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id)
+ ride_data = metadata.get('ride')
+ if not ride_data:
+ raise ExtractorError('Missing stream metadata')
+ token = self._get_token(video_id)
+
+ is_live = False
+ if ride_data.get('content_format') == 'audio':
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), compat_urllib_parse.quote(token))
+ formats = [{
+ 'url': url,
+ 'ext': 'm4a',
+ 'format_id': 'audio',
+ 'vcodec': 'none',
+ }]
+ subtitles = {}
+ else:
+ if ride_data.get('vod_stream_url'):
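+ # the token is percent-encoded twice: once as the hdnea value and
+ # once more because the whole stream URL is passed inside the
+ # proxy's url= query parameter (a reading of the code, not
+ # documented behaviour)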
+ url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % (
+ ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]),
+ ride_data['vod_stream_url'],
+ compat_urllib_parse.quote(compat_urllib_parse.quote(token)))
+ elif ride_data.get('live_stream_url'):
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token))
+ is_live = True
+ else:
+ raise ExtractorError('Missing video URL')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+
+ if metadata.get('instructor_cues'):
+ subtitles['cues'] = [{
+ 'data': json.dumps(metadata.get('instructor_cues')),
+ 'ext': 'json'
+ }]
+
+ category = ride_data.get('fitness_discipline_display_name')
+ chapters = [{
+ 'start_time': segment.get('start_time_offset'),
+ 'end_time': segment.get('start_time_offset') + segment.get('length'),
+ 'title': segment.get('name')
+ } for segment in traverse_obj(metadata, ('segments', 'segment_list')) or []]
+
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': ride_data.get('title'),
+ 'formats': formats,
+ 'thumbnail': url_or_none(ride_data.get('image_url')),
+ 'description': str_or_none(ride_data.get('description')),
+ 'creator': traverse_obj(ride_data, ('instructor', 'name')),
+ 'release_timestamp': ride_data.get('original_air_time'),
+ 'timestamp': ride_data.get('original_air_time'),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(ride_data.get('length')),
+ 'categories': [category] if category else None,
+ 'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')),
+ 'is_live': is_live,
+ 'chapters': chapters
+ }
+
+
+class PelotonLiveIE(InfoExtractor):
+ IE_NAME = 'peloton:live'
+ IE_DESC = 'Peloton Live'
+ _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)'
+ _TEST = {
+ 'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b',
+ 'info_dict': {
+ 'id': '32edc92d28044be5bf6c7b6f1f8d1cbc',
+ 'title': '30 min HIIT Ride: Live from Home',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.+\.png',
+ 'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817',
+ 'creator': 'Alex Toussaint',
+ 'release_timestamp': 1587736620,
+ 'timestamp': 1587736620,
+ 'upload_date': '20200424',
+ 'duration': 2014,
+ 'categories': ['Cycling'],
+ 'is_live': False,
+ 'chapters': 'count:3'
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': 'm3u8',
+ },
+ 'skip': 'Account needed'
+ }
+
+ def _real_extract(self, url):
+ workout_id = self._match_id(url)
+ peloton = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id)
+
+ if peloton.get('ride_id'):
+ if not peloton.get('is_live') or peloton.get('is_encore') or peloton.get('status') != 'PRE_START':
+ return self.url_result('https://members.onepeloton.com/classes/player/%s' % peloton['ride_id'])
+ else:
+ raise ExtractorError('Ride has not started', expected=True)
+ else:
+ raise ExtractorError('Missing video ID')
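For reference, the chapters list assembled from segment_list above follows the framework's generic chapter schema: a list of dicts with start/end offsets in seconds (values here invented):

    chapters = [
        {'start_time': 0.0, 'end_time': 300.0, 'title': 'Warmup'},
        {'start_time': 300.0, 'end_time': 1500.0, 'title': 'Ride'},
    ]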
diff --git a/hypervideo_dl/extractor/performgroup.py b/hypervideo_dl/extractor/performgroup.py
index 26942bf..c00d393 100644
--- a/hypervideo_dl/extractor/performgroup.py
+++ b/hypervideo_dl/extractor/performgroup.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import int_or_none
@@ -34,7 +33,7 @@ class PerformGroupIE(InfoExtractor):
})
def _real_extract(self, url):
- player_id, auth_token = re.search(self._VALID_URL, url).groups()
+ player_id, auth_token = self._match_valid_url(url).groups()
bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
video_id = video['uuid']
diff --git a/hypervideo_dl/extractor/periscope.py b/hypervideo_dl/extractor/periscope.py
index b159063..b93a02b 100644
--- a/hypervideo_dl/extractor/periscope.py
+++ b/hypervideo_dl/extractor/periscope.py
@@ -12,6 +12,10 @@ from ..utils import (
class PeriscopeBaseIE(InfoExtractor):
+ _M3U8_HEADERS = {
+ 'Referer': 'https://www.periscope.tv/'
+ }
+
def _call_api(self, method, query, item_id):
return self._download_json(
'https://api.periscope.tv/api/v2/%s' % method,
@@ -54,9 +58,11 @@ class PeriscopeBaseIE(InfoExtractor):
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8',
- m3u8_id=format_id, fatal=fatal)
+ m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
if len(m3u8_formats) == 1:
self._add_width_and_height(m3u8_formats[0], width, height)
+ for f in m3u8_formats:
+ f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
return m3u8_formats
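Two distinct knobs are set here: the headers= argument only applies to fetching the m3u8 manifest itself, while each format's http_headers travel with the format and are sent when the media segments are downloaded. Condensed, the pattern is:

    M3U8_HEADERS = {'Referer': 'https://www.periscope.tv/'}
    fmts = self._extract_m3u8_formats(
        m3u8_url, video_id, 'mp4', m3u8_id=format_id,
        headers=M3U8_HEADERS)  # manifest request
    for f in fmts:
        f.setdefault('http_headers', {}).update(M3U8_HEADERS)  # segment requests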
diff --git a/hypervideo_dl/extractor/philharmoniedeparis.py b/hypervideo_dl/extractor/philharmoniedeparis.py
index 03da64b..9f4899c 100644
--- a/hypervideo_dl/extractor/philharmoniedeparis.py
+++ b/hypervideo_dl/extractor/philharmoniedeparis.py
@@ -79,7 +79,7 @@ class PhilharmonieDeParisIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
- if not formats:
+ if not formats and not self.get_param('ignore_no_formats'):
return
self._sort_formats(formats)
return {
diff --git a/hypervideo_dl/extractor/photobucket.py b/hypervideo_dl/extractor/photobucket.py
index 6c8bbe1..53aebe2 100644
--- a/hypervideo_dl/extractor/photobucket.py
+++ b/hypervideo_dl/extractor/photobucket.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
@@ -23,7 +22,7 @@ class PhotobucketIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
video_extension = mobj.group('ext')
diff --git a/hypervideo_dl/extractor/piksel.py b/hypervideo_dl/extractor/piksel.py
index ecf56ff..a362664 100644
--- a/hypervideo_dl/extractor/piksel.py
+++ b/hypervideo_dl/extractor/piksel.py
@@ -85,7 +85,7 @@ class PikselIE(InfoExtractor):
return response
def _real_extract(self, url):
- ref_id, display_id = re.match(self._VALID_URL, url).groups()
+ ref_id, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
app_token = self._search_regex([
r'clientAPI\s*:\s*"([^"]+)"',
diff --git a/hypervideo_dl/extractor/pinterest.py b/hypervideo_dl/extractor/pinterest.py
index 42528d7..80e9cd0 100644
--- a/hypervideo_dl/extractor/pinterest.py
+++ b/hypervideo_dl/extractor/pinterest.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -56,8 +55,7 @@ class PinterestBaseIE(InfoExtractor):
'height': int_or_none(format_dict.get('height')),
'duration': duration,
})
- self._sort_formats(
- formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+ self._sort_formats(formats)
description = data.get('description') or data.get('description_html') or data.get('seo_description')
timestamp = unified_timestamp(data.get('created_at'))
@@ -166,7 +164,7 @@ class PinterestCollectionIE(PinterestBaseIE):
PinterestCollectionIE, cls).suitable(url)
def _real_extract(self, url):
- username, slug = re.match(self._VALID_URL, url).groups()
+ username, slug = self._match_valid_url(url).groups()
board = self._call_api(
'Board', slug, {
'slug': slug,
diff --git a/hypervideo_dl/extractor/pladform.py b/hypervideo_dl/extractor/pladform.py
index e86c653..dc20300 100644
--- a/hypervideo_dl/extractor/pladform.py
+++ b/hypervideo_dl/extractor/pladform.py
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
+ parse_qs,
xpath_text,
qualities,
)
@@ -56,7 +56,7 @@ class PladformIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
pl = qs.get('pl', ['1'])[0]
video = self._download_xml(
diff --git a/hypervideo_dl/extractor/playfm.py b/hypervideo_dl/extractor/playfm.py
index e766ccc..4298cbe 100644
--- a/hypervideo_dl/extractor/playfm.py
+++ b/hypervideo_dl/extractor/playfm.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -35,7 +34,7 @@ class PlayFMIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
slug = mobj.group('slug')
diff --git a/hypervideo_dl/extractor/playplustv.py b/hypervideo_dl/extractor/playplustv.py
index 1e30ab2..fd72a37 100644
--- a/hypervideo_dl/extractor/playplustv.py
+++ b/hypervideo_dl/extractor/playplustv.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -63,7 +62,7 @@ class PlayPlusTVIE(InfoExtractor):
self._profile = self._call_api('Profiles')['list'][0]['_id']
def _real_extract(self, url):
- project_id, media_id = re.match(self._VALID_URL, url).groups()
+ project_id, media_id = self._match_valid_url(url).groups()
media = self._call_api(
'Media', media_id, {
'profileId': self._profile,
diff --git a/hypervideo_dl/extractor/playtvak.py b/hypervideo_dl/extractor/playtvak.py
index 4c5f579..84e92dd 100644
--- a/hypervideo_dl/extractor/playtvak.py
+++ b/hypervideo_dl/extractor/playtvak.py
@@ -150,7 +150,7 @@ class PlaytvakIE(InfoExtractor):
ext = 'mp4'
# Some streams have mp3 audio which does not play
# well with ffmpeg filter aac_adtstoasc
- preference = -1
+ preference = -10
elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests
continue
else: # Other formats not supported yet
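The playtvak change drops the problematic mp3-audio HLS variant from preference -1 to -10, pushing it further down the format ranking. An illustration of the effect, assuming preference sorts descending with 0 as the default (format dicts invented):

    formats = [
        {'format_id': 'hls-mp3-audio', 'preference': -10},
        {'format_id': 'rtmp', 'preference': 0},
    ]
    best_first = sorted(formats, key=lambda f: f.get('preference') or 0, reverse=True)
    assert [f['format_id'] for f in best_first] == ['rtmp', 'hls-mp3-audio']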
diff --git a/hypervideo_dl/extractor/playwire.py b/hypervideo_dl/extractor/playwire.py
index 4d96a10..9c9e597 100644
--- a/hypervideo_dl/extractor/playwire.py
+++ b/hypervideo_dl/extractor/playwire.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -46,7 +45,7 @@ class PlaywireIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')
player = self._download_json(
diff --git a/hypervideo_dl/extractor/pluralsight.py b/hypervideo_dl/extractor/pluralsight.py
index 2d63855..801057e 100644
--- a/hypervideo_dl/extractor/pluralsight.py
+++ b/hypervideo_dl/extractor/pluralsight.py
@@ -17,6 +17,7 @@ from ..utils import (
float_or_none,
int_or_none,
parse_duration,
+ parse_qs,
qualities,
srt_subtitles_timecode,
try_get,
@@ -273,7 +274,7 @@ query viewClip {
return srt
def _real_extract(self, url):
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
author = qs.get('author', [None])[0]
name = qs.get('name', [None])[0]
@@ -337,11 +338,11 @@ query viewClip {
# In order to minimize the number of calls to the ViewClip API and reduce
# the probability of being throttled or banned by Pluralsight, we request
# only a single format unless a formats listing is explicitly requested.
- if self._downloader.params.get('listformats', False):
+ if self.get_param('listformats', False):
allowed_qualities = ALLOWED_QUALITIES
else:
def guess_allowed_qualities():
- req_format = self._downloader.params.get('format') or 'best'
+ req_format = self.get_param('format') or 'best'
req_format_split = req_format.split('-', 1)
if len(req_format_split) > 1:
req_ext, req_quality = req_format_split
@@ -349,7 +350,7 @@ query viewClip {
for allowed_quality in ALLOWED_QUALITIES:
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
return (AllowedQuality(req_ext, (req_quality, )), )
- req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
+ req_ext = 'webm' if self.get_param('prefer_free_formats') else 'mp4'
return (AllowedQuality(req_ext, (best_quality, )), )
allowed_qualities = guess_allowed_qualities()
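Several hunks in this patch (pluralsight here; pokemon, prosiebensat1 and rai below) replace direct self._downloader.params.get(...) lookups with self.get_param(...). A minimal sketch of the accessor, assuming it simply proxies the owning downloader's option dict and degrades safely when no downloader is attached:

    class ExtractorSketch:
        def __init__(self, downloader=None):
            self._downloader = downloader  # set by the real framework

        def get_param(self, name, default=None):
            if self._downloader:
                return self._downloader.params.get(name, default)
            return default

    assert ExtractorSketch().get_param('listformats', False) is False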
diff --git a/hypervideo_dl/extractor/plutotv.py b/hypervideo_dl/extractor/plutotv.py
new file mode 100644
index 0000000..0cf8246
--- /dev/null
+++ b/hypervideo_dl/extractor/plutotv.py
@@ -0,0 +1,184 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class PlutoTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?pluto\.tv(?:/en)?/on-demand
+ /(?P<video_type>movies|series)
+ /(?P<series_or_movie_slug>[^/]+)
+ (?:
+ /seasons?/(?P<season_no>\d+)
+ (?:/episode/(?P<episode_slug>[^/]+))?
+ )?
+ /?(?:$|[#?])'''
+
+ _INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/'
+ _INFO_QUERY_PARAMS = {
+ 'appName': 'web',
+ 'appVersion': 'na',
+ 'clientID': compat_str(uuid.uuid1()),
+ 'clientModelNumber': 'na',
+ 'serverSideAds': 'false',
+ 'deviceMake': 'unknown',
+ 'deviceModel': 'web',
+ 'deviceType': 'web',
+ 'deviceVersion': 'unknown',
+ 'sid': compat_str(uuid.uuid1()),
+ }
+ _TESTS = [
+ {
+ 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/2/episode/its-in-the-cards-2009-2-3',
+ 'md5': 'ebcdd8ed89aaace9df37924f722fd9bd',
+ 'info_dict': {
+ 'id': '5de6c598e9379ae4912df0a8',
+ 'ext': 'mp4',
+ 'title': 'It\'s In The Cards',
+ 'episode': 'It\'s In The Cards',
+ 'description': 'The teams face off against each other in a 3-on-2 soccer showdown. Strategy comes into play, though, as each team gets to select their opposing teams’ two defenders.',
+ 'series': 'I Love Money',
+ 'season_number': 2,
+ 'episode_number': 3,
+ 'duration': 3600,
+ }
+ }, {
+ 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/',
+ 'playlist_count': 11,
+ 'info_dict': {
+ 'id': '5de6c582e9379ae4912dedbd',
+ 'title': 'I Love Money - Season 1',
+ }
+ }, {
+ 'url': 'https://pluto.tv/on-demand/series/i-love-money/',
+ 'playlist_count': 26,
+ 'info_dict': {
+ 'id': '5de6c582e9379ae4912dedbd',
+ 'title': 'I Love Money',
+ }
+ }, {
+ 'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1',
+ 'md5': '3cead001d317a018bf856a896dee1762',
+ 'info_dict': {
+ 'id': '5e83ac701fa6a9001bb9df24',
+ 'ext': 'mp4',
+ 'title': 'Arrival',
+ 'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.',
+ 'duration': 9000,
+ }
+ }, {
+ 'url': 'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1',
+ 'only_matching': True,
+ }
+ ]
+
+ def _to_ad_free_formats(self, video_id, formats, subtitles):
+ ad_free_formats, ad_free_subtitles, m3u8_urls = [], {}, set()
+ for fmt in formats:
+ res = self._download_webpage(
+ fmt.get('url'), video_id, note='Downloading m3u8 playlist',
+ fatal=False)
+ if not res:
+ continue
+ first_segment_url = re.search(
+ r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$', res,
+ re.MULTILINE)
+ if first_segment_url:
+ m3u8_urls.add(
+ compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8'))
+ continue
+ first_segment_url = re.search(
+ r'^(https?://.*/).+\-0+\.ts$', res,
+ re.MULTILINE)
+ if first_segment_url:
+ m3u8_urls.add(
+ compat_urlparse.urljoin(first_segment_url.group(1), 'master.m3u8'))
+ continue
+
+ for m3u8_url in m3u8_urls:
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ ad_free_formats.extend(fmts)
+ ad_free_subtitles = self._merge_subtitles(ad_free_subtitles, subs)
+ if ad_free_formats:
+ formats, subtitles = ad_free_formats, ad_free_subtitles
+ else:
+ self.report_warning('Unable to find ad-free formats')
+ return formats, subtitles
+
+ def _get_video_info(self, video_json, slug, series_name=None):
+ video_id = video_json.get('_id', slug)
+ formats, subtitles = [], {}
+ for video_url in try_get(video_json, lambda x: x['stitched']['urls'], list) or []:
+ if video_url.get('type') != 'hls':
+ continue
+ url = url_or_none(video_url.get('url'))
+
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles)
+ self._sort_formats(formats)
+
+ info = {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': video_json.get('name'),
+ 'description': video_json.get('description'),
+ 'duration': float_or_none(video_json.get('duration'), scale=1000),
+ }
+ if series_name:
+ info.update({
+ 'series': series_name,
+ 'episode': video_json.get('name'),
+ 'season_number': int_or_none(video_json.get('season')),
+ 'episode_number': int_or_none(video_json.get('number')),
+ })
+ return info
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url).groupdict()
+ info_slug = mobj['series_or_movie_slug']
+ video_json = self._download_json(self._INFO_URL + info_slug, info_slug, query=self._INFO_QUERY_PARAMS)
+
+ if mobj['video_type'] == 'series':
+ series_name = video_json.get('name', info_slug)
+ season_number, episode_slug = mobj.get('season_number'), mobj.get('episode_slug')
+
+ videos = []
+ for season in video_json['seasons']:
+ if season_number is not None and season_number != int_or_none(season.get('number')):
+ continue
+ for episode in season['episodes']:
+ if episode_slug is not None and episode_slug != episode.get('slug'):
+ continue
+ videos.append(self._get_video_info(episode, episode_slug, series_name))
+ if not videos:
+ raise ExtractorError('Failed to find any videos to extract')
+ if episode_slug is not None and len(videos) == 1:
+ return videos[0]
+ playlist_title = series_name
+ if season_number is not None:
+ playlist_title += ' - Season %d' % season_number
+ return self.playlist_result(videos,
+ playlist_id=video_json.get('_id', info_slug),
+ playlist_title=playlist_title)
+ return self._get_video_info(video_json, info_slug)
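The new PlutoTV extractor's _to_ad_free_formats works by reading each stitched (ad-spliced) playlist body, locating the first media-segment URL, and rebuilding an un-stitched master.m3u8 next to it. A worked illustration of the first rewrite branch, using the same regex with an invented segment URL:

    import re
    from urllib.parse import urljoin

    playlist_body = 'https://siloh.example.com/clip/hls/0-end/3000.ts'
    m = re.search(r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$',
                  playlist_body, re.MULTILINE)
    master = urljoin(m.group(1), '0-end/master.m3u8')
    assert master == 'https://siloh.example.com/clip/hls/0-end/master.m3u8'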
diff --git a/hypervideo_dl/extractor/podomatic.py b/hypervideo_dl/extractor/podomatic.py
index e782e3f..673a3ab 100644
--- a/hypervideo_dl/extractor/podomatic.py
+++ b/hypervideo_dl/extractor/podomatic.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import int_or_none
@@ -46,7 +45,7 @@ class PodomaticIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
channel = mobj.group('channel') or mobj.group('channel_2')
diff --git a/hypervideo_dl/extractor/pokemon.py b/hypervideo_dl/extractor/pokemon.py
index 80222d4..402b574 100644
--- a/hypervideo_dl/extractor/pokemon.py
+++ b/hypervideo_dl/extractor/pokemon.py
@@ -1,12 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
extract_attributes,
int_or_none,
+ js_to_json,
+ merge_dicts,
)
@@ -47,7 +49,7 @@ class PokemonIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id, display_id = re.match(self._VALID_URL, url).groups()
+ video_id, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id or display_id)
video_data = extract_attributes(self._search_regex(
r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
@@ -69,3 +71,70 @@ class PokemonIE(InfoExtractor):
'episode_number': int_or_none(video_data.get('data-video-episode')),
'ie_key': 'LimelightMedia',
}
+
+
+class PokemonWatchIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
+ _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
+ _TESTS = [{
+ 'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
+ 'md5': '62833938a31e61ab49ada92f524c42ff',
+ 'info_dict': {
+ 'id': '8309a40969894a8e8d5bc1311e9c5667',
+ 'ext': 'mp4',
+ 'title': 'Lillier and the Staff!',
+ 'description': 'md5:338841b8c21b283d24bdc9b568849f04',
+ }
+ }, {
+ 'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
+ 'only_matching': True
+ }, {
+ 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
+ 'only_matching': True
+ }]
+
+ def _extract_media(self, channel_array, video_id):
+ for channel in channel_array:
+ for media in channel.get('media') or []:
+ if media.get('id') == video_id:
+ return media
+ return None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': 'limelight:media:%s' % video_id,
+ 'ie_key': 'LimelightMedia',
+ }
+
+ # API call can be avoided entirely if we are listing formats
+ if self.get_param('listformats', False):
+ return info
+
+ webpage = self._download_webpage(url, video_id)
+ build_vars = self._parse_json(self._search_regex(
+ r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
+ video_id, transform_source=js_to_json)
+ region = build_vars.get('region')
+ channel_array = self._download_json(self._API_URL.format(region), video_id)
+ video_data = self._extract_media(channel_array, video_id)
+
+ if video_data is None:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ info['_type'] = 'url_transparent'
+ images = video_data.get('images') or {}
+
+ return merge_dicts(info, {
+ 'title': video_data.get('title'),
+ 'description': video_data.get('description'),
+ 'thumbnail': images.get('medium') or images.get('small'),
+ 'series': 'Pokémon',
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode': video_data.get('title'),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ })
diff --git a/hypervideo_dl/extractor/polskieradio.py b/hypervideo_dl/extractor/polskieradio.py
index 978d6f8..53fe034 100644
--- a/hypervideo_dl/extractor/polskieradio.py
+++ b/hypervideo_dl/extractor/polskieradio.py
@@ -15,12 +15,13 @@ from ..utils import (
int_or_none,
strip_or_none,
unified_timestamp,
+ unescapeHTML,
)
class PolskieRadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
- _TESTS = [{
+ _TESTS = [{ # Old-style single broadcast.
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
'info_dict': {
'id': '1587943',
@@ -39,14 +40,41 @@ class PolskieRadioIE(InfoExtractor):
'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
},
}],
- }, {
- 'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
+ }, { # New-style single broadcast.
+ 'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo',
+ 'info_dict': {
+ 'id': '2534482',
+ 'title': 'Żagaryści. Poezja jak spoiwo',
+ 'description': 'md5:f18d95d5dcba747a09b635e21a4c0695',
+ },
+ 'playlist': [{
+ 'md5': 'd07559829f61d5a93a75755987ded760',
+ 'info_dict': {
+ 'id': '2516679',
+ 'ext': 'mp3',
+ 'title': 'md5:c6e1234e0b747ad883cb91b7ad06b98c',
+ 'timestamp': 1592654400,
+ 'upload_date': '20200620',
+ 'duration': 1430,
+ 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
+ },
+ }],
+ }, { # Old-style multiple broadcast playlist.
+ 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2487823,Marek-Kondrat-czyta-Mistrza-i-Malgorzate',
+ 'info_dict': {
+ 'id': '2487823',
+ 'title': 'Marek Kondrat czyta "Mistrza i Małgorzatę"',
+ 'description': 'md5:8422a95cc83834f2aaeff9d82e9c8f39',
+ },
+ 'playlist_mincount': 50,
+ }, { # New-style multiple broadcast playlist.
+ 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2541317,Czytamy-Kalendarz-i-klepsydre-Tadeusza-Konwickiego',
'info_dict': {
- 'id': '1635803',
- 'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
- 'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
+ 'id': '2541317',
+ 'title': 'Czytamy "Kalendarz i klepsydrę" Tadeusza Konwickiego',
+ 'description': 'md5:0baeaa46d877f1351fb2eeed3e871f9f',
},
- 'playlist_mincount': 12,
+ 'playlist_mincount': 15,
}, {
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
'only_matching': True,
@@ -78,8 +106,8 @@ class PolskieRadioIE(InfoExtractor):
media_urls = set()
- for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
- media = self._parse_json(data_media, playlist_id, fatal=False)
+ for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', content):
+ media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False)
if not media.get('file') or not media.get('desc'):
continue
media_url = self._proto_relative_url(media['file'], 'http:')
@@ -98,6 +126,7 @@ class PolskieRadioIE(InfoExtractor):
title = self._og_search_title(webpage).strip()
description = strip_or_none(self._og_search_description(webpage))
+ description = description.replace('\xa0', ' ') if description is not None else None
return self.playlist_result(entries, playlist_id, title, description)
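The widened data-media regex and the unescapeHTML transform above account for newer polskieradio markup that quotes and HTML-escapes the attribute payload. A worked example on invented markup, with html.unescape standing in for the library's unescapeHTML:

    import re
    from html import unescape

    content = ('<span data-media="{&quot;id&quot;:1,&quot;file&quot;:'
               '&quot;//static.prsa.pl/a.mp3&quot;,&quot;desc&quot;:&quot;x&quot;}">')
    raw = re.search(r'<[^>]+data-media="?({[^>]+})"?', content).group(1)
    assert unescape(raw) == '{"id":1,"file":"//static.prsa.pl/a.mp3","desc":"x"}'
    # the unescaped string is now valid JSON for _parse_json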
diff --git a/hypervideo_dl/extractor/popcorntimes.py b/hypervideo_dl/extractor/popcorntimes.py
index 7bf7f98..5f9d0e7 100644
--- a/hypervideo_dl/extractor/popcorntimes.py
+++ b/hypervideo_dl/extractor/popcorntimes.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import (
@@ -33,7 +32,7 @@ class PopcorntimesIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/popcorntv.py b/hypervideo_dl/extractor/popcorntv.py
index 9f834fb..66d2e50 100644
--- a/hypervideo_dl/extractor/popcorntv.py
+++ b/hypervideo_dl/extractor/popcorntv.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -33,7 +32,7 @@ class PopcornTVIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id, video_id = mobj.group('display_id', 'id')
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/porncom.py b/hypervideo_dl/extractor/porncom.py
index 5726cab..83df221 100644
--- a/hypervideo_dl/extractor/porncom.py
+++ b/hypervideo_dl/extractor/porncom.py
@@ -35,7 +35,7 @@ class PornComIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
diff --git a/hypervideo_dl/extractor/pornflip.py b/hypervideo_dl/extractor/pornflip.py
new file mode 100644
index 0000000..d0aefa2
--- /dev/null
+++ b/hypervideo_dl/extractor/pornflip.py
@@ -0,0 +1,82 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601
+)
+
+
+class PornFlipIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P<id>[^/]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou',
+ 'info_dict': {
+ 'id': 'dzv9Mtw1qj2',
+ 'ext': 'mp4',
+ 'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou',
+ 'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821',
+ 'duration': 476,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'timestamp': 1617846819,
+ 'upload_date': '20210408',
+ 'uploader': 'Brazzers',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.pornflip.com/v/IrJEC40i21L',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU',
+ 'only_matching': True,
+ },
+ ]
+ _HOST = 'www.pornflip.com'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST})
+ description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False)
+ duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False)
+ view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False)
+ title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)</a>', webpage, 'title', fatal=False)
+ uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*<a[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+ upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False)
+ likes = self._html_search_regex(
+ r'class="btn btn-up-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'like_count', fatal=False)
+ dislikes = self._html_search_regex(
+ r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
+ mpd_url = self._search_regex(r'"([^"]+userscontent\.net/dash/[0-9]+/manifest\.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
+ formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
+ self._sort_formats(formats)
+
+ return {
+ 'age_limit': 18,
+ 'description': description,
+ 'dislike_count': int_or_none(dislikes),
+ 'duration': parse_duration(duration),
+ 'formats': formats,
+ 'id': video_id,
+ 'like_count': int_or_none(likes),
+ 'timestamp': parse_iso8601(upload_date),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'title': title,
+ 'uploader': uploader,
+ 'view_count': int_or_none(view_count),
+ }
diff --git a/hypervideo_dl/extractor/pornhd.py b/hypervideo_dl/extractor/pornhd.py
index c6052ac..9dbd72f 100644
--- a/hypervideo_dl/extractor/pornhd.py
+++ b/hypervideo_dl/extractor/pornhd.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -47,7 +46,7 @@ class PornHdIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/pornhub.py b/hypervideo_dl/extractor/pornhub.py
index 0314546..6d894af 100644
--- a/hypervideo_dl/extractor/pornhub.py
+++ b/hypervideo_dl/extractor/pornhub.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import functools
import itertools
+import math
import operator
import re
@@ -14,6 +15,7 @@ from ..compat import (
)
from .openload import PhantomJSwrapper
from ..utils import (
+ clean_html,
determine_ext,
ExtractorError,
int_or_none,
@@ -30,6 +32,7 @@ from ..utils import (
class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub'
+ _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs):
@@ -122,11 +125,13 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:[^/]+\.)?
+ %s
+ /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
- '''
+ ''' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@@ -145,6 +150,7 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18,
'tags': list,
'categories': list,
+ 'cast': list,
},
}, {
# non-ASCII title
@@ -236,6 +242,13 @@ class PornHubIE(PornHubBaseIE):
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
+ 'only_matching': True,
}]
@staticmethod
@@ -249,7 +262,7 @@ class PornHubIE(PornHubBaseIE):
pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host') or 'pornhub.com'
video_id = mobj.group('id')
@@ -275,6 +288,11 @@ class PornHubIE(PornHubBaseIE):
'PornHub said: %s' % error_msg,
expected=True, video_id=video_id)
+ if any(re.search(p, webpage) for p in (
+ r'class=["\']geoBlocked["\']',
+ r'>\s*This content is unavailable in your country')):
+ self.raise_geo_restricted()
+
# video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
@@ -408,17 +426,14 @@ class PornHubIE(PornHubBaseIE):
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
return
- tbr = None
- mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
- if mobj:
- if not height:
- height = int(mobj.group('height'))
- tbr = int(mobj.group('tbr'))
+ if not height:
+ height = int_or_none(self._search_regex(
+ r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
+ default=None))
formats.append({
'url': format_url,
'format_id': '%dp' % height if height else None,
'height': height,
- 'tbr': tbr,
})
for video_url, height in video_urls:
@@ -440,7 +455,10 @@ class PornHubIE(PornHubBaseIE):
add_format(video_url, height)
continue
add_format(video_url)
- self._sort_formats(formats)
+
+ # field_preference is unnecessary here, but kept for code-similarity with youtube-dl
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'fps', 'format_id'))
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@@ -464,7 +482,7 @@ class PornHubIE(PornHubBaseIE):
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
% meta_key, webpage, meta_key, default=None)
if div:
- return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
+ return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
info = self._search_json_ld(webpage, video_id, default={})
# description provided in JSON-LD is irrelevant
@@ -485,6 +503,7 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18,
'tags': extract_list('tags'),
'categories': extract_list('categories'),
+ 'cast': extract_list('pornstars'),
'subtitles': subtitles,
}, info)
@@ -513,7 +532,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
class PornHubUserIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118,
@@ -542,10 +561,13 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
# Same as before, multi page
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True,
+ }, {
+ 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
user_id = mobj.group('id')
videos_url = '%s/videos' % mobj.group('url')
page = self._extract_page(url)
@@ -607,7 +629,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
break
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host')
item_id = mobj.group('id')
@@ -617,7 +639,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
- _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True,
@@ -711,16 +733,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
'only_matching': True,
}, {
- 'url': 'https://www.pornhub.com/playlist/44121572',
- 'info_dict': {
- 'id': 'playlist/44121572',
- },
- 'playlist_mincount': 132,
- }, {
- 'url': 'https://www.pornhub.com/playlist/4667351',
- 'only_matching': True,
- }, {
- 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
'only_matching': True,
}]
@@ -732,7 +745,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': {
@@ -742,4 +755,63 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
}, {
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True,
+ }, {
+ 'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
+ 'only_matching': True,
}]
+
+
+class PornHubPlaylistIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/playlist/44121572',
+ 'info_dict': {
+ 'id': '44121572',
+ },
+ 'playlist_count': 77,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351?page=2',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, url, host, item_id):
+ webpage = self._download_webpage(url, item_id, 'Downloading page 1')
+ playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id')
+ video_count = int_or_none(
+ self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count'))
+ token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token')
+ page_count = math.ceil((video_count - 36) / 40.) + 1
+ page_entries = self._extract_entries(webpage, host)
+
+ def download_page(page_num):
+ note = 'Downloading page {}'.format(page_num)
+ page_url = 'https://www.{}/playlist/viewChunked'.format(host)
+ return self._download_webpage(page_url, item_id, note, query={
+ 'id': playlist_id,
+ 'page': page_num,
+ 'token': token,
+ })
+
+ for page_num in range(1, page_count + 1):
+ if page_num > 1:
+ webpage = download_page(page_num)
+ page_entries = self._extract_entries(webpage, host)
+ if not page_entries:
+ break
+ for e in page_entries:
+ yield e
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ self._login(host)
+
+ return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
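The page-count arithmetic in the new PornHubPlaylistIE._entries suggests the first playlist page embeds 36 videos and each subsequent viewChunked request returns about 40. A worked check of the formula (counts invented):

    import math

    def page_count(video_count):
        return math.ceil((video_count - 36) / 40.) + 1

    assert page_count(36) == 1    # everything on the first page
    assert page_count(77) == 3    # 36 + 40 + 1
    assert page_count(116) == 3   # 36 + 40 + 40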
diff --git a/hypervideo_dl/extractor/pornovoisines.py b/hypervideo_dl/extractor/pornovoisines.py
index b6b7106..18459fc 100644
--- a/hypervideo_dl/extractor/pornovoisines.py
+++ b/hypervideo_dl/extractor/pornovoisines.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -39,7 +38,7 @@ class PornoVoisinesIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/pornoxo.py b/hypervideo_dl/extractor/pornoxo.py
index 2831368..489dc2b 100644
--- a/hypervideo_dl/extractor/pornoxo.py
+++ b/hypervideo_dl/extractor/pornoxo.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -26,7 +25,7 @@ class PornoXOIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id, display_id = mobj.groups()
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/presstv.py b/hypervideo_dl/extractor/presstv.py
index b5c2792..bfb2eb7 100644
--- a/hypervideo_dl/extractor/presstv.py
+++ b/hypervideo_dl/extractor/presstv.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import remove_start
@@ -25,7 +24,7 @@ class PressTVIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
diff --git a/hypervideo_dl/extractor/projectveritas.py b/hypervideo_dl/extractor/projectveritas.py
new file mode 100644
index 0000000..1d832a6
--- /dev/null
+++ b/hypervideo_dl/extractor/projectveritas.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unified_strdate,
+)
+
+
+class ProjectVeritasIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
+ 'info_dict': {
+ 'id': '51910aab-365a-5cf1-88f2-8eb1ca5fd3c6',
+ 'ext': 'mp4',
+ 'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus',
+ 'upload_date': '20200327',
+ 'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c',
+ }
+ }, {
+ 'url': 'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/',
+ 'info_dict': {
+ 'id': 'c5aab304-a56b-54b1-9f0b-03b77bc5f2f6',
+ 'ext': 'mp4',
+ 'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots',
+ 'upload_date': '20200927',
+ 'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id, video_type = self._match_valid_url(url).group('id', 'type')
+ api_url = f'https://www.projectveritas.com/page-data/{video_type}/{display_id}/page-data.json'
+ data_json = self._download_json(api_url, display_id)['result']['data']
+ main_data = traverse_obj(data_json, 'video', 'post')
+ video_id = main_data['id']
+ thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src'))
+ mux_asset = traverse_obj(main_data,
+ 'muxAsset', ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'),
+ get_all=False, expected_type=dict)
+ if not mux_asset:
+ raise ExtractorError('No video on the provided url.', expected=True)
+ playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
+ formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': main_data['title'],
+ 'upload_date': unified_strdate(main_data.get('date')),
+ 'thumbnail': thumbnail.replace('//', ''),
+ 'formats': formats,
+ }
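traverse_obj, used throughout the new extractor above, walks nested dicts by one or more key paths and returns the first path that resolves. A plain-dict approximation of the two lookups, on an invented payload:

    data_json = {'video': {'muxAsset': {'playbackId': 'abc123'}}}
    # traverse_obj(data_json, 'video', 'post'): the first key that resolves wins
    main_data = data_json.get('video') or data_json.get('post')
    # traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
    mux_asset = main_data['muxAsset']
    playback_id = mux_asset.get('playbackId') or (mux_asset.get('en-US') or {}).get('playbackId')
    assert playback_id == 'abc123'  # feeds https://stream.mux.com/{playback_id}.m3u8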
diff --git a/hypervideo_dl/extractor/prosiebensat1.py b/hypervideo_dl/extractor/prosiebensat1.py
index e470882..e89bbfd 100644
--- a/hypervideo_dl/extractor/prosiebensat1.py
+++ b/hypervideo_dl/extractor/prosiebensat1.py
@@ -34,8 +34,8 @@ class ProSiebenSat1BaseIE(InfoExtractor):
'ids': clip_id,
})[0]
- if video.get('is_protected') is True:
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
+ self.report_drm(clip_id)
formats = []
if self._ACCESS_ID:
diff --git a/hypervideo_dl/extractor/pyvideo.py b/hypervideo_dl/extractor/pyvideo.py
index b8ac93a..8696197 100644
--- a/hypervideo_dl/extractor/pyvideo.py
+++ b/hypervideo_dl/extractor/pyvideo.py
@@ -27,7 +27,7 @@ class PyvideoIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
category = mobj.group('category')
video_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/qqmusic.py b/hypervideo_dl/extractor/qqmusic.py
index 084308a..0106d16 100644
--- a/hypervideo_dl/extractor/qqmusic.py
+++ b/hypervideo_dl/extractor/qqmusic.py
@@ -121,7 +121,7 @@ class QQMusicIE(InfoExtractor):
% (details['prefix'], mid, details['ext'], vkey, guid),
'format': format_id,
'format_id': format_id,
- 'preference': details['preference'],
+ 'quality': details['preference'],
'abr': details.get('abr'),
})
self._check_formats(formats, mid)
diff --git a/hypervideo_dl/extractor/radiko.py b/hypervideo_dl/extractor/radiko.py
new file mode 100644
index 0000000..1e60de1
--- /dev/null
+++ b/hypervideo_dl/extractor/radiko.py
@@ -0,0 +1,234 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import base64
+import calendar
+import datetime
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ update_url_query,
+ clean_html,
+ unified_timestamp,
+)
+from ..compat import compat_urllib_parse
+
+
+class RadikoBaseIE(InfoExtractor):
+ _FULL_KEY = None
+
+ def _auth_client(self):
+ auth_cache = self._downloader.cache.load('radiko', 'auth_data')
+ if auth_cache:
+ return auth_cache
+
+ _, auth1_handle = self._download_webpage_handle(
+ 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
+ headers={
+ 'x-radiko-app': 'pc_html5',
+ 'x-radiko-app-version': '0.0.1',
+ 'x-radiko-device': 'pc',
+ 'x-radiko-user': 'dummy_user',
+ })
+ auth1_header = auth1_handle.info()
+
+ auth_token = auth1_header['X-Radiko-AuthToken']
+ kl = int(auth1_header['X-Radiko-KeyLength'])
+ ko = int(auth1_header['X-Radiko-KeyOffset'])
+ raw_partial_key = self._extract_full_key()[ko:ko + kl]
+ partial_key = base64.b64encode(raw_partial_key).decode()
+
+ area_id = self._download_webpage(
+ 'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
+ headers={
+ 'x-radiko-device': 'pc',
+ 'x-radiko-user': 'dummy_user',
+ 'x-radiko-authtoken': auth_token,
+ 'x-radiko-partialkey': partial_key,
+ }).split(',')[0]
+
+ auth_data = (auth_token, area_id)
+ self._downloader.cache.store('radiko', 'auth_data', auth_data)
+ return auth_data
+
+ def _extract_full_key(self):
+ if self._FULL_KEY:
+ return self._FULL_KEY
+
+ jscode = self._download_webpage(
+ 'https://radiko.jp/apps/js/playerCommon.js', None,
+ note='Downloading player js code')
+ full_key = self._search_regex(
+ (r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"),
+ jscode, 'full key', fatal=False, group='fullkey')
+
+ if full_key:
+ full_key = full_key.encode()
+ else: # fall back to the last full key known to work
+ full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'
+
+ self._FULL_KEY = full_key
+ return full_key
+
+ def _find_program(self, video_id, station, cursor):
+ station_program = self._download_xml(
+ 'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id,
+ note='Downloading radio program for %s station' % station)
+
+ prog = None
+ for p in station_program.findall('.//prog'):
+ ft_str, to_str = p.attrib['ft'], p.attrib['to']
+ ft = unified_timestamp(ft_str, False)
+ to = unified_timestamp(to_str, False)
+ if ft <= cursor < to:
+ prog = p
+ break
+ if not prog:
+ raise ExtractorError('Cannot identify radio program to download!')
+ assert ft and to
+ return prog, station_program, ft, ft_str, to_str
+
+ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
+ m3u8_playlist_data = self._download_xml(
+ 'https://radiko.jp/v3/station/stream/pc_html5/%s.xml' % station, video_id,
+ note='Downloading m3u8 information')
+ m3u8_urls = m3u8_playlist_data.findall('.//url')
+
+ formats = []
+ found = set()
+ for url_tag in m3u8_urls:
+ pcu = url_tag.find('playlist_create_url')
+ url_attrib = url_tag.attrib
+ playlist_url = update_url_query(pcu.text, {
+ 'station_id': station,
+ **query,
+ 'l': '15',
+ 'lsid': '77d0678df93a1034659c14d6fc89f018',
+ 'type': 'b',
+ })
+ if playlist_url in found:
+ continue
+ else:
+ found.add(playlist_url)
+
+ time_to_skip = None if is_onair else cursor - ft
+
+ subformats = self._extract_m3u8_formats(
+ playlist_url, video_id, ext='m4a',
+ live=True, fatal=False, m3u8_id=None,
+ headers={
+ 'X-Radiko-AreaId': area_id,
+ 'X-Radiko-AuthToken': auth_token,
+ })
+ for sf in subformats:
+ domain = sf['format_id'] = compat_urllib_parse.urlparse(sf['url']).netloc
+ if re.match(r'^[cf]-radiko\.smartstream\.ne\.jp$', domain):
+ # Prioritize live radio vs playback based on extractor
+ sf['preference'] = 100 if is_onair else -100
+ if not is_onair and url_attrib['timefree'] == '1' and time_to_skip:
+ sf['_ffmpeg_args'] = ['-ss', time_to_skip]
+ formats.extend(subformats)
+
+ self._sort_formats(formats)
+ return formats
+
+
+class RadikoIE(RadikoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
+
+ _TESTS = [{
+ # QRR (文化放送) station provides <desc>
+ 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
+ 'only_matching': True,
+ }, {
+ # FMT (TOKYO FM) station does not provide <desc>
+ 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ station, video_id = self._match_valid_url(url).groups()
+ vid_int = unified_timestamp(video_id, False)
+
+ auth_token, area_id = self._auth_client()
+
+ prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
+
+ title = prog.find('title').text
+ description = clean_html(prog.find('info').text)
+ station_name = station_program.find('.//name').text
+
+ formats = self._extract_formats(
+ video_id=video_id, station=station, is_onair=False,
+ ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
+ query={
+ 'start_at': radio_begin,
+ 'ft': radio_begin,
+ 'end_at': radio_end,
+ 'to': radio_end,
+ 'seek': video_id,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'uploader': station_name,
+ 'uploader_id': station,
+ 'timestamp': vid_int,
+ 'formats': formats,
+ 'is_live': True,
+ }
+
+
+class RadikoRadioIE(RadikoBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'
+
+ _TESTS = [{
+ # QRR (文化放送) station provides <desc>
+ 'url': 'https://radiko.jp/#!/live/QRR',
+ 'only_matching': True,
+ }, {
+ # FMT (TOKYO FM) station does not provide <desc>
+ 'url': 'https://radiko.jp/#!/live/FMT',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radiko.jp/#!/live/JOAK-FM',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ station = self._match_id(url)
+ self.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')
+
+ auth_token, area_id = self._auth_client()
+ # get current time in JST (GMT+9:00 w/o DST)
+ vid_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
+ vid_now = calendar.timegm(vid_now.timetuple())
+
+ prog, station_program, ft, _, _ = self._find_program(station, station, vid_now)
+
+ title = prog.find('title').text
+ description = clean_html(prog.find('info').text)
+ station_name = station_program.find('.//name').text
+
+ formats = self._extract_formats(
+ video_id=station, station=station, is_onair=True,
+ ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id,
+ query={})
+
+ return {
+ 'id': station,
+ 'title': title,
+ 'description': description,
+ 'uploader': station_name,
+ 'uploader_id': station,
+ 'timestamp': ft,
+ 'formats': formats,
+ 'is_live': True,
+ }
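The radiko authentication above hinges on a partial-key handshake: auth1 returns a token plus an offset and length into a static application key, and auth2 expects the base64 of that slice. A worked example using the fallback key constant from the extractor (the offset and length values are invented):

    import base64

    full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'
    key_offset, key_length = 8, 16  # hypothetical auth1 response headers
    partial_key = base64.b64encode(full_key[key_offset:key_offset + key_length]).decode()
    # sent back as the x-radiko-partialkey header of the auth2 request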
diff --git a/hypervideo_dl/extractor/radiocanada.py b/hypervideo_dl/extractor/radiocanada.py
index a28b1a2..4b4445c 100644
--- a/hypervideo_dl/extractor/radiocanada.py
+++ b/hypervideo_dl/extractor/radiocanada.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -143,7 +142,7 @@ class RadioCanadaIE(InfoExtractor):
}
def _real_extract(self, url):
- return self._extract_info(*re.match(self._VALID_URL, url).groups())
+ return self._extract_info(*self._match_valid_url(url).groups())
class RadioCanadaAudioVideoIE(InfoExtractor):
diff --git a/hypervideo_dl/extractor/radiofrance.py b/hypervideo_dl/extractor/radiofrance.py
index a8afc00..082238b 100644
--- a/hypervideo_dl/extractor/radiofrance.py
+++ b/hypervideo_dl/extractor/radiofrance.py
@@ -23,7 +23,7 @@ class RadioFranceIE(InfoExtractor):
}
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
+ m = self._match_valid_url(url)
video_id = m.group('id')
webpage = self._download_webpage(url, video_id)
@@ -43,7 +43,7 @@ class RadioFranceIE(InfoExtractor):
'format_id': fm[0],
'url': fm[1],
'vcodec': 'none',
- 'preference': i,
+ 'quality': i,
}
for i, fm in
enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
diff --git a/hypervideo_dl/extractor/radlive.py b/hypervideo_dl/extractor/radlive.py
new file mode 100644
index 0000000..2de7ab0
--- /dev/null
+++ b/hypervideo_dl/extractor/radlive.py
@@ -0,0 +1,179 @@
+import json
+
+from ..utils import ExtractorError, traverse_obj, try_get, unified_timestamp
+from .common import InfoExtractor
+
+
+class RadLiveIE(InfoExtractor):
+ IE_NAME = 'radlive'
+ _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a',
+ 'md5': '6219d5d31d52de87d21c9cf5b7cb27ff',
+ 'info_dict': {
+ 'id': 'dc5acfbc-761b-4bec-9564-df999905116a',
+ 'ext': 'mp4',
+ 'title': 'Deathpact - Digital Mirage 2 [Full Set]',
+ 'language': 'en',
+ 'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png',
+ 'description': '',
+ 'release_timestamp': 1600185600.0,
+ 'channel': 'Proximity',
+ 'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+ 'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+ }
+ }, {
+ 'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+ 'md5': '40b2175f347592125d93e9a344080125',
+ 'info_dict': {
+ 'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+ 'ext': 'mp4',
+ 'title': 'E01: Bad Jokes 1',
+ 'language': 'en',
+ 'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg',
+ 'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype',
+ 'release_timestamp': None,
+ 'channel': None,
+ 'channel_id': None,
+ 'channel_url': None,
+ 'episode': 'E01: Bad Jokes 1',
+ 'episode_number': 1,
+ 'episode_id': '336',
+ },
+ }]
+
+ def _real_extract(self, url):
+ content_type, video_id = self._match_valid_url(url).groups()
+
+ webpage = self._download_webpage(url, video_id)
+
+ content_info = json.loads(self._search_regex(
+ r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+ webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+ video_info = content_info[content_type]
+
+ if not video_info:
+ raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+
+ formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
+ self._sort_formats(formats)
+
+ data = video_info.get('structured_data', {})
+
+ release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate')))
+ channel = next(iter(content_info.get('channels', [])), {})
+ channel_id = channel.get('lrn', '').split(':')[-1] or None
+
+ result = {
+ 'id': video_id,
+ 'title': video_info['title'],
+ 'formats': formats,
+ 'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')),
+ 'thumbnail': traverse_obj(data, ('image', 'contentUrl')),
+ 'description': data.get('description'),
+ 'release_timestamp': release_date,
+ 'channel': channel.get('name'),
+ 'channel_id': channel_id,
+ 'channel_url': f'https://rad.live/content/channel/{channel_id}' if channel_id else None,
+ }
+ if content_type == 'episode':
+ result.update({
+ # TODO: Get season number when downloading single episode
+ 'episode': video_info.get('title'),
+ 'episode_number': video_info.get('number'),
+ 'episode_id': video_info.get('id'),
+ })
+
+ return result
+
+
+class RadLiveSeasonIE(RadLiveIE):
+ IE_NAME = 'radlive:season'
+ _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75',
+ 'md5': '40b2175f347592125d93e9a344080125',
+ 'info_dict': {
+ 'id': '08a290f7-c9ef-4e22-9105-c255995a2e75',
+ 'title': 'Bad Jokes - Season 1',
+ },
+ 'playlist_mincount': 5,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if RadLiveIE.suitable(url) else super(RadLiveSeasonIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ season_id = self._match_id(url)
+ webpage = self._download_webpage(url, season_id)
+
+ content_info = json.loads(self._search_regex(
+ r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+ webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+ video_info = content_info['season']
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'id': episode['structured_data']['url'].split('/')[-1],
+ 'url': episode['structured_data']['url'],
+ 'series': try_get(content_info, lambda x: x['series']['title']),
+ 'season': video_info['title'],
+ 'season_number': video_info.get('number'),
+ 'season_id': video_info.get('id'),
+ 'ie_key': RadLiveIE.ie_key(),
+ } for episode in video_info['episodes']]
+
+ return self.playlist_result(entries, season_id, video_info.get('title'))
+
+
+class RadLiveChannelIE(RadLiveIE):
+ IE_NAME = 'radlive:channel'
+ _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274',
+ 'md5': '625156a08b7f2b0b849f234e664457ac',
+ 'info_dict': {
+ 'id': '5c4d8df4-6fa0-413c-81e3-873479b49274',
+ 'title': 'Whistle Sports',
+ },
+ 'playlist_mincount': 7,
+ }]
+
+ _QUERY = '''
+query WebChannelListing ($lrn: ID!) {
+ channel (id:$lrn) {
+ name
+ features {
+ structured_data
+ }
+ }
+}'''
+
+ @classmethod
+ def suitable(cls, url):
+ return False if RadLiveIE.suitable(url) else super(RadLiveChannelIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ graphql = self._download_json(
+ 'https://content.mhq.12core.net/graphql', channel_id,
+ headers={'Content-Type': 'application/json'},
+ data=json.dumps({
+ 'query': self._QUERY,
+ 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'}
+ }).encode('utf-8'))
+
+ data = traverse_obj(graphql, ('data', 'channel'))
+ if not data:
+ raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': feature['structured_data']['url'],
+ 'ie_key': RadLiveIE.ie_key(),
+ } for feature in data['features']]
+
+ return self.playlist_result(entries, channel_id, data.get('name'))
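All three RadLive extractors read their metadata from a JSON blob embedded in a <script type="application/json"> tag and then descend into props.pageProps.initialContentData. A minimal sketch of that lookup with the same regex (the markup and payload are invented):

    import json
    import re

    webpage = ('<script id="__NEXT_DATA__" type="application/json">'
               '{"props":{"pageProps":{"initialContentData":'
               '{"feature":{"title":"demo"}}}}}</script>')
    raw = re.search(
        r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
        webpage).group('json')
    info = json.loads(raw)['props']['pageProps']['initialContentData']
    assert info['feature']['title'] == 'demo'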
diff --git a/hypervideo_dl/extractor/rai.py b/hypervideo_dl/extractor/rai.py
index 67b86fc..27cd018 100644
--- a/hypervideo_dl/extractor/rai.py
+++ b/hypervideo_dl/extractor/rai.py
@@ -5,15 +5,16 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_str,
+ compat_urlparse,
)
from ..utils import (
- ExtractorError,
determine_ext,
+ ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
GeoRestrictedError,
+ HEADRequest,
int_or_none,
parse_duration,
remove_start,
@@ -94,7 +95,9 @@ class RaiBaseIE(InfoExtractor):
})
if not formats and geoprotection is True:
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+
+ formats.extend(self._create_http_urls(relinker_url, formats))
return dict((k, v) for k, v in {
'is_live': is_live,
@@ -102,6 +105,92 @@ class RaiBaseIE(InfoExtractor):
'formats': formats,
}.items() if v is not None)
+ def _create_http_urls(self, relinker_url, fmts):
+ _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
+ _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
+ _QUALITY = {
+ # tbr: w, h
+ '250': [352, 198],
+ '400': [512, 288],
+ '700': [512, 288],
+ '800': [700, 394],
+ '1200': [736, 414],
+ '1800': [1024, 576],
+ '2400': [1280, 720],
+ '3200': [1440, 810],
+ '3600': [1440, 810],
+ '5000': [1920, 1080],
+ '10000': [1920, 1080],
+ }
+
+ def test_url(url):
+ resp = self._request_webpage(
+ HEADRequest(url), None, headers={'User-Agent': 'Rai'},
+ fatal=False, errnote=False, note=False)
+
+ if resp is False:
+ return False
+
+ if resp.code == 200:
+ return False if resp.url == url else resp.url
+ return None
+
+ def get_format_info(tbr):
+ import math
+ br = int_or_none(tbr)
+ if len(fmts) == 1 and not br:
+ br = fmts[0].get('tbr')
+ if br > 300:
+ tbr = compat_str(math.floor(br / 100) * 100)
+ else:
+ tbr = '250'
+
+ # try extracting info from available m3u8 formats
+ format_copy = None
+ for f in fmts:
+ if f.get('tbr'):
+ br_limit = math.floor(br / 100)
+ if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1:
+ format_copy = f.copy()
+ return {
+ 'width': format_copy.get('width'),
+ 'height': format_copy.get('height'),
+ 'tbr': format_copy.get('tbr'),
+ 'vcodec': format_copy.get('vcodec'),
+ 'acodec': format_copy.get('acodec'),
+ 'fps': format_copy.get('fps'),
+ 'format_id': 'https-%s' % tbr,
+ } if format_copy else {
+ 'width': _QUALITY[tbr][0],
+ 'height': _QUALITY[tbr][1],
+ 'format_id': 'https-%s' % tbr,
+ 'tbr': int(tbr),
+ }
+
+ loc = test_url(_MP4_TMPL % (relinker_url, '*'))
+ if not isinstance(loc, compat_str):
+ return []
+
+ mobj = re.match(
+ _RELINKER_REG,
+ test_url(relinker_url) or '')
+ if not mobj:
+ return []
+
+ available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
+ available_qualities = [i for i in available_qualities if i]
+
+ formats = []
+ for q in available_qualities:
+ fmt = {
+ 'url': _MP4_TMPL % (relinker_url, q),
+ 'protocol': 'https',
+ 'ext': 'mp4',
+ }
+ fmt.update(get_format_info(q))
+ formats.append(fmt)
+ return formats
+
@staticmethod
def _extract_subtitles(url, video_data):
STL_EXT = 'stl'
@@ -152,22 +241,49 @@ class RaiPlayIE(RaiBaseIE):
'skip_download': True,
},
}, {
+ # 1080p direct mp4 url
+ 'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
+ 'md5': '2e501e8651d72f05ffe8f5d286ad560b',
+ 'info_dict': {
+ 'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
+ 'ext': 'mp4',
+ 'title': 'Leonardo - S1E1',
+ 'alt_title': 'St 1 Ep 1 - Episodio 1',
+ 'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Rai 1',
+ 'duration': 3229,
+ 'series': 'Leonardo',
+ 'season': 'Season 1',
+ },
+ }, {
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
'only_matching': True,
}, {
# subtitles at 'subtitlesArray' key (see #27698)
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
'only_matching': True,
+ }, {
+ # DRM protected
+ 'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- base, video_id = re.match(self._VALID_URL, url).groups()
+ base, video_id = self._match_valid_url(url).groups()
media = self._download_json(
base + '.json', video_id, 'Downloading video JSON')
- title = media['name']
+ if not self.get_param('allow_unplayable_formats'):
+ if try_get(
+ media,
+ (lambda x: x['rights_management']['rights']['drm'],
+ lambda x: x['program_info']['rights_management']['rights']['drm']),
+ dict):
+ self.report_drm(video_id)
+ title = media['name']
video = media['video']
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
@@ -247,7 +363,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
}]
def _real_extract(self, url):
- base, playlist_id = re.match(self._VALID_URL, url).groups()
+ base, playlist_id = self._match_valid_url(url).groups()
program = self._download_json(
base + '.json', playlist_id, 'Downloading program JSON')
@@ -307,7 +423,7 @@ class RaiIE(RaiBaseIE):
}, {
# with ContentItem in og:url
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
- 'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
+ 'md5': '06345bd97c932f19ffb129973d07a020',
'info_dict': {
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
'ext': 'mp4',
@@ -340,22 +456,6 @@ class RaiIE(RaiBaseIE):
'skip_download': True,
},
}, {
- # ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
- 'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
- 'info_dict': {
- 'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
- 'ext': 'mp4',
- 'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
- 'description': 'md5:d291b03407ec505f95f27970c0b025f4',
- 'upload_date': '20150913',
- 'subtitles': {
- 'it': 'count:2',
- },
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
# Direct MMS URL
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
'only_matching': True,
diff --git a/hypervideo_dl/extractor/raywenderlich.py b/hypervideo_dl/extractor/raywenderlich.py
index 5411ece..f04d51f 100644
--- a/hypervideo_dl/extractor/raywenderlich.py
+++ b/hypervideo_dl/extractor/raywenderlich.py
@@ -72,7 +72,7 @@ class RayWenderlichIE(InfoExtractor):
return compat_str(video_id)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
course_id, lesson_id = mobj.group('course_id', 'id')
display_id = '%s/%s' % (course_id, lesson_id)
diff --git a/hypervideo_dl/extractor/rbmaradio.py b/hypervideo_dl/extractor/rbmaradio.py
index ae7413f..9642fbb 100644
--- a/hypervideo_dl/extractor/rbmaradio.py
+++ b/hypervideo_dl/extractor/rbmaradio.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -30,7 +29,7 @@ class RBMARadioIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
show_id = mobj.group('show_id')
episode_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/rcs.py b/hypervideo_dl/extractor/rcs.py
new file mode 100644
index 0000000..ace611b
--- /dev/null
+++ b/hypervideo_dl/extractor/rcs.py
@@ -0,0 +1,427 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ js_to_json,
+ base_url,
+ url_basename,
+ urljoin,
+)
+
+
+class RCSBaseIE(InfoExtractor):
+ # based on VideoPlayerLoader.prototype.getVideoSrc
+ # and VideoPlayerLoader.prototype.transformSrc from
+ # https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs
+ _ALL_REPLACE = {
+ 'media2vam.corriere.it.edgesuite.net':
+ 'media2vam-corriere-it.akamaized.net',
+ 'media.youreporter.it.edgesuite.net':
+ 'media-youreporter-it.akamaized.net',
+ 'corrierepmd.corriere.it.edgesuite.net':
+ 'corrierepmd-corriere-it.akamaized.net',
+ 'media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/':
+ 'video.corriere.it/vr360/videos/',
+ '.net//': '.net/',
+ }
+ _MP4_REPLACE = {
+ 'media2vam.corbologna.corriere.it.edgesuite.net':
+ 'media2vam-bologna-corriere-it.akamaized.net',
+ 'media2vam.corfiorentino.corriere.it.edgesuite.net':
+ 'media2vam-fiorentino-corriere-it.akamaized.net',
+ 'media2vam.cormezzogiorno.corriere.it.edgesuite.net':
+ 'media2vam-mezzogiorno-corriere-it.akamaized.net',
+ 'media2vam.corveneto.corriere.it.edgesuite.net':
+ 'media2vam-veneto-corriere-it.akamaized.net',
+ 'media2.oggi.it.edgesuite.net':
+ 'media2-oggi-it.akamaized.net',
+ 'media2.quimamme.it.edgesuite.net':
+ 'media2-quimamme-it.akamaized.net',
+ 'media2.amica.it.edgesuite.net':
+ 'media2-amica-it.akamaized.net',
+ 'media2.living.corriere.it.edgesuite.net':
+ 'media2-living-corriere-it.akamaized.net',
+ 'media2.style.corriere.it.edgesuite.net':
+ 'media2-style-corriere-it.akamaized.net',
+ 'media2.iodonna.it.edgesuite.net':
+ 'media2-iodonna-it.akamaized.net',
+ 'media2.leitv.it.edgesuite.net':
+ 'media2-leitv-it.akamaized.net',
+ }
+ _MIGRATION_MAP = {
+ 'videoamica-vh.akamaihd': 'amica',
+ 'media2-amica-it.akamaized': 'amica',
+ 'corrierevam-vh.akamaihd': 'corriere',
+ 'media2vam-corriere-it.akamaized': 'corriere',
+ 'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno',
+ 'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno',
+ 'corveneto-vh.akamaihd': 'corrieredelveneto',
+ 'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto',
+ 'corbologna-vh.akamaihd': 'corrieredibologna',
+ 'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna',
+ 'corfiorentino-vh.akamaihd': 'corrierefiorentino',
+ 'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino',
+ 'corinnovazione-vh.akamaihd': 'corriereinnovazione',
+ 'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet',
+ 'videogazzanet-vh.akamaihd': 'gazzanet',
+ 'videogazzaworld-vh.akamaihd': 'gazzaworld',
+ 'gazzettavam-vh.akamaihd': 'gazzetta',
+ 'media2vam-gazzetta-it.akamaized': 'gazzetta',
+ 'videoiodonna-vh.akamaihd': 'iodonna',
+ 'media2-leitv-it.akamaized': 'leitv',
+ 'videoleitv-vh.akamaihd': 'leitv',
+ 'videoliving-vh.akamaihd': 'living',
+ 'media2-living-corriere-it.akamaized': 'living',
+ 'media2-oggi-it.akamaized': 'oggi',
+ 'videooggi-vh.akamaihd': 'oggi',
+ 'media2-quimamme-it.akamaized': 'quimamme',
+ 'quimamme-vh.akamaihd': 'quimamme',
+ 'videorunning-vh.akamaihd': 'running',
+ 'media2-style-corriere-it.akamaized': 'style',
+ 'style-vh.akamaihd': 'style',
+ 'videostyle-vh.akamaihd': 'style',
+ 'media2-stylepiccoli-it.akamaized': 'stylepiccoli',
+ 'stylepiccoli-vh.akamaihd': 'stylepiccoli',
+ 'doveviaggi-vh.akamaihd': 'viaggi',
+ 'media2-doveviaggi-it.akamaized': 'viaggi',
+ 'media2-vivimilano-corriere-it.akamaized': 'vivimilano',
+ 'vivimilano-vh.akamaihd': 'vivimilano',
+ 'media2-youreporter-it.akamaized': 'youreporter'
+ }
+ _MIGRATION_MEDIA = {
+ 'advrcs-vh.akamaihd': '',
+ 'corriere-f.akamaihd': '',
+ 'corrierepmd-corriere-it.akamaized': '',
+ 'corrprotetto-vh.akamaihd': '',
+ 'gazzetta-f.akamaihd': '',
+ 'gazzettapmd-gazzetta-it.akamaized': '',
+ 'gazzprotetto-vh.akamaihd': '',
+ 'periodici-f.akamaihd': '',
+ 'periodicisecure-vh.akamaihd': '',
+ 'videocoracademy-vh.akamaihd': ''
+ }
+
+ def _get_video_src(self, video):
+        mediaFiles = (video.get('mediaProfile') or {}).get('mediaFile') or []
+ src = {}
+ # audio
+ if video.get('mediaType') == 'AUDIO':
+ for aud in mediaFiles:
+ # todo: check
+ src['mp3'] = aud.get('value')
+ # video
+ else:
+ for vid in mediaFiles:
+ if vid.get('mimeType') == 'application/vnd.apple.mpegurl':
+ src['m3u8'] = vid.get('value')
+ if vid.get('mimeType') == 'video/mp4':
+ src['mp4'] = vid.get('value')
+
+ # replace host
+ for t in src:
+ for s, r in self._ALL_REPLACE.items():
+ src[t] = src[t].replace(s, r)
+ for s, r in self._MP4_REPLACE.items():
+ src[t] = src[t].replace(s, r)
+
+ # switch cdn
+ if 'mp4' in src and 'm3u8' in src:
+ if ('-lh.akamaihd' not in src.get('m3u8')
+ and 'akamai' in src.get('mp4')):
+ if 'm3u8' in src:
+                    matches = re.search(r'(?:https?:)?//(?P<host>.*)\.net/i(?P<path>.*)$', src.get('m3u8'))
+ src['m3u8'] = 'https://vod.rcsobjects.it/hls/%s%s' % (
+ self._MIGRATION_MAP[matches.group('host')],
+ matches.group('path').replace(
+ '///', '/').replace(
+ '//', '/').replace(
+ '.csmil', '.urlset'
+ )
+ )
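+                    # e.g. //corrierevam-vh.akamaihd.net/i/some/path.csmil/master.m3u8
+                    #   -> https://vod.rcsobjects.it/hls/corriere/some/path.urlset/master.m3u8
+                    # (illustrative path; the host mapping comes from _MIGRATION_MAP)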
+ if 'mp4' in src:
+                matches = re.search(r'(?:https?:)?//(?P<host>.*)\.net/i(?P<path>.*)$', src.get('mp4'))
+ if matches:
+ if matches.group('host') in self._MIGRATION_MEDIA:
+ vh_stream = 'https://media2.corriereobjects.it'
+                        if 'fcs.quotidiani_!' in src['mp4']:
+ vh_stream = 'https://media2-it.corriereobjects.it'
+ src['mp4'] = '%s%s' % (
+ vh_stream,
+ matches.group('path').replace(
+ '///', '/').replace(
+ '//', '/').replace(
+ '/fcs.quotidiani/mediacenter', '').replace(
+ '/fcs.quotidiani_!/mediacenter', '').replace(
+ 'corriere/content/mediacenter/', '').replace(
+ 'gazzetta/content/mediacenter/', '')
+ )
+ else:
+ src['mp4'] = 'https://vod.rcsobjects.it/%s%s' % (
+ self._MIGRATION_MAP[matches.group('host')],
+ matches.group('path').replace('///', '/').replace('//', '/')
+ )
+
+ if 'mp3' in src:
+ src['mp3'] = src.get('mp3').replace(
+ 'media2vam-corriere-it.akamaized.net',
+ 'vod.rcsobjects.it/corriere')
+ if 'mp4' in src:
+            if 'fcs.quotidiani_!' in src['mp4']:
+ src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects')
+ if 'm3u8' in src:
+            if 'fcs.quotidiani_!' in src['m3u8']:
+ src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects')
+
+        if 'geoblocking' in (video.get('mediaProfile') or {}):
+ if 'm3u8' in src:
+ src['m3u8'] = src.get('m3u8').replace('vod.rcsobjects', 'vod-it.rcsobjects')
+ if 'mp4' in src:
+ src['mp4'] = src.get('mp4').replace('vod.rcsobjects', 'vod-it.rcsobjects')
+ if 'm3u8' in src:
+            if 'csmil' in src['m3u8'] and 'vod' in src['m3u8']:
+ src['m3u8'] = src.get('m3u8').replace('.csmil', '.urlset')
+
+ return src
+
+ def _create_formats(self, urls, video_id):
+        formats = []
+        if urls.get('m3u8'):
+            formats = self._extract_m3u8_formats(
+                urls['m3u8'], video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls', fatal=False)
+
+ if urls.get('mp4'):
+ formats.append({
+ 'format_id': 'http-mp4',
+ 'url': urls['mp4']
+ })
+ self._sort_formats(formats)
+ return formats
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
+
+ if 'cdn' not in mobj.groupdict():
+ raise ExtractorError('CDN not found in url: %s' % url)
+
+        # for leitv/youreporter/viaggi, don't use the embed page
+ if ((mobj.group('cdn') not in ['leitv.it', 'youreporter.it'])
+ and (mobj.group('vid') == 'video')):
+ url = 'https://video.%s/video-embed/%s' % (mobj.group('cdn'), video_id)
+
+ page = self._download_webpage(url, video_id)
+
+ video_data = None
+ # look for json video data url
+ json = self._search_regex(
+ r'''(?x)url\s*=\s*(["'])
+ (?P<url>
+ (?:https?:)?//video\.rcs\.it
+ /fragment-includes/video-includes/.+?\.json
+ )\1;''',
+ page, video_id, group='url', default=None)
+ if json:
+ if json.startswith('//'):
+ json = 'https:%s' % json
+ video_data = self._download_json(json, video_id)
+
+ # if json url not found, look for json video data directly in the page
+ else:
+ # RCS normal pages and most of the embeds
+ json = self._search_regex(
+ r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
+ page, video_id, default=None)
+ if not json and 'video-embed' in url:
+ page = self._download_webpage(url.replace('video-embed', 'video-json'), video_id)
+ json = self._search_regex(
+ r'##start-video##({[\s\S]+?})##end-video##',
+ page, video_id, default=None)
+ if not json:
+ # if no video data found try search for iframes
+ emb = RCSEmbedsIE._extract_url(page)
+ if emb:
+ return {
+ '_type': 'url_transparent',
+ 'url': emb,
+ 'ie_key': RCSEmbedsIE.ie_key()
+ }
+ if json:
+ video_data = self._parse_json(
+ json, video_id, transform_source=js_to_json)
+
+ if not video_data:
+ raise ExtractorError('Video data not found in the page')
+
+ formats = self._create_formats(
+ self._get_video_src(video_data), video_id)
+
+ description = (video_data.get('description')
+ or clean_html(video_data.get('htmlDescription'))
+ or self._html_search_meta('description', page))
+ uploader = video_data.get('provider') or mobj.group('cdn')
+
+ return {
+ 'id': video_id,
+ 'title': video_data.get('title'),
+ 'description': description,
+ 'uploader': uploader,
+ 'formats': formats
+ }
+
+
+class RCSEmbedsIE(RCSBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?P<vid>video)\.
+ (?P<cdn>
+ (?:
+ rcs|
+ (?:corriere\w+\.)?corriere|
+ (?:gazzanet\.)?gazzetta
+ )\.it)
+ /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)'''
+ _TESTS = [{
+ 'url': 'https://video.rcs.it/video-embed/iodonna-0001585037',
+ 'md5': '623ecc8ffe7299b2d0c1046d8331a9df',
+ 'info_dict': {
+ 'id': 'iodonna-0001585037',
+ 'ext': 'mp4',
+ 'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"',
+ 'description': 'md5:65b09633df9ffee57f48b39e34c9e067',
+ 'uploader': 'rcs.it',
+ }
+ }, {
+        # re-download the page, replacing 'video-embed' with 'video-json'
+ 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
+ 'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
+ 'info_dict': {
+ 'id': 'gazzanet-mo05-0000260789',
+ 'ext': 'mp4',
+ 'title': 'Valentino Rossi e papà Graziano si divertono col drifting',
+ 'description': 'md5:a8bf90d6adafd9815f70fc74c0fc370a',
+ 'uploader': 'rcd',
+ }
+ }, {
+ 'url': 'https://video.corriere.it/video-embed/b727632a-f9d0-11ea-91b0-38d50a849abb?player',
+        'only_matching': True
+ }, {
+ 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140',
+        'only_matching': True
+ }]
+
+ @staticmethod
+ def _sanitize_urls(urls):
+ # add protocol if missing
+ for i, e in enumerate(urls):
+ if e.startswith('//'):
+ urls[i] = 'https:%s' % e
+ # clean iframes urls
+ for i, e in enumerate(urls):
+ urls[i] = urljoin(base_url(e), url_basename(e))
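+            # e.g. https://video.corriere.it/video-embed/abc?player
+            #   -> https://video.corriere.it/video-embed/abc  (illustrative id)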
+ return urls
+
+ @staticmethod
+ def _extract_urls(webpage):
+ entries = [
+ mobj.group('url')
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ data-frame-src=|
+ <iframe[^\n]+src=
+ )
+ (["'])
+ (?P<url>(?:https?:)?//video\.
+ (?:
+ rcs|
+ (?:corriere\w+\.)?corriere|
+ (?:gazzanet\.)?gazzetta
+ )
+ \.it/video-embed/.+?)
+ \1''', webpage)]
+ return RCSEmbedsIE._sanitize_urls(entries)
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = RCSEmbedsIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+
+class RCSIE(RCSBaseIE):
+ _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\.
+ (?P<cdn>
+ (?:
+ corrieredelmezzogiorno\.
+ |corrieredelveneto\.
+ |corrieredibologna\.
+ |corrierefiorentino\.
+ )?corriere\.it
+ |(?:gazzanet\.)?gazzetta\.it)
+ /(?!video-embed/).+?/(?P<id>[^/\?]+)(?=\?|/$|$)'''
+ _TESTS = [{
+ 'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb',
+ 'md5': '0f4ededc202b0f00b6e509d831e2dcda',
+ 'info_dict': {
+ 'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb',
+ 'ext': 'mp4',
+ 'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante',
+ 'description': 'md5:93b51c9161ac8a64fb2f997b054d0152',
+ 'uploader': 'Corriere Tv',
+ }
+ }, {
+ # video data inside iframe
+ 'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
+ 'md5': 'da378e4918d2afbf7d61c35abb948d4c',
+ 'info_dict': {
+ 'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2',
+ 'ext': 'mp4',
+ 'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen',
+ 'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8',
+ 'uploader': 'DOVE Viaggi',
+ }
+ }, {
+ 'url': 'https://video.gazzetta.it/video-motogp-catalogna-cadute-dovizioso-vale-rossi/49612410-00ca-11eb-bcd8-30d4253e0140?vclk=Videobar',
+ 'md5': 'eedc1b5defd18e67383afef51ff7bdf9',
+ 'info_dict': {
+ 'id': '49612410-00ca-11eb-bcd8-30d4253e0140',
+ 'ext': 'mp4',
+ 'title': 'Dovizioso, il contatto con Zarco e la caduta. E anche Vale finisce a terra',
+ 'description': 'md5:8c6e905dc3b9413218beca11ebd69778',
+ 'uploader': 'AMorici',
+ }
+ }, {
+ 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945',
+        'only_matching': True
+ }]
+
+
+class RCSVariousIE(RCSBaseIE):
+ _VALID_URL = r'''(?x)https?://www\.
+ (?P<cdn>
+ leitv\.it|
+ youreporter\.it
+ )/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)'''
+ _TESTS = [{
+ 'url': 'https://www.leitv.it/benessere/mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa/',
+ 'md5': '92b4e63667b8f95acb0a04da25ae28a1',
+ 'info_dict': {
+ 'id': 'mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa',
+ 'ext': 'mp4',
+ 'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto',
+ 'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5',
+ 'uploader': 'leitv.it',
+ }
+ }, {
+ 'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/',
+ 'md5': '8dccd436b47a830bab5b4a88232f391a',
+ 'info_dict': {
+ 'id': 'fiume-sesia-3-ottobre-2020',
+ 'ext': 'mp4',
+ 'title': 'Fiume Sesia 3 ottobre 2020',
+ 'description': 'md5:0070eef1cc884d13c970a4125063de55',
+ 'uploader': 'youreporter.it',
+ }
+ }]
diff --git a/hypervideo_dl/extractor/rcti.py b/hypervideo_dl/extractor/rcti.py
new file mode 100644
index 0000000..31d9779
--- /dev/null
+++ b/hypervideo_dl/extractor/rcti.py
@@ -0,0 +1,354 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import json
+import random
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ strip_or_none,
+ try_get
+)
+
+
+class RCTIPlusBaseIE(InfoExtractor):
+ def _real_initialize(self):
+ self._AUTH_KEY = self._download_json(
+ 'https://api.rctiplus.com/api/v1/visitor?platform=web', # platform can be web, mweb, android, ios
+ None, 'Fetching authorization key')['data']['access_token']
+
+ def _call_api(self, url, video_id, note=None):
+ json = self._download_json(
+ url, video_id, note=note, headers={'Authorization': self._AUTH_KEY})
+ if json.get('status', {}).get('code', 0) != 0:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, json["status"]["message_client"]), cause=json)
+ return json.get('data'), json.get('meta')
+
+
+class RCTIPlusIE(RCTIPlusBaseIE):
+ _VALID_URL = r'https://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
+ 'md5': '56ed45affad45fa18d5592a1bc199997',
+ 'info_dict': {
+ 'id': 'v_e22124',
+ 'title': 'Untuk Lola',
+ 'display_id': 'untuk-lola',
+ 'description': 'md5:2b809075c0b1e071e228ad6d13e41deb',
+ 'ext': 'mp4',
+ 'duration': 1400,
+ 'timestamp': 1615978800,
+ 'upload_date': '20210317',
+ 'series': 'Kiko : Untuk Lola',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'channel': 'RCTI',
+ },
+ 'params': {
+ 'fixup': 'never',
+ },
+    }, { # Clip; Series title doesn't appear in the metadata JSON
+ 'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish',
+ 'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0',
+ 'info_dict': {
+ 'id': 'v_c3921',
+ 'title': 'Make A Wish',
+ 'display_id': 'make-a-wish',
+ 'description': 'Make A Wish',
+ 'ext': 'mp4',
+ 'duration': 288,
+ 'timestamp': 1571652600,
+ 'upload_date': '20191021',
+ 'series': 'Cahaya Terindah',
+ 'channel': 'RCTI',
+ },
+ 'params': {
+ 'fixup': 'never',
+ },
+ }, { # Extra
+ 'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden',
+ 'md5': 'c48106afdbce609749f5e0c007d9278a',
+ 'info_dict': {
+ 'id': 'v_ex9438',
+ 'title': 'md5:2ede828c0f8bde249e0912be150314ca',
+ 'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933',
+ 'description': 'md5:2ede828c0f8bde249e0912be150314ca',
+ 'ext': 'mp4',
+ 'duration': 93,
+ 'timestamp': 1587561540,
+ 'upload_date': '20200422',
+ 'series': 'iNews Malam',
+ 'channel': 'INews',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, { # Missed event/replay
+ 'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib',
+ 'md5': '649c5f27250faed1452ca8b91e06922d',
+ 'info_dict': {
+ 'id': 'v_pe2507',
+ 'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB',
+ 'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib',
+ 'ext': 'mp4',
+ 'timestamp': 1627142400,
+ 'upload_date': '20210724',
+ 'was_live': True,
+ 'release_timestamp': 1627369200,
+ },
+ 'params': {
+ 'fixup': 'never',
+ },
+ }, { # Live event; Cloudfront CDN
+ 'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
+ 'info_dict': {
+ 'id': 'v_le2530',
+ 'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB',
+ 'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
+ 'ext': 'mp4',
+ 'timestamp': 1627898400,
+ 'upload_date': '20210802',
+ 'release_timestamp': 1628067600,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This live event has ended.',
+ }, { # TV; live_at is null
+ 'url': 'https://www.rctiplus.com/live-event/1/rcti',
+ 'info_dict': {
+ 'id': 'v_lt1',
+ 'title': 'RCTI',
+ 'display_id': 'rcti',
+ 'ext': 'mp4',
+ 'timestamp': 1546344000,
+ 'upload_date': '20190101',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }]
+ _CONVIVA_JSON_TEMPLATE = {
+ 't': 'CwsSessionHb',
+ 'cid': 'ff84ae928c3b33064b76dec08f12500465e59a6f',
+ 'clid': '0',
+ 'sid': 0,
+ 'seq': 0,
+ 'caps': 0,
+ 'sf': 7,
+ 'sdk': True,
+ }
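+    # 'sst' (session start time) is added per request in _real_extract; the
+    # remaining values mirror the site's own Conviva heartbeat (assumed static)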
+
+ def _real_extract(self, url):
+ match = self._match_valid_url(url).groupdict()
+ video_type, video_id, display_id = match['type'], match['id'], match['display_id']
+
+ url_api_version = 'v2' if video_type == 'missed-event' else 'v1'
+ appier_id = '23984824_' + str(random.randint(0, 10000000000)) # Based on the webpage's uuidRandom generator
+ video_json = self._call_api(
+ f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0]
+ video_url = video_json['url']
+
+ is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at'])
+ if is_upcoming is None:
+ is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date'])
+ if is_upcoming:
+ self.raise_no_formats(
+ 'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True)
+ if 'akamaized' in video_url:
+            # For some videos hosted on Akamai's CDN (possibly the AES-encrypted ones?),
+            # a session must first be created via Conviva's API
+ conviva_json_data = {
+ **self._CONVIVA_JSON_TEMPLATE,
+ 'url': video_url,
+ 'sst': int(time.time())
+ }
+ conviva_json_res = self._download_json(
+ 'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id,
+ 'Creating Conviva session', 'Failed to create Conviva session',
+ fatal=False, data=json.dumps(conviva_json_data).encode('utf-8'))
+ if conviva_json_res and conviva_json_res.get('err') != 'ok':
+ self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err')))
+
+ video_meta, meta_paths = self._call_api(
+ 'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata')
+
+ thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/')
+ if video_meta.get('portrait_image'):
+ thumbnails.append({
+ 'id': 'portrait_image',
+ 'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image']) # 2000px seems to be the highest resolution that can be given
+ })
+ if video_meta.get('landscape_image'):
+ thumbnails.append({
+ 'id': 'landscape_image',
+ 'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image'])
+ })
+ try:
+ formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                self.raise_geo_restricted(countries=['ID'], metadata_available=True)
+                formats = []  # raise_geo_restricted may only warn when metadata_available=True
+            else:
+                raise
+ for f in formats:
+ if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
+ f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_meta.get('product_id') or video_json.get('product_id'),
+ 'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
+ 'display_id': display_id,
+ 'description': video_meta.get('summary'),
+ 'timestamp': video_meta.get('release_date') or video_json.get('start_date'),
+ 'duration': video_meta.get('duration'),
+ 'categories': [video_meta['genre']] if video_meta.get('genre') else None,
+ 'average_rating': video_meta.get('star_rating'),
+ 'series': video_meta.get('program_title') or video_json.get('program_title'),
+ 'season_number': video_meta.get('season'),
+ 'episode_number': video_meta.get('episode'),
+ 'channel': video_json.get('tv_name'),
+ 'channel_id': video_json.get('tv_id'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'is_live': video_type == 'live-event' and not is_upcoming,
+ 'was_live': video_type == 'missed-event',
+ 'live_status': 'is_upcoming' if is_upcoming else None,
+ 'release_timestamp': video_json.get('live_at'),
+ }
+
+
+class RCTIPlusSeriesIE(RCTIPlusBaseIE):
+ _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.rctiplus.com/programs/540/upin-ipin',
+ 'playlist_mincount': 417,
+ 'info_dict': {
+ 'id': '540',
+ 'title': 'Upin & Ipin',
+ 'description': 'md5:22cc912381f389664416844e1ec4f86b',
+ },
+ }, {
+ 'url': 'https://www.rctiplus.com/programs/540/upin-ipin/episodes?utm_source=Rplusdweb&utm_medium=share_copy&utm_campaign=programsupin-ipin',
+ 'only_matching': True,
+ }]
+ _AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings
+ 'S-SU': 2,
+ 'SU': 2,
+ 'P': 2,
+ 'A': 7,
+ 'R': 13,
+ 'R-R/1': 17, # Labelled as 17+ despite being R
+ 'D': 18,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if RCTIPlusIE.suitable(url) else super(RCTIPlusSeriesIE, cls).suitable(url)
+
+ def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}):
+ total_pages = 0
+ try:
+ total_pages = self._call_api(
+ '%s&length=20&page=0' % url,
+ display_id, note)[1]['pagination']['total_page']
+ except ExtractorError as e:
+ if 'not found' in str(e):
+ return []
+            raise
+ if total_pages <= 0:
+ return []
+
+ for page_num in range(1, total_pages + 1):
+ episode_list = self._call_api(
+ '%s&length=20&page=%s' % (url, page_num),
+ display_id, '%s page %s' % (note, page_num))[0] or []
+
+ for video_json in episode_list:
+ link = video_json['share_link']
+ url_res = self.url_result(link, 'RCTIPlus', video_json.get('product_id'), video_json.get('title'))
+ url_res.update(metadata)
+ yield url_res
+
+ def _real_extract(self, url):
+ series_id, display_id = self._match_valid_url(url).groups()
+
+ series_meta, meta_paths = self._call_api(
+ 'https://api.rctiplus.com/api/v1/program/%s/detail' % series_id, display_id, 'Downloading series metadata')
+ metadata = {
+ 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']])
+ }
+
+ cast = []
+ for star in series_meta.get('starring', []):
+ cast.append(strip_or_none(star.get('name')))
+ for star in series_meta.get('creator', []):
+ cast.append(strip_or_none(star.get('name')))
+ for star in series_meta.get('writer', []):
+ cast.append(strip_or_none(star.get('name')))
+ metadata['cast'] = cast
+
+ tags = []
+ for tag in series_meta.get('tag', []):
+ tags.append(strip_or_none(tag.get('name')))
+ metadata['tag'] = tags
+
+ entries = []
+ seasons_list = self._call_api(
+ 'https://api.rctiplus.com/api/v1/program/%s/season' % series_id, display_id, 'Downloading seasons list JSON')[0]
+ for season in seasons_list:
+ entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/episode?season=%s' % (series_id, season['season']),
+ display_id, 'Downloading season %s episode entries' % season['season'], metadata))
+
+ entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/clip?content_id=0' % series_id,
+ display_id, 'Downloading clip entries', metadata))
+ entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/extra?content_id=0' % series_id,
+ display_id, 'Downloading extra entries', metadata))
+
+ return self.playlist_result(itertools.chain(*entries), series_id, series_meta.get('title'), series_meta.get('summary'), **metadata)
+
+
+class RCTIPlusTVIE(RCTIPlusBaseIE):
+ _VALID_URL = r'https://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
+ _TESTS = [{
+ 'url': 'https://www.rctiplus.com/tv/rcti',
+ 'info_dict': {
+ 'id': 'v_lt1',
+ 'title': 'RCTI',
+ 'ext': 'mp4',
+ 'timestamp': 1546344000,
+ 'upload_date': '20190101',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ }
+ }, {
+ # Returned video will always change
+ 'url': 'https://www.rctiplus.com/live-event',
+ 'only_matching': True,
+ }, {
+ # Returned video will also always change
+ 'url': 'https://www.rctiplus.com/missed-event',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if RCTIPlusIE.suitable(url) else super(RCTIPlusTVIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ match = self._match_valid_url(url).groupdict()
+ tv_id = match.get('tvname') or match.get('eventname')
+ webpage = self._download_webpage(url, tv_id)
+ video_type, video_id = self._search_regex(
+ r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url', webpage, 'video link', group=('type', 'id'))
+ return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus')
diff --git a/hypervideo_dl/extractor/redbulltv.py b/hypervideo_dl/extractor/redbulltv.py
index 6d000b3..e7fdcce 100644
--- a/hypervideo_dl/extractor/redbulltv.py
+++ b/hypervideo_dl/extractor/redbulltv.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -161,7 +160,7 @@ class RedBullTVRrnContentIE(InfoExtractor):
}]
def _real_extract(self, url):
- region, lang, rrn_id = re.search(self._VALID_URL, url).groups()
+ region, lang, rrn_id = self._match_valid_url(url).groups()
rrn_id += ':%s-%s' % (lang, region.upper())
return self.url_result(
'https://www.redbull.com/embed/' + rrn_id,
@@ -204,7 +203,7 @@ class RedBullIE(InfoExtractor):
_LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe']
def _real_extract(self, url):
- region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups()
+ region, lang, filter_type, display_id = self._match_valid_url(url).groups()
if filter_type == 'episodes':
filter_type = 'episode-videos'
elif filter_type == 'live':
diff --git a/hypervideo_dl/extractor/reddit.py b/hypervideo_dl/extractor/reddit.py
index 222fa01..c75d95a 100644
--- a/hypervideo_dl/extractor/reddit.py
+++ b/hypervideo_dl/extractor/reddit.py
@@ -1,6 +1,4 @@
-from __future__ import unicode_literals
-
-import re
+import random
from .common import InfoExtractor
from ..utils import (
@@ -50,7 +48,7 @@ class RedditIE(InfoExtractor):
class RedditRIE(InfoExtractor):
- _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
+ _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
'info_dict': {
@@ -95,17 +93,27 @@ class RedditRIE(InfoExtractor):
# reddit video @ nm reddit
'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.redditmedia.com/r/serbia/comments/pu9wbx/ako_vu%C4%8Di%C4%87_izgubi_izbore_ja_%C4%87u_da_crknem/',
+ 'only_matching': True,
}]
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- url, video_id = mobj.group('url', 'id')
-
- video_id = self._match_id(url)
-
- data = self._download_json(
- url + '/.json', video_id)[0]['data']['children'][0]['data']
+ @staticmethod
+ def _gen_session_id():
+ id_length = 16
+ rand_max = 1 << (id_length * 4)
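+        # '%0.*x' takes the precision from the argument list, rendering a
+        # zero-padded 16-char lowercase hex string, e.g. '03f09c2a1d4e5b68'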
+ return '%0.*x' % (id_length, random.randrange(rand_max))
+ def _real_extract(self, url):
+        subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id')
+        subdomain = subdomain or 'www.'  # bare reddit.com URLs leave the subdomain group empty
+
+ self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id())
+ self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
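+        # '_options' is the URL-encoded JSON {"pref_quarantine_optin": true},
+        # which opts the session into viewing quarantined posts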
+ data = self._download_json(f'https://{subdomain}reddit.com/r/{slug}/.json', video_id, fatal=False)
+ if not data:
+ # Fall back to old.reddit.com in case the requested subdomain fails
+ data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id)
+ data = data[0]['data']['children'][0]['data']
video_url = data['url']
# Avoid recursing into the same reddit URL
diff --git a/hypervideo_dl/extractor/redtube.py b/hypervideo_dl/extractor/redtube.py
index a1ca791..747ce51 100644
--- a/hypervideo_dl/extractor/redtube.py
+++ b/hypervideo_dl/extractor/redtube.py
@@ -98,13 +98,14 @@ class RedTubeIE(InfoExtractor):
format_id = media.get('quality')
formats.append({
'url': format_url,
+ 'ext': 'mp4',
'format_id': format_id,
'height': int_or_none(format_id),
})
if not formats:
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
- formats.append({'url': video_url})
+ formats.append({'url': video_url, 'ext': 'mp4'})
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
diff --git a/hypervideo_dl/extractor/rice.py b/hypervideo_dl/extractor/rice.py
index f855719..cf2bb1b 100644
--- a/hypervideo_dl/extractor/rice.py
+++ b/hypervideo_dl/extractor/rice.py
@@ -30,7 +30,7 @@ class RICEIE(InfoExtractor):
_NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
def _real_extract(self, url):
- qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+ qs = compat_parse_qs(self._match_valid_url(url).group('query'))
if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'):
raise ExtractorError('Invalid URL', expected=True)
diff --git a/hypervideo_dl/extractor/rmcdecouverte.py b/hypervideo_dl/extractor/rmcdecouverte.py
index c3623ed..422d47a 100644
--- a/hypervideo_dl/extractor/rmcdecouverte.py
+++ b/hypervideo_dl/extractor/rmcdecouverte.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
@@ -13,9 +12,24 @@ from ..utils import smuggle_url
class RMCDecouverteIE(InfoExtractor):
- _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))'
+ _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)'
_TESTS = [{
+ 'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/',
+ 'info_dict': {
+ 'id': '6250879771001',
+ 'ext': 'mp4',
+ 'title': 'LES BUNKERS SECRETS D´OMAHA BEACH',
+ 'uploader_id': '1969646226001',
+ 'description': 'md5:aed573ca24abde62a148e0eba909657d',
+ 'timestamp': 1619622984,
+ 'upload_date': '20210428',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
'info_dict': {
'id': '5983675500001',
@@ -31,6 +45,13 @@ class RMCDecouverteIE(InfoExtractor):
},
'skip': 'only available for a week',
}, {
+ 'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598',
+ 'only_matching': True,
+ }, {
+ # The website accepts any URL as long as it has _\d+ at the end
+ 'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598',
+ 'only_matching': True,
+ }, {
# live, geo restricted, bypassable
'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
'only_matching': True,
@@ -38,8 +59,8 @@ class RMCDecouverteIE(InfoExtractor):
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('id') or mobj.group('live_id')
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('id') or 'direct'
webpage = self._download_webpage(url, display_id)
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if brightcove_legacy_url:
diff --git a/hypervideo_dl/extractor/roosterteeth.py b/hypervideo_dl/extractor/roosterteeth.py
index 8883639..2c815bd 100644
--- a/hypervideo_dl/extractor/roosterteeth.py
+++ b/hypervideo_dl/extractor/roosterteeth.py
@@ -31,6 +31,19 @@ class RoosterTeethIE(InfoExtractor):
'episode': 'Million Dollars, But... The Game Announcement',
},
}, {
+ 'url': 'https://roosterteeth.com/watch/rwby-bonus-25',
+ 'md5': 'fe8d9d976b272c18a24fe7f1f5830084',
+ 'info_dict': {
+ 'id': '31',
+ 'display_id': 'rwby-bonus-25',
+ 'title': 'Volume 2, World of Remnant 3',
+ 'description': 'md5:8d58d3270292ea11da00ea712bbfb009',
+ 'episode': 'Volume 2, World of Remnant 3',
+ 'channel_id': 'fab60c1c-29cb-43bc-9383-5c3538d9e246',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'ext': 'mp4',
+ },
+ }, {
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
'only_matching': True,
}, {
@@ -50,7 +63,7 @@ class RoosterTeethIE(InfoExtractor):
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'only_matching': True,
}]
- _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/'
+ _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/watch/'
def _login(self):
username, password = self._get_login_info()
@@ -86,9 +99,11 @@ class RoosterTeethIE(InfoExtractor):
api_episode_url = self._EPISODE_BASE_URL + display_id
try:
- m3u8_url = self._download_json(
+ video_data = self._download_json(
api_episode_url + '/videos', display_id,
- 'Downloading video JSON metadata')['data'][0]['attributes']['url']
+ 'Downloading video JSON metadata')['data'][0]
+ m3u8_url = video_data['attributes']['url']
+ # XXX: additional URL at video_data['links']['download']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
@@ -96,7 +111,7 @@ class RoosterTeethIE(InfoExtractor):
'%s is only available for FIRST members' % display_id)
raise
- formats = self._extract_m3u8_formats(
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
self._sort_formats(formats)
@@ -109,7 +124,7 @@ class RoosterTeethIE(InfoExtractor):
thumbnails = []
for image in episode.get('included', {}).get('images', []):
- if image.get('type') == 'episode_image':
+ if image.get('type') in ('episode_image', 'bonus_feature_image'):
img_attributes = image.get('attributes') or {}
for k in ('thumb', 'small', 'medium', 'large'):
img_url = img_attributes.get(k)
@@ -134,4 +149,5 @@ class RoosterTeethIE(InfoExtractor):
'formats': formats,
'channel_id': attributes.get('channel_id'),
'duration': int_or_none(attributes.get('length')),
+ 'subtitles': subtitles
}
diff --git a/hypervideo_dl/extractor/roxwel.py b/hypervideo_dl/extractor/roxwel.py
index 6528464..84bb1aa 100644
--- a/hypervideo_dl/extractor/roxwel.py
+++ b/hypervideo_dl/extractor/roxwel.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import unified_strdate, determine_ext
@@ -27,7 +26,7 @@ class RoxwelIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
filename = mobj.group('filename')
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
info = self._download_json(info_url, filename)
diff --git a/hypervideo_dl/extractor/rtbf.py b/hypervideo_dl/extractor/rtbf.py
index 3b0f308..f9979d0 100644
--- a/hypervideo_dl/extractor/rtbf.py
+++ b/hypervideo_dl/extractor/rtbf.py
@@ -68,7 +68,7 @@ class RTBFIE(InfoExtractor):
]
def _real_extract(self, url):
- live, media_id = re.match(self._VALID_URL, url).groups()
+ live, media_id = self._match_valid_url(url).groups()
embed_page = self._download_webpage(
'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
media_id, query={'id': media_id})
@@ -125,7 +125,7 @@ class RTBFIE(InfoExtractor):
})
mpd_url = data.get('urlDash')
- if not data.get('drm') and mpd_url:
+ if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
formats.extend(self._extract_mpd_formats(
mpd_url, media_id, mpd_id='dash', fatal=False))
diff --git a/hypervideo_dl/extractor/rtl2.py b/hypervideo_dl/extractor/rtl2.py
index 70f000c..4e3aa03 100644
--- a/hypervideo_dl/extractor/rtl2.py
+++ b/hypervideo_dl/extractor/rtl2.py
@@ -51,7 +51,7 @@ class RTL2IE(InfoExtractor):
}]
def _real_extract(self, url):
- vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups()
+ vico_id, vivi_id, display_id = self._match_valid_url(url).groups()
if not vico_id:
webpage = self._download_webpage(url, display_id)
@@ -93,7 +93,7 @@ class RTL2IE(InfoExtractor):
'flash_version': 'LNX 11,2,202,429',
'rtmp_conn': rtmp_conn,
'no_resume': True,
- 'preference': 1,
+ 'quality': 1,
})
m3u8_url = video_info.get('streamurl_hls')
diff --git a/hypervideo_dl/extractor/rtp.py b/hypervideo_dl/extractor/rtp.py
index 02986f4..c165ade 100644
--- a/hypervideo_dl/extractor/rtp.py
+++ b/hypervideo_dl/extractor/rtp.py
@@ -2,10 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- js_to_json,
-)
+from ..utils import js_to_json
+import re
+import json
+import urllib.parse
+import base64
class RTPIE(InfoExtractor):
@@ -25,6 +26,22 @@ class RTPIE(InfoExtractor):
'only_matching': True,
}]
+ _RX_OBFUSCATION = re.compile(r'''(?xs)
+ atob\s*\(\s*decodeURIComponent\s*\(\s*
+ (\[[0-9A-Za-z%,'"]*\])
+ \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
+ ''')
+
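+    # the player config embeds obfuscated strings of the form (chunks are
+    # hypothetical): atob(decodeURIComponent(['JTdCJTIy', 'dGl0bGUl'].join('')))
+    # i.e. a URL-encoded base64 payload split into array chunks;
+    # __unobfuscate() below reverses this so js_to_json can parse the result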
+ def __unobfuscate(self, data, *, video_id):
+ if data.startswith('{'):
+ data = self._RX_OBFUSCATION.sub(
+ lambda m: json.dumps(
+ base64.b64decode(urllib.parse.unquote(
+ ''.join(self._parse_json(m.group(1), video_id))
+ )).decode('iso-8859-1')),
+ data)
+ return js_to_json(data)
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -32,30 +49,46 @@ class RTPIE(InfoExtractor):
title = self._html_search_meta(
'twitter:title', webpage, display_name='title', fatal=True)
- config = self._parse_json(self._search_regex(
- r'(?s)RTPPlayer\(({.+?})\);', webpage,
- 'player config'), video_id, js_to_json)
- file_url = config['file']
- ext = determine_ext(file_url)
- if ext == 'm3u8':
- file_key = config.get('fileKey')
- formats = self._extract_m3u8_formats(
- file_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=file_key)
- if file_key:
- formats.append({
- 'url': 'https://cdn-ondemand.rtp.pt' + file_key,
- 'preference': 1,
- })
- self._sort_formats(formats)
+ f, config = self._search_regex(
+ r'''(?sx)
+ var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
+ var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
+ ''', webpage,
+ 'player config', group=('f', 'config'))
+
+ f = self._parse_json(
+ f, video_id,
+ lambda data: self.__unobfuscate(data, video_id=video_id))
+ config = self._parse_json(
+ config, video_id,
+ lambda data: self.__unobfuscate(data, video_id=video_id))
+
+ formats = []
+ if isinstance(f, dict):
+ f_hls = f.get('hls')
+ if f_hls is not None:
+ formats.extend(self._extract_m3u8_formats(
+ f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))
+
+ f_dash = f.get('dash')
+ if f_dash is not None:
+ formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash'))
else:
- formats = [{
- 'url': file_url,
- 'ext': ext,
- }]
- if config.get('mediaType') == 'audio':
- for f in formats:
- f['vcodec'] = 'none'
+ formats.append({
+ 'format_id': 'f',
+ 'url': f,
+ 'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
+ })
+
+ subtitles = {}
+
+ vtt = config.get('vtt')
+ if vtt is not None:
+ for lcode, lname, url in vtt:
+ subtitles.setdefault(lcode, []).append({
+ 'name': lname,
+ 'url': url,
+ })
return {
'id': video_id,
@@ -63,4 +96,5 @@ class RTPIE(InfoExtractor):
'formats': formats,
'description': self._html_search_meta(['description', 'twitter:description'], webpage),
'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
+ 'subtitles': subtitles,
}
diff --git a/hypervideo_dl/extractor/rts.py b/hypervideo_dl/extractor/rts.py
index aed35f8..865a730 100644
--- a/hypervideo_dl/extractor/rts.py
+++ b/hypervideo_dl/extractor/rts.py
@@ -116,7 +116,7 @@ class RTSIE(SRGSSRIE):
]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
+ m = self._match_valid_url(url)
media_id = m.group('rts_id') or m.group('id')
display_id = m.group('display_id') or media_id
diff --git a/hypervideo_dl/extractor/rtve.py b/hypervideo_dl/extractor/rtve.py
index d2fb754..59832ee 100644
--- a/hypervideo_dl/extractor/rtve.py
+++ b/hypervideo_dl/extractor/rtve.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import base64
import io
-import re
import sys
from .common import InfoExtractor
@@ -216,7 +215,7 @@ class RTVELiveIE(RTVEALaCartaIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/rumble.py b/hypervideo_dl/extractor/rumble.py
index 4a02251..49c1f44 100644
--- a/hypervideo_dl/extractor/rumble.py
+++ b/hypervideo_dl/extractor/rumble.py
@@ -1,13 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
+import re
+
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_str, compat_HTTPError
from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
try_get,
+ ExtractorError,
)
@@ -28,6 +32,14 @@ class RumbleEmbedIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)' % RumbleEmbedIE._VALID_URL,
+ webpage)]
+
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
@@ -65,3 +77,36 @@ class RumbleEmbedIE(InfoExtractor):
'channel_url': author.get('url'),
'duration': int_or_none(video.get('duration')),
}
+
+
+class RumbleChannelIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'
+
+ _TESTS = [{
+ 'url': 'https://rumble.com/c/Styxhexenhammer666',
+ 'playlist_mincount': 1160,
+ 'info_dict': {
+ 'id': 'Styxhexenhammer666',
+ },
+ }, {
+ 'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'id': 'goldenpoodleharleyeuna',
+ },
+ }]
+
+ def entries(self, url, playlist_id):
+ for page in itertools.count(1):
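+            # pages are fetched until the site responds 404, which marks the end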
+ try:
+ webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ break
+ raise
+ for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
+ yield self.url_result('https://rumble.com' + video_url)
+
+ def _real_extract(self, url):
+ url, playlist_id = self._match_valid_url(url).groups()
+ return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)
diff --git a/hypervideo_dl/extractor/rutube.py b/hypervideo_dl/extractor/rutube.py
index 8f54d56..d027412 100644
--- a/hypervideo_dl/extractor/rutube.py
+++ b/hypervideo_dl/extractor/rutube.py
@@ -7,13 +7,12 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_parse_qs,
- compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
bool_or_none,
int_or_none,
+ parse_qs,
try_get,
unified_timestamp,
url_or_none,
@@ -178,7 +177,7 @@ class RutubeEmbedIE(RutubeBaseIE):
embed_id = self._match_id(url)
        # The query may contain a private video token and should be passed to API
# requests (see #19163)
- query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ query = parse_qs(url)
options = self._download_api_options(embed_id, query)
video_id = options['effective_video']
formats = self._extract_formats(options, video_id)
@@ -298,16 +297,18 @@ class RutubePlaylistIE(RutubePlaylistBaseIE):
@classmethod
def suitable(cls, url):
+ from ..utils import int_or_none, parse_qs
+
if not super(RutubePlaylistIE, cls).suitable(url):
return False
- params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ params = parse_qs(url)
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
def _next_page_url(self, page_num, playlist_id, item_kind):
return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
def _real_extract(self, url):
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs = parse_qs(url)
playlist_kind = qs['pl_type'][0]
playlist_id = qs['pl_id'][0]
return self._extract_playlist(playlist_id, item_kind=playlist_kind)
diff --git a/hypervideo_dl/extractor/rutv.py b/hypervideo_dl/extractor/rutv.py
index d2713c1..7e0de99 100644
--- a/hypervideo_dl/extractor/rutv.py
+++ b/hypervideo_dl/extractor/rutv.py
@@ -123,7 +123,7 @@ class RUTVIE(InfoExtractor):
return mobj.group('url')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
video_path = mobj.group('path')
@@ -139,7 +139,7 @@ class RUTVIE(InfoExtractor):
is_live = video_type == 'live'
json_data = self._download_json(
- 'http://player.rutv.ru/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id),
+ 'http://player.vgtrk.com/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id),
video_id, 'Downloading JSON')
if json_data['errors']:
@@ -180,11 +180,11 @@ class RUTVIE(InfoExtractor):
'rtmp_live': True,
'ext': 'flv',
'vbr': int(quality),
- 'preference': preference,
+ 'quality': preference,
}
elif transport == 'm3u8':
formats.extend(self._extract_m3u8_formats(
- url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
+ url, video_id, 'mp4', quality=preference, m3u8_id='hls'))
continue
else:
fmt = {
diff --git a/hypervideo_dl/extractor/ruutu.py b/hypervideo_dl/extractor/ruutu.py
index c50cd3e..d9cf39d 100644
--- a/hypervideo_dl/extractor/ruutu.py
+++ b/hypervideo_dl/extractor/ruutu.py
@@ -200,9 +200,9 @@ class RuutuIE(InfoExtractor):
return node.get('value')
if not formats:
- drm = xpath_text(video_xml, './Clip/DRM', default=None)
- if drm:
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if (not self.get_param('allow_unplayable_formats')
+ and xpath_text(video_xml, './Clip/DRM', default=None)):
+ self.report_drm(video_id)
ns_st_cds = pv('ns_st_cds')
if ns_st_cds != 'free':
raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
diff --git a/hypervideo_dl/extractor/safari.py b/hypervideo_dl/extractor/safari.py
index 2cc6651..cca4464 100644
--- a/hypervideo_dl/extractor/safari.py
+++ b/hypervideo_dl/extractor/safari.py
@@ -127,7 +127,7 @@ class SafariIE(SafariBaseIE):
_UICONF_ID = '29375172'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
reference_id = mobj.group('reference_id')
if reference_id:
@@ -189,11 +189,16 @@ class SafariApiIE(SafariBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
part = self._download_json(
url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
'Downloading part JSON')
- return self.url_result(part['web_url'], SafariIE.ie_key())
+ web_url = part['web_url']
+ if 'library/view' in web_url:
+ web_url = web_url.replace('library/view', 'videos')
+ natural_keys = part['natural_key']
+ web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}'
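+        # e.g. .../library/view/<title>/<course_id>/<part>.html becomes
+        # .../videos/<title>/<course_id>/<natural_key[0]>-<natural_key[1] minus '.html'>
+        # (path segments shown are placeholders)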
+ return self.url_result(web_url, SafariIE.ie_key())
class SafariCourseIE(SafariBaseIE):
diff --git a/hypervideo_dl/extractor/saitosan.py b/hypervideo_dl/extractor/saitosan.py
new file mode 100644
index 0000000..621335c
--- /dev/null
+++ b/hypervideo_dl/extractor/saitosan.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, try_get
+
+
+class SaitosanIE(InfoExtractor):
+ IE_NAME = 'Saitosan'
+ _VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.saitosan.net/bview.html?id=10031846',
+ 'info_dict': {
+ 'id': '10031846',
+ 'ext': 'mp4',
+ 'title': '井下原 和弥',
+ 'uploader': '井下原 和弥',
+ 'thumbnail': 'http://111.171.196.85:8088/921f916f-7f55-4c97-b92e-5d9d0fef8f5f/thumb',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Broadcasts are ephemeral',
+ },
+ {
+ 'url': 'http://www.saitosan.net/bview.html?id=10031795',
+ 'info_dict': {
+ 'id': '10031795',
+ 'ext': 'mp4',
+ 'title': '橋本',
+ 'uploader': '橋本',
+ 'thumbnail': 'http://111.171.196.85:8088/1a3933e1-a01a-483b-8931-af15f37f8082/thumb',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Broadcasts are ephemeral',
+ }]
+
+ def _real_extract(self, url):
+ b_id = self._match_id(url)
+
+ base = 'http://hankachi.saitosan-api.net:8002/socket.io/?transport=polling&EIO=3'
+ sid = self._download_socket_json(base, b_id, note='Opening socket').get('sid')
+ base += '&sid=' + sid
+
+ self._download_webpage(base, b_id, note='Polling socket')
+ payload = '420["room_start_join",{"room_id":"%s"}]' % b_id
+ payload = '%s:%s' % (len(payload), payload)
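+        # Engine.IO v3 XHR-polling frames are length-prefixed ('<len>:<packet>');
+        # in '420[...]', '4' marks a message, '2' a socket.io event, '0' an ack id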
+
+ self._download_webpage(base, b_id, data=payload, note='Polling socket with payload')
+ response = self._download_socket_json(base, b_id, note='Polling socket')
+ if not response.get('ok'):
+ err = response.get('error') or {}
+ raise ExtractorError(
+ '%s said: %s - %s' % (self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err
+ else 'The socket reported that the broadcast could not be joined. Maybe it\'s offline or the URL is incorrect',
+ expected=True, video_id=b_id)
+
+ self._download_webpage(base, b_id, data='26:421["room_finish_join",{}]', note='Polling socket')
+ b_data = self._download_socket_json(base, b_id, note='Getting broadcast metadata from socket')
+ m3u8_url = b_data.get('url')
+
+ self._download_webpage(base, b_id, data='1:1', note='Closing socket', fatal=False)
+
+ return {
+ 'id': b_id,
+ 'title': b_data.get('name'),
+ 'formats': self._extract_m3u8_formats(m3u8_url, b_id, 'mp4', live=True),
+ 'thumbnail': m3u8_url.replace('av.m3u8', 'thumb'),
+ 'uploader': try_get(b_data, lambda x: x['broadcast_user']['name']), # same as title
+ 'is_live': True
+ }
diff --git a/hypervideo_dl/extractor/sapo.py b/hypervideo_dl/extractor/sapo.py
index 49a9b31..df202a3 100644
--- a/hypervideo_dl/extractor/sapo.py
+++ b/hypervideo_dl/extractor/sapo.py
@@ -63,7 +63,7 @@ class SapoIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
item = self._download_xml(
diff --git a/hypervideo_dl/extractor/savefrom.py b/hypervideo_dl/extractor/savefrom.py
index 21e44b6..98efdc2 100644
--- a/hypervideo_dl/extractor/savefrom.py
+++ b/hypervideo_dl/extractor/savefrom.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import os.path
-import re
from .common import InfoExtractor
@@ -28,7 +27,7 @@ class SaveFromIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = os.path.splitext(url.split('/')[-1])[0]
return self.url_result(mobj.group('url'), video_id=video_id)
diff --git a/hypervideo_dl/extractor/scrippsnetworks.py b/hypervideo_dl/extractor/scrippsnetworks.py
index b40b4c4..84918b6 100644
--- a/hypervideo_dl/extractor/scrippsnetworks.py
+++ b/hypervideo_dl/extractor/scrippsnetworks.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import json
import hashlib
-import re
from .aws import AWSIE
from .anvato import AnvatoIE
@@ -55,7 +54,7 @@ class ScrippsNetworksWatchIE(AWSIE):
_AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
site_id, video_id = mobj.group('site', 'id')
aws_identity_id_json = json.dumps({
@@ -146,7 +145,7 @@ class ScrippsNetworksIE(InfoExtractor):
_TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
def _real_extract(self, url):
- site, guid = re.match(self._VALID_URL, url).groups()
+ site, guid = self._match_valid_url(url).groups()
return self.url_result(smuggle_url(
self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
{'force_smil_url': True}), 'ThePlatform', guid)
diff --git a/hypervideo_dl/extractor/seeker.py b/hypervideo_dl/extractor/seeker.py
index 7872dc8..e5c18c7 100644
--- a/hypervideo_dl/extractor/seeker.py
+++ b/hypervideo_dl/extractor/seeker.py
@@ -46,7 +46,7 @@ class SeekerIE(InfoExtractor):
}]
def _real_extract(self, url):
- display_id, article_id = re.match(self._VALID_URL, url).groups()
+ display_id, article_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
entries = []
for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
diff --git a/hypervideo_dl/extractor/senateisvp.py b/hypervideo_dl/extractor/senateisvp.py
index db5ef8b..8794d47 100644
--- a/hypervideo_dl/extractor/senateisvp.py
+++ b/hypervideo_dl/extractor/senateisvp.py
@@ -102,7 +102,7 @@ class SenateISVPIE(InfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs'))
+ qs = compat_parse_qs(self._match_valid_url(url).group('qs'))
if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
raise ExtractorError('Invalid URL', expected=True)
diff --git a/hypervideo_dl/extractor/sendtonews.py b/hypervideo_dl/extractor/sendtonews.py
index 9d96529..bc38a0f 100644
--- a/hypervideo_dl/extractor/sendtonews.py
+++ b/hypervideo_dl/extractor/sendtonews.py
@@ -80,7 +80,9 @@ class SendtoNewsIE(InfoExtractor):
'format_id': '%s-%d' % (determine_protocol(f), tbr),
'tbr': tbr,
})
- self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id'))
+ # 'tbr' was explicitly set to be preferred over 'height' originally,
+ # so this is being kept unless someone can confirm it is unnecessary
+ self._sort_formats(info_dict['formats'], ('tbr', 'res'))
thumbnails = []
if video.get('thumbnailUrl'):
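
For reference, passing an explicit field order to `_sort_formats` ranks formats by the named fields from left to right, so `('tbr', 'res')` keeps bitrate as the primary key with resolution only breaking ties. A plain-`sorted` sketch of the same precedence (the format dicts are illustrative):

    formats = [
        {'format_id': 'http-300', 'tbr': 300, 'height': 720},
        {'format_id': 'hls-800', 'tbr': 800, 'height': 480},
    ]
    # tbr outranks height, mirroring the precedence of ('tbr', 'res');
    # best-first here for readability, while the real helper sorts worst-to-best
    formats.sort(key=lambda f: (f.get('tbr') or 0, f.get('height') or 0), reverse=True)
    print([f['format_id'] for f in formats])  # ['hls-800', 'http-300']
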
diff --git a/hypervideo_dl/extractor/sevenplus.py b/hypervideo_dl/extractor/sevenplus.py
index 240afc1..210c44a 100644
--- a/hypervideo_dl/extractor/sevenplus.py
+++ b/hypervideo_dl/extractor/sevenplus.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .brightcove import BrightcoveNewIE
@@ -42,8 +43,51 @@ class SevenPlusIE(BrightcoveNewIE):
'only_matching': True,
}]
+ def _real_initialize(self):
+ self.token = None
+
+ cookies = self._get_cookies('https://7plus.com.au')
+ api_key = next((x for x in cookies if x.startswith('glt_')), '')[4:]
+ if not api_key:  # no Gigya login cookie, so the user is signed out; skip login
+ return
+
+ login_resp = self._download_json(
+ 'https://login.7plus.com.au/accounts.getJWT', None, 'Logging in', fatal=False,
+ query={
+ 'APIKey': api_key,
+ 'sdk': 'js_latest',
+ 'login_token': cookies[f'glt_{api_key}'].value,
+ 'authMode': 'cookie',
+ 'pageURL': 'https://7plus.com.au/',
+ 'sdkBuild': '12471',
+ 'format': 'json',
+ }) or {}
+
+ if 'errorMessage' in login_resp:
+ self.report_warning(f'Unable to log in: 7plus said: {login_resp["errorMessage"]}')
+ return
+ id_token = login_resp.get('id_token')
+ if not id_token:
+ self.report_warning('Unable to log in: Could not extract id token')
+ return
+
+ token_resp = self._download_json(
+ 'https://7plus.com.au/auth/token', None, 'Getting auth token', fatal=False,
+ headers={'Content-Type': 'application/json'}, data=json.dumps({
+ 'idToken': id_token,
+ 'platformId': 'web',
+ 'regSource': '7plus',
+ }).encode('utf-8')) or {}
+ self.token = token_resp.get('token')
+ if not self.token:
+ self.report_warning('Unable to log in: Could not extract auth token')
+
def _real_extract(self, url):
- path, episode_id = re.match(self._VALID_URL, url).groups()
+ path, episode_id = self._match_valid_url(url).groups()
+
+ headers = {}
+ if self.token:
+ headers['Authorization'] = f'Bearer {self.token}'
try:
media = self._download_json(
@@ -55,7 +99,7 @@ class SevenPlusIE(BrightcoveNewIE):
'referenceId': 'ref:' + episode_id,
'deliveryId': 'csai',
'videoType': 'vod',
- })['media']
+ }, headers=headers)['media']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
raise ExtractorError(self._parse_json(
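
The new `_real_initialize` above is a two-step token exchange: the Gigya `glt_*` session cookie is traded for a JWT via `accounts.getJWT`, and that JWT is then traded for the site's own bearer token. A rough standalone sketch of the same flow with urllib (only the two endpoints are taken from the code above; the helper name is hypothetical and a valid login token is assumed):

    import json
    import urllib.parse
    import urllib.request

    def fetch_7plus_bearer(api_key, login_token):
        # Step 1: Gigya login token -> JWT (id_token)
        qs = urllib.parse.urlencode({
            'APIKey': api_key, 'login_token': login_token,
            'authMode': 'cookie', 'sdk': 'js_latest', 'format': 'json',
        })
        jwt = json.load(urllib.request.urlopen(
            'https://login.7plus.com.au/accounts.getJWT?' + qs))['id_token']
        # Step 2: JWT -> 7plus bearer token
        req = urllib.request.Request(
            'https://7plus.com.au/auth/token',
            data=json.dumps({'idToken': jwt, 'platformId': 'web', 'regSource': '7plus'}).encode(),
            headers={'Content-Type': 'application/json'})
        return json.load(urllib.request.urlopen(req))['token']
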
diff --git a/hypervideo_dl/extractor/seznamzpravy.py b/hypervideo_dl/extractor/seznamzpravy.py
index 7a1c7e3..eef4975 100644
--- a/hypervideo_dl/extractor/seznamzpravy.py
+++ b/hypervideo_dl/extractor/seznamzpravy.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
@@ -13,6 +12,7 @@ from ..utils import (
urljoin,
int_or_none,
parse_codecs,
+ parse_qs,
try_get,
)
@@ -108,7 +108,7 @@ class SeznamZpravyIE(InfoExtractor):
return formats
def _real_extract(self, url):
- params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ params = parse_qs(url)
src = params['src'][0]
title = params['title'][0]
diff --git a/hypervideo_dl/extractor/shahid.py b/hypervideo_dl/extractor/shahid.py
index 88b938e..42de41a 100644
--- a/hypervideo_dl/extractor/shahid.py
+++ b/hypervideo_dl/extractor/shahid.py
@@ -111,15 +111,15 @@ class ShahidIE(ShahidBaseIE):
}))
def _real_extract(self, url):
- page_type, video_id = re.match(self._VALID_URL, url).groups()
+ page_type, video_id = self._match_valid_url(url).groups()
if page_type == 'clip':
page_type = 'episode'
playout = self._call_api(
'playout/new/url/' + video_id, video_id)['playout']
- if playout.get('drm'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if not self.get_param('allow_unplayable_formats') and playout.get('drm'):
+ self.report_drm(video_id)
formats = self._extract_m3u8_formats(re.sub(
# https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
diff --git a/hypervideo_dl/extractor/shemaroome.py b/hypervideo_dl/extractor/shemaroome.py
new file mode 100644
index 0000000..142d5dc
--- /dev/null
+++ b/hypervideo_dl/extractor/shemaroome.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import (
+ compat_b64decode,
+ compat_ord,
+)
+from ..utils import (
+ bytes_to_intlist,
+ ExtractorError,
+ intlist_to_bytes,
+ unified_strdate,
+)
+
+
+class ShemarooMeIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara',
+ 'info_dict': {
+ 'id': 'dil-hai-tumhaara',
+ 'ext': 'mp4',
+ 'title': 'Dil Hai Tumhaara',
+ 'release_date': '20020906',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'md5:2782c4127807103cf5a6ae2ca33645ce',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }, {
+ 'url': 'https://www.shemaroome.com/shows/jurm-aur-jazbaat/laalach',
+ 'info_dict': {
+ 'id': 'jurm-aur-jazbaat_laalach',
+ 'ext': 'mp4',
+ 'title': 'Laalach',
+ 'description': 'md5:92b79c2dcb539b0ab53f9fa5a048f53c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20210507',
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ 'skip': 'Premium videos cannot be downloaded yet.'
+ }, {
+ 'url': 'https://www.shemaroome.com/shows/jai-jai-jai-bajrang-bali/jai-jai-jai-bajrang-bali-episode-99',
+ 'info_dict': {
+ 'id': 'jai-jai-jai-bajrang-bali_jai-jai-jai-bajrang-bali-episode-99',
+ 'ext': 'mp4',
+ 'title': 'Jai Jai Jai Bajrang Bali Episode 99',
+ 'description': 'md5:850d127a18ee3f9529d7fbde2f49910d',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20110101',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('/', '_')
+ webpage = self._download_webpage(url, video_id)
+ title = self._search_regex(r'id=\"ma_title\" value=\"([^\"]+)', webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ content_def = self._search_regex(r'id=\"content_definition\" value=\"([^\"]+)', webpage, 'content_def')
+ catalog_id = self._search_regex(r'id=\"catalog_id\" value=\"([^\"]+)', webpage, 'catalog_id')
+ item_category = self._search_regex(r'id=\"item_category\" value=\"([^\"]+)', webpage, 'item_category')
+ content_id = self._search_regex(r'id=\"content_id\" value=\"([^\"]+)', webpage, 'content_id')
+
+ data = f'catalog_id={catalog_id}&content_id={content_id}&category={item_category}&content_def={content_def}'
+ data_json = self._download_json('https://www.shemaroome.com/users/user_all_lists', video_id, data=data.encode())
+ if not data_json.get('status'):
+ raise ExtractorError('Premium videos cannot be downloaded yet.', expected=True)
+ url_data = bytes_to_intlist(compat_b64decode(data_json['new_play_url']))
+ key = bytes_to_intlist(compat_b64decode(data_json['key']))
+ iv = [0] * 16
+ m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))
+ m3u8_url = m3u8_url[:-compat_ord(m3u8_url[-1])].decode('ascii')  # strip PKCS#7 padding
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
+ self._sort_formats(formats)
+
+ release_date = self._html_search_regex(
+ (r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),
+ webpage, 'release date', fatal=False)
+
+ subtitles = {}
+ sub_url = data_json.get('subtitle')
+ if sub_url:
+ subtitles.setdefault('EN', []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+ description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'release_date': unified_strdate(release_date),
+ 'description': description,
+ 'subtitles': subtitles,
+ }
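
`aes_cbc_decrypt` above returns the raw padded plaintext, and the slice `m3u8_url[:-compat_ord(m3u8_url[-1])]` strips the PKCS#7 padding by hand: the last byte of an AES-CBC plaintext states how many pad bytes were appended. A standalone sketch of that unpadding step with validation (the input bytes are illustrative):

    def pkcs7_unpad(data: bytes) -> bytes:
        # The final byte gives the pad length (1..16 for AES block size)
        pad_len = data[-1]
        if not 1 <= pad_len <= 16 or data[-pad_len:] != bytes([pad_len]) * pad_len:
            raise ValueError('invalid PKCS#7 padding')
        return data[:-pad_len]

    padded = b'https://example.com/av.m3u8' + bytes([5]) * 5  # 27 bytes + 5 pad = 32
    print(pkcs7_unpad(padded).decode('ascii'))  # https://example.com/av.m3u8
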
diff --git a/hypervideo_dl/extractor/simplecast.py b/hypervideo_dl/extractor/simplecast.py
index 2d0b3c0..857e941 100644
--- a/hypervideo_dl/extractor/simplecast.py
+++ b/hypervideo_dl/extractor/simplecast.py
@@ -122,7 +122,7 @@ class SimplecastEpisodeIE(SimplecastBaseIE):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
episode = self._call_search_api(
'episode', mobj.group(1), mobj.group(0))
return self._parse_episode(episode)
diff --git a/hypervideo_dl/extractor/sina.py b/hypervideo_dl/extractor/sina.py
index 07b766b..b62b0c3 100644
--- a/hypervideo_dl/extractor/sina.py
+++ b/hypervideo_dl/extractor/sina.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -18,7 +17,7 @@ from ..utils import (
class SinaIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
(?:
- (?:view/|.*\#)(?P<video_id>\d+)|
+ (?:view/|.*\#)(?P<id>\d+)|
.+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
# This is used by external sites like Weibo
api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
@@ -56,9 +55,9 @@ class SinaIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
- video_id = mobj.group('video_id')
+ video_id = mobj.group('id')
if not video_id:
if mobj.group('token') is not None:
# The video id is in the redirected url
@@ -99,7 +98,7 @@ class SinaIE(InfoExtractor):
formats.append({
'format_id': quality_id,
'url': update_url_query(file_api, {'vid': file_id}),
- 'preference': preference(quality_id),
+ 'quality': preference(quality_id),
'ext': 'mp4',
})
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/sixplay.py b/hypervideo_dl/extractor/sixplay.py
index 7ec66ec..fd747f5 100644
--- a/hypervideo_dl/extractor/sixplay.py
+++ b/hypervideo_dl/extractor/sixplay.py
@@ -1,17 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import (
- compat_parse_qs,
compat_str,
- compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
int_or_none,
+ parse_qs,
try_get,
qualities,
)
@@ -41,7 +39,7 @@ class SixPlayIE(InfoExtractor):
}]
def _real_extract(self, url):
- domain, video_id = re.search(self._VALID_URL, url).groups()
+ domain, video_id = self._match_valid_url(url).groups()
service, consumer_name = {
'6play.fr': ('6play', 'm6web'),
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
@@ -79,7 +77,7 @@ class SixPlayIE(InfoExtractor):
continue
if container == 'm3u8' or ext == 'm3u8':
if protocol == 'usp':
- if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
+ if parse_qs(asset_url).get('token', [None])[0]:
urlh = self._request_webpage(
asset_url, video_id, fatal=False,
headers=self.geo_verification_headers())
diff --git a/hypervideo_dl/extractor/skynewsau.py b/hypervideo_dl/extractor/skynewsau.py
new file mode 100644
index 0000000..b1d7795
--- /dev/null
+++ b/hypervideo_dl/extractor/skynewsau.py
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_strdate,
+)
+
+
+class SkyNewsAUIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71',
+ 'info_dict': {
+ 'id': '6277184925001',
+ 'ext': 'mp4',
+ 'title': 'md5:60594f1ea6d5ae93e292900f4d34e9ae',
+ 'description': 'md5:60594f1ea6d5ae93e292900f4d34e9ae',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 76.394,
+ 'timestamp': 1634271300,
+ 'uploader_id': '5348771529001',
+ 'tags': ['fblink', 'msn', 'usa', 'world', 'yt'],
+ 'upload_date': '20211015',
+ },
+ 'params': {'skip_download': True, 'format': 'bv'}
+ }]
+
+ _API_KEY = '6krsj3w249nk779d8fukqx9f'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ embedcode = self._search_regex(r'embedcode\s?=\s?"([^"]+)"', webpage, 'embedcode')
+ data_json = self._download_json(
+ f'https://content.api.news/v3/videos/brightcove/{embedcode}?api_key={self._API_KEY}', video_id)['content']
+ return {
+ 'id': video_id,
+ '_type': 'url_transparent',
+ 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % tuple(embedcode.split('-')),
+ 'ie_key': 'BrightcoveNew',
+ 'title': data_json.get('caption'),
+ 'upload_date': unified_strdate(try_get(data_json, lambda x: x['date']['created'])),
+ }
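
The `embedcode` is an '<account_id>-<video_id>' pair, so the extractor can defer to the generic Brightcove extractor by splatting it into the standard player URL. With an illustrative embedcode:

    embedcode = '5348771529001-6277184925001'  # '<account_id>-<video_id>', illustrative
    player_url = ('https://players.brightcove.net/%s/default_default/index.html?videoId=%s'
                  % tuple(embedcode.split('-')))
    print(player_url)
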
diff --git a/hypervideo_dl/extractor/slideshare.py b/hypervideo_dl/extractor/slideshare.py
index e89ebeb..9b3ad0a 100644
--- a/hypervideo_dl/extractor/slideshare.py
+++ b/hypervideo_dl/extractor/slideshare.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
import json
from .common import InfoExtractor
@@ -27,7 +26,7 @@ class SlideshareIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
diff --git a/hypervideo_dl/extractor/snotr.py b/hypervideo_dl/extractor/snotr.py
index f773547..0bb5482 100644
--- a/hypervideo_dl/extractor/snotr.py
+++ b/hypervideo_dl/extractor/snotr.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -39,7 +38,7 @@ class SnotrIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/sohu.py b/hypervideo_dl/extractor/sohu.py
index 9d73650..3bff5c5 100644
--- a/hypervideo_dl/extractor/sohu.py
+++ b/hypervideo_dl/extractor/sohu.py
@@ -77,7 +77,7 @@ class SohuIE(InfoExtractor):
'info_dict': {
'id': '78932792',
'ext': 'mp4',
- 'title': 'hypervideo testing video',
+ 'title': 'youtube-dl testing video',
},
'params': {
'skip_download': True
@@ -97,7 +97,7 @@ class SohuIE(InfoExtractor):
'Downloading JSON data for %s' % vid_id,
headers=self.geo_verification_headers())
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
mytv = mobj.group('mytv') is not None
diff --git a/hypervideo_dl/extractor/sonyliv.py b/hypervideo_dl/extractor/sonyliv.py
index fedfceb..c3ed442 100644
--- a/hypervideo_dl/extractor/sonyliv.py
+++ b/hypervideo_dl/extractor/sonyliv.py
@@ -9,15 +9,22 @@ from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
+ try_get,
)
class SonyLIVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ (?:
+ sonyliv:|
+ https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
'info_dict': {
- 'title': 'Bachelors Delight - Achaari Cheese Toast',
+ 'title': 'Achaari Cheese Toast',
'id': '1000022678',
'ext': 'mp4',
'upload_date': '20200411',
@@ -25,7 +32,7 @@ class SonyLIVIE(InfoExtractor):
'timestamp': 1586632091,
'duration': 185,
'season_number': 1,
- 'episode': 'Achaari Cheese Toast',
+ 'series': 'Bachelors Delight',
'episode_number': 1,
'release_year': 2016,
},
@@ -75,8 +82,8 @@ class SonyLIVIE(InfoExtractor):
video_id = self._match_id(url)
content = self._call_api(
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
- if content.get('isEncrypted'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
+ self.report_drm(video_id)
dash_url = content['videoURL']
headers = {
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
@@ -92,11 +99,15 @@ class SonyLIVIE(InfoExtractor):
metadata = self._call_api(
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
- title = metadata['title']
- episode = metadata.get('episodeTitle')
- if episode and title != episode:
- title += ' - ' + episode
-
+ title = metadata['episodeTitle']
+ subtitles = {}
+ for sub in content.get('subtitle', []):
+ sub_url = sub.get('subtitleUrl')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('subtitleLanguageName', 'ENG'), []).append({
+ 'url': sub_url,
+ })
return {
'id': video_id,
'title': title,
@@ -106,7 +117,46 @@ class SonyLIVIE(InfoExtractor):
'timestamp': int_or_none(metadata.get('creationDate'), 1000),
'duration': int_or_none(metadata.get('duration')),
'season_number': int_or_none(metadata.get('season')),
- 'episode': episode,
+ 'series': metadata.get('title'),
'episode_number': int_or_none(metadata.get('episodeNumber')),
'release_year': int_or_none(metadata.get('year')),
+ 'subtitles': subtitles,
+ }
+
+
+class SonyLIVSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})$'
+ _TESTS = [{
+ 'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
+ 'playlist_mincount': 456,
+ 'info_dict': {
+ 'id': '1700000091',
+ },
+ }]
+ _API_SHOW_URL = "https://apiv2.sonyliv.com/AGL/1.9/R/ENG/WEB/IN/DL/DETAIL/{}?kids_safe=false&from=0&to=49"
+ _API_EPISODES_URL = "https://apiv2.sonyliv.com/AGL/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{}?from=0&to=1000&orderBy=episodeNumber&sortOrder=asc"
+ _API_SECURITY_URL = 'https://apiv2.sonyliv.com/AGL/1.4/A/ENG/WEB/ALL/GETTOKEN'
+
+ def _entries(self, show_id):
+ headers = {
+ 'Accept': 'application/json, text/plain, */*',
+ 'Referer': 'https://www.sonyliv.com',
}
+ headers['security_token'] = self._download_json(
+ self._API_SECURITY_URL, video_id=show_id, headers=headers,
+ note='Downloading security token')['resultObj']
+ seasons = try_get(
+ self._download_json(self._API_SHOW_URL.format(show_id), video_id=show_id, headers=headers),
+ lambda x: x['resultObj']['containers'][0]['containers'], list)
+ for season in seasons or []:
+ season_id = season['id']
+ episodes = try_get(
+ self._download_json(self._API_EPISODES_URL.format(season_id), video_id=season_id, headers=headers),
+ lambda x: x['resultObj']['containers'][0]['containers'], list)
+ for episode in episodes or []:
+ video_id = episode.get('id')
+ yield self.url_result('sonyliv:%s' % video_id, ie=SonyLIVIE.ie_key(), video_id=video_id)
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
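
The widened `_VALID_URL` lets the new series extractor hand episodes back to `SonyLIVIE` through an internal `sonyliv:<id>` scheme instead of reconstructing web URLs. A quick regex check with the pattern from the diff above (the IDs are illustrative):

    import re

    SONYLIV_RE = r'''(?x)
        (?:
            sonyliv:|
            https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-
        )
        (?P<id>\d+)
    '''
    for url in ('sonyliv:1000022678',
                'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678'):
        print(re.match(SONYLIV_RE, url).group('id'))  # 1000022678 both times
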
diff --git a/hypervideo_dl/extractor/soundcloud.py b/hypervideo_dl/extractor/soundcloud.py
index abb85e1..78fecd1 100644
--- a/hypervideo_dl/extractor/soundcloud.py
+++ b/hypervideo_dl/extractor/soundcloud.py
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
import itertools
import re
+import json
+# import random
from .common import (
InfoExtractor,
@@ -12,7 +14,6 @@ from ..compat import (
compat_HTTPError,
compat_kwargs,
compat_str,
- compat_urlparse,
)
from ..utils import (
error_to_compat_str,
@@ -22,12 +23,15 @@ from ..utils import (
int_or_none,
KNOWN_EXTENSIONS,
mimetype2ext,
+ remove_end,
+ parse_qs,
str_or_none,
try_get,
unified_timestamp,
update_url_query,
url_or_none,
urlhandle_detect_ext,
+ sanitized_Request,
)
@@ -46,8 +50,7 @@ class SoundcloudEmbedIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- query = compat_urlparse.parse_qs(
- compat_urlparse.urlparse(url).query)
+ query = parse_qs(url)
api_url = query['url'][0]
secret_token = query.get('secret_token')
if secret_token:
@@ -161,23 +164,11 @@ class SoundcloudIE(InfoExtractor):
},
# downloadable song
{
- 'url': 'https://soundcloud.com/oddsamples/bus-brakes',
- 'md5': '7624f2351f8a3b2e7cd51522496e7631',
+ 'url': 'https://soundcloud.com/the80m/the-following',
+ 'md5': '9ffcddb08c87d74fb5808a3c183a1d04',
'info_dict': {
- 'id': '128590877',
- 'ext': 'mp3',
- 'title': 'Bus Brakes',
- 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
- 'uploader': 'oddsamples',
- 'uploader_id': '73680509',
- 'timestamp': 1389232924,
- 'upload_date': '20140109',
- 'duration': 17.346,
- 'license': 'cc-by-sa',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
+ 'id': '343609555',
+ 'ext': 'wav',
},
},
# private link, downloadable format
@@ -248,10 +239,15 @@ class SoundcloudIE(InfoExtractor):
},
},
{
- # with AAC HQ format available via OAuth token
+ # AAC HQ format available (account with active subscription needed)
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
'only_matching': True,
},
+ {
+ # Go+ (account with active subscription needed)
+ 'url': 'https://soundcloud.com/taylorswiftofficial/look-what-you-made-me-do',
+ 'only_matching': True,
+ },
]
_API_V2_BASE = 'https://api-v2.soundcloud.com/'
@@ -299,17 +295,110 @@ class SoundcloudIE(InfoExtractor):
try:
return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
self._store_client_id(None)
self._update_client_id()
continue
elif non_fatal:
- self._downloader.report_warning(error_to_compat_str(e))
+ self.report_warning(error_to_compat_str(e))
return False
raise
def _real_initialize(self):
- self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
+ self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
+ self._login()
+
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
+ _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+ _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+ _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
+ _access_token = None
+ _HEADERS = {}
+ _NETRC_MACHINE = 'soundcloud'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ if username == 'oauth' and password is not None:
+ self._access_token = password
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ payload = {'session': {'access_token': self._access_token}}
+ token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
+ if response is not False:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ self.report_login()
+ else:
+ self.report_warning('The provided authorization token seems to be invalid. Continuing as guest')
+ elif username is not None:
+ self.report_warning(
+ 'Login using username and password is not currently supported. '
+ 'Use "--user oauth --password <oauth_token>" to login using an oauth token')
+
+ r'''
+ def genDevId():
+ def genNumBlock():
+ return ''.join([str(random.randrange(10)) for i in range(6)])
+ return '-'.join([genNumBlock() for i in range(4)])
+
+ payload = {
+ 'client_id': self._CLIENT_ID,
+ 'recaptcha_pubkey': 'null',
+ 'recaptcha_response': 'null',
+ 'credentials': {
+ 'identifier': username,
+ 'password': password
+ },
+ 'signature': self.sign(username, password, self._CLIENT_ID),
+ 'device_id': genDevId(),
+ 'user_agent': self._USER_AGENT
+ }
+
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(login, None)
+ self._access_token = response.get('session').get('access_token')
+ if not self._access_token:
+ self.report_warning('Unable to get access token; login may have failed')
+ else:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ '''
+
+ # signature generation
+ def sign(self, user, pw, clid):
+ a = 33
+ i = 1
+ s = 440123
+ w = 117
+ u = 1800000
+ l = 1042
+ b = 37
+ k = 37
+ c = 5
+ n = '0763ed7314c69015fd4a0dc16bbf4b90' # _KEY
+ y = '8' # _REV
+ r = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' # _USER_AGENT
+ e = user # _USERNAME
+ t = clid # _CLIENT_ID
+
+ d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]])
+ p = n + y + d + r + e + t + d + n
+ h = p
+
+ m = 8011470 # hash seed
+ f = 0
+
+ for f in range(f, len(h)):
+ m = (m >> 1) + ((1 & m) << 23) # rotate the 24-bit accumulator right by one bit
+ m += ord(h[f]) # mix in the next character code
+ m &= 16777215 # keep only the low 24 bits (0xFFFFFF)
+
+ # c is appended verbatim and never enters the hash
+ out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c)
+
+ return out
@classmethod
def _resolv_url(cls, url):
@@ -340,7 +429,7 @@ class SoundcloudIE(InfoExtractor):
'ext': urlhandle_detect_ext(urlh) or 'mp3',
'filesize': int_or_none(urlh.headers.get('Content-Length')),
'url': format_url,
- 'preference': 10,
+ 'quality': 10,
})
def invalid_url(url):
@@ -389,7 +478,7 @@ class SoundcloudIE(InfoExtractor):
if not format_url:
continue
stream = self._download_json(
- format_url, track_id, query=query, fatal=False)
+ format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
if not isinstance(stream, dict):
continue
stream_url = url_or_none(stream.get('url'))
@@ -416,7 +505,7 @@ class SoundcloudIE(InfoExtractor):
f['vcodec'] = 'none'
if not formats and info.get('policy') == 'BLOCK':
- self.raise_geo_restricted()
+ self.raise_geo_restricted(metadata_available=True)
self._sort_formats(formats)
user = info.get('user') or {}
@@ -468,7 +557,7 @@ class SoundcloudIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
track_id = mobj.group('track_id')
@@ -487,7 +576,7 @@ class SoundcloudIE(InfoExtractor):
info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
info = self._download_json(
- info_json_url, full_title, 'Downloading info JSON', query=query)
+ info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
return self._extract_info_dict(info, full_title, token)
@@ -503,7 +592,7 @@ class SoundcloudPlaylistBaseIE(SoundcloudIE):
'ids': ','.join([compat_str(t['id']) for t in tracks]),
'playlistId': playlist_id,
'playlistSecretToken': token,
- })
+ }, headers=self._HEADERS)
entries = []
for track in tracks:
track_id = str_or_none(track.get('id'))
@@ -523,7 +612,7 @@ class SoundcloudPlaylistBaseIE(SoundcloudIE):
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
- _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[:\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -536,10 +625,19 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
}, {
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/weekly::flacmatic',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/charts-top:all-music:de',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/charts-top:hiphoprap:kr',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
token = mobj.group('token')
@@ -547,7 +645,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
full_title += '/' + token
info = self._download_json(self._resolv_url(
- self._BASE_URL + full_title), full_title)
+ self._BASE_URL + full_title), full_title, headers=self._HEADERS)
if 'errors' in info:
msgs = (compat_str(err['error_message']) for err in info['errors'])
@@ -558,64 +656,60 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title):
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': self._entries(base_url, playlist_id),
+ }
+
+ def _entries(self, url, playlist_id):
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
- COMMON_QUERY = {
+ query = {
'limit': 200,
'linked_partitioning': '1',
+ 'offset': 0,
}
- query = COMMON_QUERY.copy()
- query['offset'] = 0
-
- next_href = base_url
+ retries = self.get_param('extractor_retries', 3)
- entries = []
for i in itertools.count():
- response = self._download_json(
- next_href, playlist_id,
- 'Downloading track page %s' % (i + 1), query=query)
-
- collection = response['collection']
-
- if not isinstance(collection, list):
- collection = []
-
- # Empty collection may be returned, in this case we proceed
- # straight to next_href
-
- def resolve_entry(candidates):
+ attempt, last_error = -1, None
+ while attempt < retries:
+ attempt += 1
+ if last_error:
+ self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'), playlist_id)
+ try:
+ response = self._download_json(
+ url, playlist_id, query=query, headers=self._HEADERS,
+ note='Downloading track page %s%s' % (i + 1, f' (retry #{attempt})' if attempt else ''))
+ break
+ except ExtractorError as e:
+ # Downloading page may result in intermittent 502 HTTP error
+ # See https://github.com/hypervideo/hypervideo/issues/872
+ if attempt >= retries or not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502:
+ raise
+ last_error = str(e.cause or e.msg)
+
+ def resolve_entry(*candidates):
for cand in candidates:
if not isinstance(cand, dict):
continue
permalink_url = url_or_none(cand.get('permalink_url'))
- if not permalink_url:
- continue
- return self.url_result(
- permalink_url,
- SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
- str_or_none(cand.get('id')), cand.get('title'))
-
- for e in collection:
- entry = resolve_entry((e, e.get('track'), e.get('playlist')))
- if entry:
- entries.append(entry)
-
- next_href = response.get('next_href')
- if not next_href:
- break
+ if permalink_url:
+ return self.url_result(
+ permalink_url,
+ SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
+ str_or_none(cand.get('id')), cand.get('title'))
- next_href = response['next_href']
- parsed_next_href = compat_urlparse.urlparse(next_href)
- query = compat_urlparse.parse_qs(parsed_next_href.query)
- query.update(COMMON_QUERY)
+ for e in response['collection'] or []:
+ yield resolve_entry(e, e.get('track'), e.get('playlist'))
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': playlist_title,
- 'entries': entries,
- }
+ url = response.get('next_href')
+ if not url:
+ break
+ query.pop('offset', None)
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
@@ -691,12 +785,12 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
uploader = mobj.group('user')
user = self._download_json(
self._resolv_url(self._BASE_URL + uploader),
- uploader, 'Downloading user info')
+ uploader, 'Downloading user info', headers=self._HEADERS)
resource = mobj.group('rsrc') or 'all'
@@ -721,7 +815,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url):
track_name = self._match_id(url)
- track = self._download_json(self._resolv_url(url), track_name)
+ track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS)
track_id = self._search_regex(
r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
@@ -744,7 +838,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
playlist_id = mobj.group('id')
query = {}
@@ -754,7 +848,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
data = self._download_json(
self._API_V2_BASE + 'playlists/' + playlist_id,
- playlist_id, 'Downloading playlist', query=query)
+ playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)
return self._extract_set(data, token)
@@ -786,25 +880,14 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
})
next_url = update_url_query(self._API_V2_BASE + endpoint, query)
- collected_results = 0
-
for i in itertools.count(1):
response = self._download_json(
- next_url, collection_id, 'Downloading page {0}'.format(i),
- 'Unable to download API page')
-
- collection = response.get('collection', [])
- if not collection:
- break
+ next_url, collection_id, f'Downloading page {i}',
+ 'Unable to download API page', headers=self._HEADERS)
- collection = list(filter(bool, collection))
- collected_results += len(collection)
-
- for item in collection:
- yield self.url_result(item['uri'], SoundcloudIE.ie_key())
-
- if not collection or collected_results >= limit:
- break
+ for item in response.get('collection') or []:
+ if item:
+ yield self.url_result(item['uri'], SoundcloudIE.ie_key())
next_url = response.get('next_href')
if not next_url:
@@ -812,4 +895,4 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
def _get_n_results(self, query, n):
tracks = self._get_collection('search/tracks', query, limit=n, q=query)
- return self.playlist_result(tracks, playlist_title=query)
+ return self.playlist_result(tracks, query, query)
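
The rewritten pager above follows SoundCloud's linked-partitioning scheme: each response carries a `collection` plus a `next_href` cursor, and the client follows `next_href` until it disappears, dropping the initial `offset` since the cursor already encodes it. A generator-shaped sketch of the same loop (`fetch_json` is a hypothetical stand-in for `_download_json`; the retry handling is omitted):

    def paged_collection(fetch_json, url, limit=200):
        # First request sets the page size; later pages come from next_href
        query = {'limit': limit, 'linked_partitioning': '1', 'offset': 0}
        while url:
            response = fetch_json(url, query)
            for item in response.get('collection') or []:
                yield item
            url = response.get('next_href')
            query.pop('offset', None)  # next_href already carries the cursor
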
diff --git a/hypervideo_dl/extractor/soundgasm.py b/hypervideo_dl/extractor/soundgasm.py
index 3d78a9d..d608eb7 100644
--- a/hypervideo_dl/extractor/soundgasm.py
+++ b/hypervideo_dl/extractor/soundgasm.py
@@ -22,7 +22,7 @@ class SoundgasmIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
diff --git a/hypervideo_dl/extractor/southpark.py b/hypervideo_dl/extractor/southpark.py
index 0774da0..d497494 100644
--- a/hypervideo_dl/extractor/southpark.py
+++ b/hypervideo_dl/extractor/southpark.py
@@ -56,40 +56,62 @@ class SouthParkEsIE(SouthParkIE):
class SouthParkDeIE(SouthParkIE):
IE_NAME = 'southpark.de'
- _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))'
- _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
-
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:(en/(videoclip|collections|episodes|video-clips))|(videoclip|collections|folgen))/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
_TESTS = [{
- 'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
+ 'url': 'https://www.southpark.de/videoclip/rsribv/south-park-rueckzug-zum-gummibonbon-wald',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.de/folgen/jiru42/south-park-verkabelung-staffel-23-ep-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.de/collections/zzno5a/south-park-good-eats/7q26gp',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'https://www.southpark.de/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
'info_dict': {
- 'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4',
- 'title': 'South Park|The Government Won\'t Respect My Privacy',
- 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
- 'timestamp': 1380160800,
- 'upload_date': '20130926',
+ 'title': 'Tooth Fairy Cartman',
+ 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68',
},
}, {
- # non-ASCII characters in initial URL
- 'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
+ # episode
+ 'url': 'https://www.southpark.de/en/episodes/yy0vjs/south-park-the-pandemic-special-season-24-ep-1',
'info_dict': {
- 'title': 'Hashtag „Aufwärmen“',
- 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
+ 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'South Park',
+ 'description': 'md5:ae0d875eff169dcbed16b21531857ac1',
},
- 'playlist_count': 3,
}, {
- # non-ASCII characters in redirect URL
- 'url': 'http://www.southpark.de/alle-episoden/s18e09',
+ # clip
+ 'url': 'https://www.southpark.de/videoclip/ct46op/south-park-zahnfee-cartman',
'info_dict': {
- 'title': 'Hashtag „Aufwärmen“',
- 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'Zahnfee Cartman',
+ 'description': 'md5:b917eec991d388811d911fd1377671ac'
},
- 'playlist_count': 3,
}, {
- 'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1',
- 'only_matching': True,
+ # episode
+ 'url': 'https://www.southpark.de/folgen/242csn/south-park-her-mit-dem-hirn-staffel-1-ep-7',
+ 'info_dict': {
+ 'id': '607115f3-496f-40c3-8647-2b0bcff486c0',
+ 'ext': 'mp4',
+ 'title': 'md5:South Park | Pink Eye | E 0107 | HDSS0107X deu | Version: 634312 | Comedy Central S1',
+ },
}]
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _get_feed_query(self, uri):
+ return
+
class SouthParkNlIE(SouthParkIE):
IE_NAME = 'southpark.nl'
diff --git a/hypervideo_dl/extractor/sovietscloset.py b/hypervideo_dl/extractor/sovietscloset.py
new file mode 100644
index 0000000..7df2375
--- /dev/null
+++ b/hypervideo_dl/extractor/sovietscloset.py
@@ -0,0 +1,221 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ try_get,
+ unified_timestamp
+)
+
+
+class SovietsClosetBaseIE(InfoExtractor):
+ MEDIADELIVERY_REFERER = {'Referer': 'https://iframe.mediadelivery.net/'}
+
+ def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
+ nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
+ js, arg_keys, arg_vals = self._search_regex(
+ r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)',
+ nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals'])
+
+ args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+ for key, val in args.items():
+ if val in ('undefined', 'void 0'):
+ args[key] = 'null'
+
+ return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
+ def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
+ title = game_name
+ if category_name and category_name != 'Misc':
+ title += f' - {category_name}'
+ if episode_number:
+ title += f' #{episode_number}'
+
+ timestamp = unified_timestamp(stream_date)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'http_headers': self.MEDIADELIVERY_REFERER,
+ 'uploader': 'SovietWomble',
+ 'creator': 'SovietWomble',
+ 'release_timestamp': timestamp,
+ 'timestamp': timestamp,
+ 'uploader_id': 'SovietWomble',
+ 'uploader_url': 'https://www.twitch.tv/SovietWomble',
+ 'was_live': True,
+ 'availability': 'public',
+ 'series': game_name,
+ 'season': category_name,
+ 'episode_number': episode_number,
+ }
+
+
+class SovietsClosetIE(SovietsClosetBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/video/(?P<id>[0-9]+)/?'
+ _TESTS = [
+ {
+ 'url': 'https://sovietscloset.com/video/1337',
+ 'md5': '11e58781c4ca5b283307aa54db5b3f93',
+ 'info_dict': {
+ 'id': '1337',
+ 'ext': 'mp4',
+ 'title': 'The Witcher #13',
+ 'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
+ 'uploader': 'SovietWomble',
+ 'creator': 'SovietWomble',
+ 'release_timestamp': 1492091580,
+ 'release_date': '20170413',
+ 'timestamp': 1492091580,
+ 'upload_date': '20170413',
+ 'uploader_id': 'SovietWomble',
+ 'uploader_url': 'https://www.twitch.tv/SovietWomble',
+ 'duration': 7007,
+ 'was_live': True,
+ 'availability': 'public',
+ 'series': 'The Witcher',
+ 'season': 'Misc',
+ 'episode_number': 13,
+ },
+ },
+ {
+ 'url': 'https://sovietscloset.com/video/1105',
+ 'md5': '578b1958a379e7110ba38697042e9efb',
+ 'info_dict': {
+ 'id': '1105',
+ 'ext': 'mp4',
+ 'title': 'Arma 3 - Zeus Games #3',
+ 'uploader': 'SovietWomble',
+ 'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$',
+ 'creator': 'SovietWomble',
+ 'release_timestamp': 1461157200,
+ 'release_date': '20160420',
+ 'timestamp': 1461157200,
+ 'upload_date': '20160420',
+ 'uploader_id': 'SovietWomble',
+ 'uploader_url': 'https://www.twitch.tv/SovietWomble',
+ 'duration': 8804,
+ 'was_live': True,
+ 'availability': 'public',
+ 'series': 'Arma 3',
+ 'season': 'Zeus Games',
+ 'episode_number': 3,
+ },
+ },
+ ]
+
+ def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
+ iframe = self._download_webpage(
+ f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
+ video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER)
+
+ m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url')
+ thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
+
+ m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
+ self._sort_formats(m3u8_formats)
+
+ if not m3u8_formats:
+ duration = None
+ else:
+ duration = self._extract_m3u8_vod_duration(
+ m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER)
+
+ return {
+ 'formats': m3u8_formats,
+ 'thumbnail': thumbnail_url,
+ 'duration': duration,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
+ static_assets_base = f'https://sovietscloset.com{static_assets_base}'
+
+ stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
+
+ return {
+ **self.video_meta(
+ video_id=video_id, game_name=stream['game']['name'],
+ category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
+ episode_number=stream.get('number'), stream_date=stream.get('date')),
+ **self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
+ }
+
+
+class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/(?!video)(?P<id>[^#?]+)'
+ _TESTS = [
+
+ {
+ 'url': 'https://sovietscloset.com/The-Witcher',
+ 'info_dict': {
+ 'id': 'The-Witcher',
+ 'title': 'The Witcher',
+ },
+ 'playlist_mincount': 31,
+ },
+ {
+ 'url': 'https://sovietscloset.com/Arma-3/Zeus-Games',
+ 'info_dict': {
+ 'id': 'Arma-3/Zeus-Games',
+ 'title': 'Arma 3 - Zeus Games',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ 'url': 'https://sovietscloset.com/arma-3/zeus-games/',
+ 'info_dict': {
+ 'id': 'arma-3/zeus-games',
+ 'title': 'Arma 3 - Zeus Games',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ 'url': 'https://sovietscloset.com/Total-War-Warhammer',
+ 'info_dict': {
+ 'id': 'Total-War-Warhammer',
+ 'title': 'Total War: Warhammer - Greenskins',
+ },
+ 'playlist_mincount': 33,
+ },
+ ]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ if playlist_id.endswith('/'):
+ playlist_id = playlist_id[:-1]
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
+ static_assets_base = f'https://sovietscloset.com{static_assets_base}'
+
+ sovietscloset = self.parse_nuxt_jsonp(f'{static_assets_base}/payload.js', playlist_id, 'global')['games']
+
+ if '/' in playlist_id:
+ game_slug, category_slug = playlist_id.lower().split('/')
+ else:
+ game_slug = playlist_id.lower()
+ category_slug = 'misc'
+
+ game = next(game for game in sovietscloset if game['slug'].lower() == game_slug)
+ category = next((cat for cat in game['subcategories'] if cat.get('slug', '').lower() == category_slug),
+ game['subcategories'][0])
+ category_slug = category.get('slug', '').lower() or category_slug
+ playlist_title = game.get('name') or game_slug
+ if category_slug != 'misc':
+ playlist_title += f' - {category.get("name") or category_slug}'
+ entries = [{
+ **self.url_result(f'https://sovietscloset.com/video/{stream["id"]}', ie=SovietsClosetIE.ie_key()),
+ **self.video_meta(
+ video_id=stream['id'], game_name=game['name'], category_name=category.get('name'),
+ episode_number=i + 1, stream_date=stream.get('date')),
+ } for i, stream in enumerate(category['streams'])]
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
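
`parse_nuxt_jsonp` above relies on the shape of Nuxt.js static payloads: a JS object literal wrapped in an IIFE whose parameters hold the deduplicated values. A toy example of the input it expects and how the argument substitution plays out (the payload is illustrative):

    nuxt_jsonp = '__NUXT_JSONP__("/video/1337", (function(a,b){return {data:[{stream:{game:a,number:b}}]}}("The Witcher",13)));'
    # The regex in parse_nuxt_jsonp pulls out three groups:
    #   js       -> '{data:[{stream:{game:a,number:b}}]}'
    #   arg_keys -> 'a,b'
    #   arg_vals -> '"The Witcher",13'
    # js_to_json then substitutes a='"The Witcher"' and b='13' while converting
    # the object literal to strict JSON, and ['data'][0] yields the stream dict.
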
diff --git a/hypervideo_dl/extractor/spankbang.py b/hypervideo_dl/extractor/spankbang.py
index 37cb8c8..dd849ae 100644
--- a/hypervideo_dl/extractor/spankbang.py
+++ b/hypervideo_dl/extractor/spankbang.py
@@ -26,17 +26,18 @@ class SpankBangIE(InfoExtractor):
)
'''
_TESTS = [{
- 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
- 'md5': '1cc433e1d6aa14bc376535b8679302f7',
+ 'url': 'https://spankbang.com/56b3d/video/the+slut+maker+hmv',
+ 'md5': '2D13903DE4ECC7895B5D55930741650A',
'info_dict': {
- 'id': '3vvn',
+ 'id': '56b3d',
'ext': 'mp4',
- 'title': 'fantasy solo',
- 'description': 'dillion harper masturbates on a bed',
+ 'title': 'The Slut Maker HMV',
+ 'description': 'Girls getting converted into cock slaves.',
'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'silly2587',
- 'timestamp': 1422571989,
- 'upload_date': '20150129',
+ 'uploader': 'Mindself',
+ 'uploader_id': 'mindself',
+ 'timestamp': 1617109572,
+ 'upload_date': '20210330',
'age_limit': 18,
}
}, {
@@ -70,7 +71,7 @@ class SpankBangIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2')
webpage = self._download_webpage(
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
@@ -129,20 +130,20 @@ class SpankBangIE(InfoExtractor):
format_url = format_url[0]
extract_format(format_id, format_url)
- self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
+ self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, default={})
title = self._html_search_regex(
- r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
+ r'(?s)<h1[^>]+\btitle=["\']([^"]+)["\']>', webpage, 'title', default=None)
description = self._search_regex(
r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
webpage, 'description', default=None)
thumbnail = self._og_search_thumbnail(webpage, default=None)
uploader = self._html_search_regex(
- (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
- r'class="user"[^>]*><img[^>]+>([^<]+)'),
- webpage, 'uploader', default=None)
+ r'<svg[^>]+\bclass="(?:[^"]*?user[^"]*?)">.*?</svg>([^<]+)', webpage, 'uploader', default=None)
+ uploader_id = self._html_search_regex(
+ r'<a[^>]+href="/profile/([^"]+)"', webpage, 'uploader_id', default=None)
duration = parse_duration(self._search_regex(
r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
webpage, 'duration', default=None))
@@ -157,6 +158,7 @@ class SpankBangIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
+ 'uploader_id': uploader_id,
'duration': duration,
'view_count': view_count,
'formats': formats,
@@ -177,7 +179,7 @@ class SpankBangPlaylistIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
playlist_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/spankwire.py b/hypervideo_dl/extractor/spankwire.py
index 35ab9ec..e97c1d2 100644
--- a/hypervideo_dl/extractor/spankwire.py
+++ b/hypervideo_dl/extractor/spankwire.py
@@ -108,7 +108,7 @@ class SpankwireIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id'))
+ self._sort_formats(formats)
view_count = str_to_int(video.get('viewed'))
diff --git a/hypervideo_dl/extractor/spiegeltv.py b/hypervideo_dl/extractor/spiegeltv.py
new file mode 100644
index 0000000..6ccf4c3
--- /dev/null
+++ b/hypervideo_dl/extractor/spiegeltv.py
@@ -0,0 +1,17 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .nexx import NexxIE
+
+
+class SpiegeltvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ return self.url_result(
+ 'https://api.nexx.cloud/v3/748/videos/byid/%s'
+ % self._match_id(url), ie=NexxIE.ie_key())
diff --git a/hypervideo_dl/extractor/sport5.py b/hypervideo_dl/extractor/sport5.py
index a417b5a..35c57d6 100644
--- a/hypervideo_dl/extractor/sport5.py
+++ b/hypervideo_dl/extractor/sport5.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -36,7 +35,7 @@ class Sport5IE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
media_id = mobj.group('id')
webpage = self._download_webpage(url, media_id)
diff --git a/hypervideo_dl/extractor/sportdeutschland.py b/hypervideo_dl/extractor/sportdeutschland.py
index 3e497a9..94bcaba 100644
--- a/hypervideo_dl/extractor/sportdeutschland.py
+++ b/hypervideo_dl/extractor/sportdeutschland.py
@@ -2,15 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
clean_html,
float_or_none,
int_or_none,
parse_iso8601,
+ parse_qs,
strip_or_none,
try_get,
)
@@ -61,9 +58,9 @@ class SportDeutschlandIE(InfoExtractor):
}
videos = asset.get('videos') or []
if len(videos) > 1:
- playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
+ playlist_id = parse_qs(url).get('playlistId', [None])[0]
if playlist_id:
- if self._downloader.params.get('noplaylist'):
+ if self.get_param('noplaylist'):
videos = [videos[int(playlist_id)]]
self.to_screen('Downloading just a single video because of --no-playlist')
else:
@@ -77,7 +74,7 @@ class SportDeutschlandIE(InfoExtractor):
continue
formats = self._extract_m3u8_formats(
video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
- if not formats:
+ if not formats and not self.get_param('ignore_no_formats'):
continue
yield {
'id': video_id,
diff --git a/hypervideo_dl/extractor/springboardplatform.py b/hypervideo_dl/extractor/springboardplatform.py
index 07d99b5..49ac1f5 100644
--- a/hypervideo_dl/extractor/springboardplatform.py
+++ b/hypervideo_dl/extractor/springboardplatform.py
@@ -57,7 +57,7 @@ class SpringboardPlatformIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2')
index = mobj.group('index') or mobj.group('index_2')
diff --git a/hypervideo_dl/extractor/srgssr.py b/hypervideo_dl/extractor/srgssr.py
index ac018e7..cbc1c47 100644
--- a/hypervideo_dl/extractor/srgssr.py
+++ b/hypervideo_dl/extractor/srgssr.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -82,11 +81,12 @@ class SRGSSRIE(InfoExtractor):
return media_data
def _real_extract(self, url):
- bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
+ bu, media_type, media_id = self._match_valid_url(url).groups()
media_data = self._get_media_data(bu, media_type, media_id)
title = media_data['title']
formats = []
+ subtitles = {}
q = qualities(['SD', 'HD'])
for source in (media_data.get('resourceList') or []):
format_url = source.get('url')
@@ -104,12 +104,16 @@ class SRGSSRIE(InfoExtractor):
if source.get('tokenType') == 'AKAMAI':
format_url = self._get_tokenized_src(
format_url, media_id, format_id)
- formats.extend(self._extract_akamai_formats(
- format_url, media_id))
+ fmts, subs = self._extract_akamai_formats_and_subtitles(
+ format_url, media_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif protocol == 'HLS':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
format_url, media_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False))
+ m3u8_id=format_id, fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
elif protocol in ('HTTP', 'HTTPS'):
formats.append({
'format_id': format_id,
@@ -133,7 +137,6 @@ class SRGSSRIE(InfoExtractor):
})
self._sort_formats(formats)
- subtitles = {}
if media_type == 'video':
for sub in (media_data.get('subtitleList') or []):
sub_url = sub.get('url')
@@ -245,7 +248,7 @@ class SRGSSRPlayIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
bu = mobj.group('bu')
media_type = mobj.group('type') or mobj.group('type_2')
media_id = mobj.group('id')
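
The switch to the `*_formats_and_subtitles` helpers above means each source can contribute subtitle tracks, which are folded together with `_merge_subtitles`; the merge is a per-language list concatenation. A standalone sketch of that semantics (the track dicts are illustrative):

    def merge_subtitles(*dicts):
        # Combine {lang: [track, ...]} mappings, concatenating per-language lists
        merged = {}
        for d in dicts:
            for lang, tracks in (d or {}).items():
                merged.setdefault(lang, []).extend(tracks)
        return merged

    a = {'de': [{'url': 'https://example.com/de.vtt'}]}
    b = {'de': [{'url': 'https://example.com/de.m3u8'}], 'fr': [{'url': 'https://example.com/fr.vtt'}]}
    print(merge_subtitles(a, b))  # 'de' ends up with two tracks, 'fr' with one
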
diff --git a/hypervideo_dl/extractor/stanfordoc.py b/hypervideo_dl/extractor/stanfordoc.py
index ae3dd13..0003075 100644
--- a/hypervideo_dl/extractor/stanfordoc.py
+++ b/hypervideo_dl/extractor/stanfordoc.py
@@ -25,7 +25,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course')
diff --git a/hypervideo_dl/extractor/startv.py b/hypervideo_dl/extractor/startv.py
new file mode 100644
index 0000000..411320e
--- /dev/null
+++ b/hypervideo_dl/extractor/startv.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ traverse_obj,
+ int_or_none,
+)
+
+
+class StarTVIE(InfoExtractor):
+ _VALID_URL = r"""(?x)
+ https?://(?:www\.)?startv\.com\.tr/
+ (?:
+ (?:dizi|program)/(?:[^/?#&]+)/(?:bolumler|fragmanlar|ekstralar)|
+ video/arsiv/(?:dizi|program)/(?:[^/?#&]+)
+ )/
+ (?P<id>[^/?#&]+)
+ """
+ IE_NAME = 'startv'
+ _TESTS = [
+ {
+ 'url': 'https://www.startv.com.tr/dizi/cocuk/bolumler/3-bolum',
+ 'md5': '72381a32bcc2e2eb5841e8c8bf68f127',
+ 'info_dict': {
+ 'id': '904972',
+ 'display_id': '3-bolum',
+ 'ext': 'mp4',
+ 'title': '3. Bölüm',
+ 'description': 'md5:3a8049f05a75c2e8747116a673275de4',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
+ 'timestamp': 1569281400,
+ 'upload_date': '20190923'
+ },
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/dizi/avlu/44-bolum',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/dizi/cocuk/fragmanlar/5-bolum-fragmani',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/dizi/cocuk/ekstralar/5-bolumun-nefes-kesen-final-sahnesi',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/bolumler/1-bolum',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/fragmanlar/2-fragman',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/14-bolumde-hangi-unlu-ne-sordu-',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/buyuk-risk-334-bolum',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://www.startv.com.tr/video/arsiv/program/dada/dada-58-bolum',
+ 'only_matching': True
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ info_url = self._search_regex(
+ r'(["\'])videoUrl\1\s*:\s*\1(?P<url>(?:(?!\1).)+)\1\s*',
+ webpage, 'video info url', group='url')
+
+ info = traverse_obj(self._download_json(info_url, display_id), 'data', expected_type=dict)
+ if not info:
+ raise ExtractorError('Failed to extract API data')
+
+ video_id = compat_str(info.get('id'))
+ title = info.get('title') or self._og_search_title(webpage)
+ description = clean_html(info.get('description')) or self._og_search_description(webpage, default=None)
+ thumbnail = self._proto_relative_url(
+ self._og_search_thumbnail(webpage), scheme='http:')
+
+ formats = self._extract_m3u8_formats(
+ traverse_obj(info, ('flavors', 'hls')), video_id, entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': int_or_none(info.get('release_date')),
+ 'formats': formats
+ }
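
The `videoUrl` lookup in the new startv extractor relies on a quote-aware regex: the first group captures whichever quote character opens the key, and the tempered dot `(?:(?!\1).)+` consumes the value up to the matching close quote. A quick illustration against a made-up page fragment (fragment and URL are hypothetical):

    import re

    webpage = '{"videoUrl":"https://example.com/api/video/3-bolum"}'
    m = re.search(r'(["\'])videoUrl\1\s*:\s*\1(?P<url>(?:(?!\1).)+)\1\s*', webpage)
    print(m.group('url'))  # https://example.com/api/video/3-bolum
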
diff --git a/hypervideo_dl/extractor/steam.py b/hypervideo_dl/extractor/steam.py
index a6a191c..7f777c4 100644
--- a/hypervideo_dl/extractor/steam.py
+++ b/hypervideo_dl/extractor/steam.py
@@ -66,7 +66,7 @@ class SteamIE(InfoExtractor):
}]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
+ m = self._match_valid_url(url)
fileID = m.group('fileID')
if fileID:
videourl = url
@@ -139,7 +139,7 @@ class SteamIE(InfoExtractor):
'format_id': ext + quality,
'url': video_url,
})
- if not formats:
+ if not formats and not self.get_param('ignore_no_formats'):
continue
entry['formats'] = formats
entries.append(entry)
diff --git a/hypervideo_dl/extractor/streamable.py b/hypervideo_dl/extractor/streamable.py
index 3472527..8081296 100644
--- a/hypervideo_dl/extractor/streamable.py
+++ b/hypervideo_dl/extractor/streamable.py
@@ -8,6 +8,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ try_get,
+ parse_codecs,
)
@@ -29,7 +31,7 @@ class StreamableIE(InfoExtractor):
'view_count': int,
}
},
- # older video without bitrate, width/height, etc. info
+ # older video without bitrate, width/height, codecs, etc. info
{
'url': 'https://streamable.com/moo',
'md5': '2cf6923639b87fba3279ad0df3a64e73',
@@ -95,7 +97,9 @@ class StreamableIE(InfoExtractor):
'height': int_or_none(info.get('height')),
'filesize': int_or_none(info.get('size')),
'fps': int_or_none(info.get('framerate')),
- 'vbr': float_or_none(info.get('bitrate'), 1000)
+ 'vbr': float_or_none(info.get('bitrate'), 1000),
+ 'vcodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['video_codec_name'])).get('vcodec'),
+ 'acodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['audio_codec_name'])).get('acodec'),
})
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/streamanity.py b/hypervideo_dl/extractor/streamanity.py
new file mode 100644
index 0000000..2e2d5ee
--- /dev/null
+++ b/hypervideo_dl/extractor/streamanity.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class StreamanityIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?streamanity\.com/video/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://streamanity.com/video/9DFPTnuYi8f2',
+ 'md5': '6ab171e8d4a02ad5dcbff6bea44cf5a1',
+ 'info_dict': {
+ 'id': '9DFPTnuYi8f2',
+ 'ext': 'mp4',
+ 'title': 'Bitcoin vs The Lighting Network',
+ 'thumbnail': r're:https://res\.cloudinary\.com/.+\.png',
+ 'description': '',
+ 'uploader': 'Tom Bombadil (Freddy78)',
+ }
+ }, {
+ 'url': 'https://streamanity.com/video/JktOUjSlfzTD',
+ 'md5': '31f131e28abd3377c38be586a59532dc',
+ 'info_dict': {
+ 'id': 'JktOUjSlfzTD',
+ 'ext': 'mp4',
+ 'title': 'Share data when you see it',
+ 'thumbnail': r're:https://res\.cloudinary\.com/.+\.png',
+ 'description': 'Reposting as data should be public and stored on blockchain',
+ 'uploader': 'digitalcurrencydaily',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_info = self._download_json(
+ f'https://app.streamanity.com/api/video/{video_id}', video_id)['data']['video']
+
+ formats = self._extract_m3u8_formats(
+ f'https://stream.mux.com/{video_info["play_id"]}.m3u8?token={video_info["token"]}',
+ video_id, ext='mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_info['title'],
+ 'description': video_info.get('description'),
+ 'uploader': video_info.get('author_name'),
+ 'is_live': False,
+ 'thumbnail': video_info.get('thumb'),
+ 'formats': formats,
+ }
diff --git a/hypervideo_dl/extractor/streamcloud.py b/hypervideo_dl/extractor/streamcloud.py
index 984dea4..b97bb43 100644
--- a/hypervideo_dl/extractor/streamcloud.py
+++ b/hypervideo_dl/extractor/streamcloud.py
@@ -15,12 +15,12 @@ class StreamcloudIE(InfoExtractor):
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
_TESTS = [{
- 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube_dl_test_video_____________-BaW_jenozKc.mp4.html',
+ 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
'md5': '6bea4c7fa5daaacc2a946b7146286686',
'info_dict': {
'id': 'skp9j99s4bpz',
'ext': 'mp4',
- 'title': 'hypervideo test video \'/\\ ä ↭',
+ 'title': 'youtube-dl test video \'/\\ ä ↭',
},
'skip': 'Only available from the EU'
}, {
diff --git a/hypervideo_dl/extractor/stv.py b/hypervideo_dl/extractor/stv.py
index 539220a..d36a4b6 100644
--- a/hypervideo_dl/extractor/stv.py
+++ b/hypervideo_dl/extractor/stv.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -43,7 +42,7 @@ class STVPlayerIE(InfoExtractor):
}
def _real_extract(self, url):
- ptype, video_id = re.match(self._VALID_URL, url).groups()
+ ptype, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id, fatal=False) or ''
props = (self._parse_json(self._search_regex(
diff --git a/hypervideo_dl/extractor/svt.py b/hypervideo_dl/extractor/svt.py
index a5bb6da..38e0086 100644
--- a/hypervideo_dl/extractor/svt.py
+++ b/hypervideo_dl/extractor/svt.py
@@ -49,7 +49,7 @@ class SVTBaseIE(InfoExtractor):
if not formats and rights.get('geoBlockedSweden'):
self.raise_geo_restricted(
'This video is only available in Sweden',
- countries=self._GEO_COUNTRIES)
+ countries=self._GEO_COUNTRIES, metadata_available=True)
self._sort_formats(formats)
subtitles = {}
@@ -119,7 +119,7 @@ class SVTIE(SVTBaseIE):
return mobj.group('url')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
widget_id = mobj.group('widget_id')
article_id = mobj.group('id')
@@ -225,7 +225,7 @@ class SVTPlayIE(SVTPlayBaseIE):
return info_dict
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
svt_id = mobj.group('svt_id') or mobj.group('modal_id')
@@ -301,7 +301,7 @@ class SVTSeriesIE(SVTPlayBaseIE):
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
def _real_extract(self, url):
- series_slug, season_id = re.match(self._VALID_URL, url).groups()
+ series_slug, season_id = self._match_valid_url(url).groups()
series = self._download_json(
'https://api.svt.se/contento/graphql', series_slug,
@@ -400,7 +400,7 @@ class SVTPageIE(InfoExtractor):
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
def _real_extract(self, url):
- path, display_id = re.match(self._VALID_URL, url).groups()
+ path, display_id = self._match_valid_url(url).groups()
article = self._download_json(
'https://api.svt.se/nss-api/page/' + path, display_id,
diff --git a/hypervideo_dl/extractor/tagesschau.py b/hypervideo_dl/extractor/tagesschau.py
index 8ceab7e..25c2004 100644
--- a/hypervideo_dl/extractor/tagesschau.py
+++ b/hypervideo_dl/extractor/tagesschau.py
@@ -78,7 +78,7 @@ class TagesschauPlayerIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
# kind = mobj.group('kind').lower()
@@ -263,7 +263,7 @@ class TagesschauIE(InfoExtractor):
return formats
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('path')
display_id = video_id.lstrip('-')
diff --git a/hypervideo_dl/extractor/tastytrade.py b/hypervideo_dl/extractor/tastytrade.py
new file mode 100644
index 0000000..7fe96bd
--- /dev/null
+++ b/hypervideo_dl/extractor/tastytrade.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TastyTradeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
+ 'info_dict': {
+ 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+ 'ext': 'mp4',
+ 'title': 'A History of Teaming',
+ 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ 'duration': 422.255,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ ooyala_code = self._search_regex(
+ r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
+ webpage, 'ooyala code', group='code')
+
+ info = self._search_json_ld(webpage, display_id, fatal=False)
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': OoyalaIE.ie_key(),
+ 'url': 'ooyala:%s' % ooyala_code,
+ 'display_id': display_id,
+ })
+ return info
diff --git a/hypervideo_dl/extractor/tbs.py b/hypervideo_dl/extractor/tbs.py
index e8a7c65..c7d62ff 100644
--- a/hypervideo_dl/extractor/tbs.py
+++ b/hypervideo_dl/extractor/tbs.py
@@ -16,7 +16,7 @@ from ..utils import (
class TBSIE(TurnerBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
_TESTS = [{
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
'info_dict': {
@@ -40,12 +40,13 @@ class TBSIE(TurnerBaseIE):
}]
def _real_extract(self, url):
- site, path, display_id = re.match(self._VALID_URL, url).groups()
+ site, path, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
webpage, 'drupal setting'), display_id)
- video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path)
+ isLive = 'watchtnt' in path or 'watchtbs' in path
+ video_data = next(v for v in drupal_settings['turner_playlist'] if isLive or v.get('url') == path)
media_id = video_data['mediaID']
title = video_data['title']
@@ -56,7 +57,8 @@ class TBSIE(TurnerBaseIE):
media_id, tokenizer_query, {
'url': url,
'site_name': site[:3].upper(),
- 'auth_required': video_data.get('authRequired') == '1',
+ 'auth_required': video_data.get('authRequired') == '1' or isLive,
+ 'is_live': isLive
})
thumbnails = []
@@ -85,5 +87,6 @@ class TBSIE(TurnerBaseIE):
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
'thumbnails': thumbnails,
+ 'is_live': isLive
})
return info
diff --git a/hypervideo_dl/extractor/teachable.py b/hypervideo_dl/extractor/teachable.py
index 2394f86..37eae82 100644
--- a/hypervideo_dl/extractor/teachable.py
+++ b/hypervideo_dl/extractor/teachable.py
@@ -151,7 +151,7 @@ class TeachableIE(TeachableBaseIE):
return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
site = mobj.group('site') or mobj.group('site_t')
video_id = mobj.group('id')
@@ -248,7 +248,7 @@ class TeachableCourseIE(TeachableBaseIE):
TeachableCourseIE, cls).suitable(url)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
site = mobj.group('site') or mobj.group('site_t')
course_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/teachertube.py b/hypervideo_dl/extractor/teachertube.py
index 1272078..e22f011 100644
--- a/hypervideo_dl/extractor/teachertube.py
+++ b/hypervideo_dl/extractor/teachertube.py
@@ -111,7 +111,7 @@ class TeacherTubeUserIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
user_id = mobj.group('user')
urls = []
diff --git a/hypervideo_dl/extractor/techtalks.py b/hypervideo_dl/extractor/techtalks.py
index a5b62c7..78f0731 100644
--- a/hypervideo_dl/extractor/techtalks.py
+++ b/hypervideo_dl/extractor/techtalks.py
@@ -44,7 +44,7 @@ class TechTalksIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
talk_id = mobj.group('id')
webpage = self._download_webpage(url, talk_id)
rtmp_url = self._search_regex(
diff --git a/hypervideo_dl/extractor/tele13.py b/hypervideo_dl/extractor/tele13.py
index a29a64b..f8a2755 100644
--- a/hypervideo_dl/extractor/tele13.py
+++ b/hypervideo_dl/extractor/tele13.py
@@ -70,7 +70,7 @@ class Tele13IE(InfoExtractor):
formats.append({
'url': format_url,
'format_id': f.get('label'),
- 'preference': preference(f.get('label')),
+ 'quality': preference(f.get('label')),
'ext': ext,
})
urls.append(format_url)
diff --git a/hypervideo_dl/extractor/tele5.py b/hypervideo_dl/extractor/tele5.py
index 3e1a7a9..0d9cf75 100644
--- a/hypervideo_dl/extractor/tele5.py
+++ b/hypervideo_dl/extractor/tele5.py
@@ -6,9 +6,9 @@ import re
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from .nexx import NexxIE
-from ..compat import compat_urlparse
from ..utils import (
NO_DEFAULT,
+ parse_qs,
smuggle_url,
)
@@ -64,7 +64,7 @@ class Tele5IE(InfoExtractor):
}]
def _real_extract(self, url):
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
NEXX_ID_RE = r'\d{6,}'
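
Several files in this commit (tele5 here, theplatform below) replace the two-step `compat_urlparse` dance with the `parse_qs` utility. Its behaviour matches the stdlib composition it replaces; a quick sketch with an illustrative URL, assuming `hypervideo_dl` is importable:

    from hypervideo_dl.utils import parse_qs

    qs = parse_qs('https://www.tele5.de/mediathek/filme-online/videos?vid=1549416')
    print(qs)                             # {'vid': ['1549416']}
    print((qs.get('vid') or [None])[0])   # '1549416'
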
diff --git a/hypervideo_dl/extractor/telemb.py b/hypervideo_dl/extractor/telemb.py
index 9bcac4e..ac2d603 100644
--- a/hypervideo_dl/extractor/telemb.py
+++ b/hypervideo_dl/extractor/telemb.py
@@ -38,7 +38,7 @@ class TeleMBIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
@@ -57,7 +57,7 @@ class TeleMBIE(InfoExtractor):
'app': rtmp.group('app'),
'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf',
'page_url': 'http://www.telemb.be',
- 'preference': -1,
+ 'preference': -10,
})
formats.append(fmt)
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/telemundo.py b/hypervideo_dl/extractor/telemundo.py
new file mode 100644
index 0000000..18552a0
--- /dev/null
+++ b/hypervideo_dl/extractor/telemundo.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_timestamp,
+ HEADRequest,
+)
+
+
+class TelemundoIE(InfoExtractor):
+
+ _VALID_URL = r'https?:\/\/(?:www\.)?telemundo\.com\/.+?video\/[^\/]+(?P<id>tmvo\d{7})'
+ _TESTS = [{
+ 'url': 'https://www.telemundo.com/noticias/noticias-telemundo-en-la-noche/empleo/video/esta-aplicacion-gratuita-esta-ayudando-los-latinos-encontrar-trabajo-en-estados-unidos-tmvo9829325',
+ 'info_dict': {
+ 'id': 'tmvo9829325',
+ 'timestamp': 1621396800,
+ 'title': 'Esta aplicación gratuita está ayudando a los latinos a encontrar trabajo en Estados Unidos',
+ 'uploader': 'Telemundo',
+ 'uploader_id': 'NBCU_Telemundo',
+ 'ext': 'mp4',
+ 'upload_date': '20210519',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.telemundo.com/shows/al-rojo-vivo/empleo/video/personajes-de-times-square-piden-que-la-ciudad-de-nueva-york-los-deje-volver-trabajar-tmvo9816272',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ metadata = self._parse_json(
+ self._search_regex(r'<[^>]+id="__NEXT_DATA__"[^>]+>([^<]+)', webpage, 'JSON metadata'), video_id)
+ redirect_url = try_get(
+ metadata,
+ lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0]['videoAssets'][0]['publicUrl'])
+
+ m3u8_url = self._request_webpage(HEADRequest(
+ redirect_url + '?format=redirect&manifest=m3u&format=redirect&Tracking=true&Embedded=true&formats=MPEG4'),
+ video_id, 'Processing m3u8').geturl()
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ self._sort_formats(formats)
+ date = unified_timestamp(try_get(
+ metadata, lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0]['datePublished'].split(' ', 1)[1]))
+ return {
+ 'url': url,
+ 'id': video_id,
+ 'title': self._search_regex(r'<h1[^>]+>([^<]+)', webpage, 'title', fatal=False),
+ 'formats': formats,
+ 'timestamp': date,
+ 'uploader': 'Telemundo',
+ 'uploader_id': self._search_regex(r'https?:\/\/(?:[^/]+\/){3}video\/(?P<id>[^\/]+)', m3u8_url, 'Akamai account', fatal=False)
+ }
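
The new Telemundo extractor never receives the manifest URL directly: it issues a HEAD-style request against the asset's publicUrl and reads the post-redirect location back. Inside the extractor that step condenses to the following sketch (the full query string is trimmed for brevity; `redirect_url` is the asset URL pulled from the page metadata above):

    m3u8_url = self._request_webpage(
        HEADRequest(redirect_url + '?format=redirect&manifest=m3u'),
        video_id, 'Processing m3u8').geturl()  # geturl() yields the URL after redirects
    formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
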
diff --git a/hypervideo_dl/extractor/tennistv.py b/hypervideo_dl/extractor/tennistv.py
index a586f30..a39a2fc 100644
--- a/hypervideo_dl/extractor/tennistv.py
+++ b/hypervideo_dl/extractor/tennistv.py
@@ -69,7 +69,7 @@ class TennisTVIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- internal_id = self._search_regex(r'video=([0-9]+)', webpage, 'internal video id')
+ internal_id = self._search_regex(r'video=([\w-]+)', webpage, 'internal video id')
headers = {
'Origin': 'https://www.tennistv.com',
@@ -79,16 +79,18 @@ class TennisTVIE(InfoExtractor):
}
check_data = {
'videoID': internal_id,
- 'VideoUrlType': 'HLSV3',
+ 'VideoUrlType': 'HLS',
}
check_json = json.dumps(check_data).encode('utf-8')
check_result = self._download_json(
'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
video_id, note='Checking video authorization', headers=headers, data=check_json)
formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
+ self._sort_formats(formats)
- vdata_url = 'https://www.tennistv.com/api/channels/v1/de/none/video/%s' % video_id
- vdata = self._download_json(vdata_url, video_id)
+ vdata = self._download_json(
+ 'https://www.tennistv.com/api/en/v2/none/common/video/%s' % video_id,
+ video_id, headers=headers)
timestamp = unified_timestamp(vdata['timestamp'])
thumbnail = vdata['video']['thumbnailUrl']
diff --git a/hypervideo_dl/extractor/tenplay.py b/hypervideo_dl/extractor/tenplay.py
index cd30d57..c810cfd 100644
--- a/hypervideo_dl/extractor/tenplay.py
+++ b/hypervideo_dl/extractor/tenplay.py
@@ -1,70 +1,90 @@
# coding: utf-8
from __future__ import unicode_literals
+from datetime import datetime
+import base64
+
from .common import InfoExtractor
from ..utils import (
HEADRequest,
- parse_age_limit,
- parse_iso8601,
- # smuggle_url,
+ urlencode_postdata,
)
class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
+ _NETRC_MACHINE = '10play'
_TESTS = [{
- 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga',
+ 'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
'info_dict': {
- 'id': '6060533435001',
+ 'id': '6192880312001',
'ext': 'mp4',
- 'title': 'MasterChef - S1 Ep. 1',
- 'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c',
- 'age_limit': 10,
- 'timestamp': 1240828200,
- 'upload_date': '20090427',
- 'uploader_id': '2199827728001',
+ 'title': "Todd Sampson's Body Hack - S4 Ep. 2",
+ 'description': 'md5:fa278820ad90f08ea187f9458316ac74',
+ 'age_limit': 15,
+ 'timestamp': 1600770600,
+ 'upload_date': '20200922',
+ 'uploader': 'Channel 10',
+ 'uploader_id': '2199827728001'
},
'params': {
- # 'format': 'bestvideo',
'skip_download': True,
}
}, {
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
'only_matching': True,
}]
- # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
_GEO_BYPASS = False
- _FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect'
+
+ _AUS_AGES = {
+ 'G': 0,
+ 'PG': 15,
+ 'M': 15,
+ 'MA': 15,
+ 'MA15+': 15,
+ 'R': 18,
+ 'X': 18
+ }
+
+ def _get_bearer_token(self, video_id):
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
+ _timestamp = datetime.now().strftime('%Y%m%d000000')
+ _auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
+ data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
+ 'X-Network-Ten-Auth': _auth_header,
+ }, data=urlencode_postdata({
+ 'email': username,
+ 'password': password,
+ }))
+ return "Bearer " + data['jwt']['accessToken']
def _real_extract(self, url):
content_id = self._match_id(url)
+ _token = self._get_bearer_token(content_id)
data = self._download_json(
- 'https://10play.com.au/api/video/' + content_id, content_id)
- video = data.get('video') or {}
- metadata = data.get('metaData') or {}
- brightcove_id = video.get('videoId') or metadata['showContentVideoId']
- # brightcove_url = smuggle_url(
- # self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
- # {'geo_countries': ['AU']})
+ 'https://10play.com.au/api/v1/videos/' + content_id, content_id)
+ _video_url = self._download_json(
+ data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
+ headers={'Authorization': _token}).get('source')
m3u8_url = self._request_webpage(HEADRequest(
- self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl()
+ _video_url), content_id).geturl()
if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU'])
- formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4')
+ formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
self._sort_formats(formats)
return {
- # '_type': 'url_transparent',
- # 'url': brightcove_url,
'formats': formats,
- 'id': brightcove_id,
- 'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'],
- 'description': video.get('description'),
- 'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
- 'series': metadata.get('showName'),
- 'season': metadata.get('showContentSeason'),
- 'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
- 'thumbnail': video.get('poster'),
+ 'id': data.get('altId') or content_id,
+ 'title': data.get('title'),
+ 'description': data.get('description'),
+ 'age_limit': self._AUS_AGES.get(data.get('classification')),
+ 'series': data.get('showName'),
+ 'season': data.get('showContentSeason'),
+ 'timestamp': data.get('published'),
+ 'thumbnail': data.get('imageUrl'),
+ 'uploader': 'Channel 10',
'uploader_id': '2199827728001',
- # 'ie_key': 'BrightcoveNew',
}
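
The new 10play login flow is self-contained enough to sketch outside the extractor: the X-Network-Ten-Auth header is nothing more than today's date at midnight, base64-encoded, and the response carries a JWT access token. A standalone illustration under those assumptions (the `requests` usage is for illustration only; the extractor itself goes through `_download_json` with `urlencode_postdata`):

    import base64
    from datetime import datetime

    import requests  # illustrative; not a dependency of the extractor

    def get_10play_token(email, password):
        # Auth header: today's date at midnight, base64-encoded
        stamp = datetime.now().strftime('%Y%m%d000000')
        auth = base64.b64encode(stamp.encode('ascii')).decode('ascii')
        resp = requests.post(
            'https://10play.com.au/api/user/auth',
            headers={'X-Network-Ten-Auth': auth},
            data={'email': email, 'password': password})
        resp.raise_for_status()
        return 'Bearer ' + resp.json()['jwt']['accessToken']
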
diff --git a/hypervideo_dl/extractor/testurl.py b/hypervideo_dl/extractor/testurl.py
index 84a14a0..8bc512a 100644
--- a/hypervideo_dl/extractor/testurl.py
+++ b/hypervideo_dl/extractor/testurl.py
@@ -15,7 +15,7 @@ class TestURLIE(InfoExtractor):
def _real_extract(self, url):
from ..extractor import gen_extractors
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
extractor_id = mobj.group('extractor')
all_extractors = gen_extractors()
diff --git a/hypervideo_dl/extractor/tf1.py b/hypervideo_dl/extractor/tf1.py
index 23c2808..669eb50 100644
--- a/hypervideo_dl/extractor/tf1.py
+++ b/hypervideo_dl/extractor/tf1.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -41,7 +40,7 @@ class TF1IE(InfoExtractor):
}]
def _real_extract(self, url):
- program_slug, slug = re.match(self._VALID_URL, url).groups()
+ program_slug, slug = self._match_valid_url(url).groups()
video = self._download_json(
'https://www.tf1.fr/graphql/web', slug, query={
'id': '9b80783950b85247541dd1d851f9cc7fa36574af015621f853ab111a679ce26f',
diff --git a/hypervideo_dl/extractor/theplatform.py b/hypervideo_dl/extractor/theplatform.py
index adfe11e..c2729f1 100644
--- a/hypervideo_dl/extractor/theplatform.py
+++ b/hypervideo_dl/extractor/theplatform.py
@@ -10,15 +10,12 @@ import hashlib
from .once import OnceIE
from .adobepass import AdobePassIE
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
+ parse_qs,
sanitized_Request,
unsmuggle_url,
update_url_query,
@@ -238,7 +235,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
'countries': smuggled_data.get('geo_countries'),
})
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
provider_id = mobj.group('provider_id')
video_id = mobj.group('id')
@@ -250,7 +247,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
path += mobj.group('media')
path += video_id
- qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs_dict = parse_qs(url)
if 'guid' in qs_dict:
webpage = self._download_webpage(url, video_id)
scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
@@ -359,7 +356,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
if first_video_id is None:
first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration'))
- file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
+ file_asset_types = item.get('plfile$assetTypes') or parse_qs(smil_url)['assetTypes']
for asset_type in file_asset_types:
if asset_type in asset_types:
continue
@@ -404,7 +401,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
return ret
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
provider_id = mobj.group('provider_id')
diff --git a/hypervideo_dl/extractor/theta.py b/hypervideo_dl/extractor/theta.py
new file mode 100644
index 0000000..3b65436
--- /dev/null
+++ b/hypervideo_dl/extractor/theta.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import try_get
+
+
+class ThetaStreamIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.theta.tv/davirus',
+ 'skip': 'The live may have ended',
+ 'info_dict': {
+ 'id': 'DaVirus',
+ 'ext': 'mp4',
+ 'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
+ 'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
+ }
+ }, {
+ 'url': 'https://www.theta.tv/mst3k',
+ 'note': 'This channel is live 24/7',
+ 'info_dict': {
+ 'id': 'MST3K',
+ 'ext': 'mp4',
+ 'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
+ 'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
+
+ m3u8_playlist = next(
+ data['url'] for data in info['live_stream']['video_urls']
+ if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
+
+ formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
+ self._sort_formats(formats)
+
+ channel = try_get(info, lambda x: x['user']['username']) # using this field instead of channel_id due to capitalization
+
+ return {
+ 'id': channel,
+ 'title': try_get(info, lambda x: x['live_stream']['title']),
+ 'channel': channel,
+ 'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
+ 'is_live': True,
+ 'formats': formats,
+ 'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
+ }
+
+
+class ThetaVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
+ _TEST = {
+ 'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
+ 'md5': '633d8c29eb276bb38a111dbd591c677f',
+ 'info_dict': {
+ 'id': 'vidiq6aaet3kzf799p0',
+ 'ext': 'mp4',
+ 'title': 'Theta EdgeCast Tutorial',
+ 'uploader': 'Pixiekittie',
+ 'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
+ 'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
+
+ m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
+
+ formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info.get('title'),
+ 'uploader': try_get(info, lambda x: x['user']['username']),
+ 'description': info.get('description'),
+ 'view_count': info.get('view_count'),
+ 'like_count': info.get('like_count'),
+ 'formats': formats,
+ 'thumbnail': info.get('thumbnail_url'),
+ }
diff --git a/hypervideo_dl/extractor/theweatherchannel.py b/hypervideo_dl/extractor/theweatherchannel.py
index b2a8c37..9e506c9 100644
--- a/hypervideo_dl/extractor/theweatherchannel.py
+++ b/hypervideo_dl/extractor/theweatherchannel.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .theplatform import ThePlatformIE
from ..utils import (
@@ -33,7 +32,7 @@ class TheWeatherChannelIE(ThePlatformIE):
}]
def _real_extract(self, url):
- asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
+ asset_name, locale, display_id = self._match_valid_url(url).groups()
if not locale:
locale = 'en-US'
video_data = list(self._download_json(
diff --git a/hypervideo_dl/extractor/thisav.py b/hypervideo_dl/extractor/thisav.py
index dc3dd03..4af286e 100644
--- a/hypervideo_dl/extractor/thisav.py
+++ b/hypervideo_dl/extractor/thisav.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import remove_end
@@ -34,7 +33,7 @@ class ThisAVIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/threeqsdn.py b/hypervideo_dl/extractor/threeqsdn.py
index f6d37bb..bb76103 100644
--- a/hypervideo_dl/extractor/threeqsdn.py
+++ b/hypervideo_dl/extractor/threeqsdn.py
@@ -99,16 +99,21 @@ class ThreeQSDNIE(InfoExtractor):
aspect = float_or_none(config.get('aspect'))
formats = []
+ subtitles = {}
for source_type, source in (config.get('sources') or {}).items():
if not source:
continue
if source_type == 'dash':
- formats.extend(self._extract_mpd_formats(
- source, video_id, mpd_id='mpd', fatal=False))
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ source, video_id, mpd_id='mpd', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif source_type == 'hls':
- formats.extend(self._extract_m3u8_formats(
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
- m3u8_id='hls', fatal=False))
+ m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif source_type == 'progressive':
for s in source:
src = s.get('src')
@@ -133,14 +138,11 @@ class ThreeQSDNIE(InfoExtractor):
'vcodec': 'none' if height == 0 else None,
'width': width,
})
- for f in formats:
- if f.get('acodec') == 'none':
- f['preference'] = -40
- elif f.get('vcodec') == 'none':
- f['preference'] = -50
- self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id'))
+ # It seems like this would be correctly handled by default
+ # However, unless someone can confirm this, the old
+ # behaviour is being kept as-is
+ self._sort_formats(formats, ('res', 'source_preference'))
- subtitles = {}
for subtitle in (config.get('subtitles') or []):
src = subtitle.get('src')
if not src:
diff --git a/hypervideo_dl/extractor/tiktok.py b/hypervideo_dl/extractor/tiktok.py
index 4faa6de..1db6327 100644
--- a/hypervideo_dl/extractor/tiktok.py
+++ b/hypervideo_dl/extractor/tiktok.py
@@ -1,147 +1,563 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
+import random
+import string
+import time
+import json
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
- compat_str,
ExtractorError,
- float_or_none,
int_or_none,
str_or_none,
+ traverse_obj,
try_get,
url_or_none,
+ qualities,
)
class TikTokBaseIE(InfoExtractor):
- def _extract_video(self, data, video_id=None):
- video = data['video']
- description = str_or_none(try_get(data, lambda x: x['desc']))
- width = int_or_none(try_get(data, lambda x: video['width']))
- height = int_or_none(try_get(data, lambda x: video['height']))
+ _APP_VERSION = '20.9.3'
+ _MANIFEST_APP_VERSION = '291'
+ _APP_NAME = 'trill'
+ _AID = 1180
+ _API_HOSTNAME = 'api-t2.tiktokv.com'
+ _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
+ QUALITIES = ('360p', '540p', '720p')
- format_urls = set()
- formats = []
- for format_id in ('download', 'play'):
- format_url = url_or_none(video.get('%sAddr' % format_id))
- if not format_url:
- continue
- if format_url in format_urls:
- continue
- format_urls.add(format_url)
- formats.append({
- 'url': format_url,
+ def _call_api(self, ep, query, video_id, fatal=True,
+ note='Downloading API JSON', errnote='Unable to download API page'):
+ real_query = {
+ **query,
+ 'version_name': self._APP_VERSION,
+ 'version_code': self._MANIFEST_APP_VERSION,
+ 'build_number': self._APP_VERSION,
+ 'manifest_version_code': self._MANIFEST_APP_VERSION,
+ 'update_version_code': self._MANIFEST_APP_VERSION,
+ 'openudid': ''.join(random.choice('0123456789abcdef') for i in range(16)),
+ 'uuid': ''.join([random.choice(string.digits) for num in range(16)]),
+ '_rticket': int(time.time() * 1000),
+ 'ts': int(time.time()),
+ 'device_brand': 'Google',
+ 'device_type': 'Pixel 4',
+ 'device_platform': 'android',
+ 'resolution': '1080*1920',
+ 'dpi': 420,
+ 'os_version': '10',
+ 'os_api': '29',
+ 'carrier_region': 'US',
+ 'sys_region': 'US',
+ 'region': 'US',
+ 'app_name': self._APP_NAME,
+ 'app_language': 'en',
+ 'language': 'en',
+ 'timezone_name': 'America/New_York',
+ 'timezone_offset': '-14400',
+ 'channel': 'googleplay',
+ 'ac': 'wifi',
+ 'mcc_mnc': '310260',
+ 'is_my_cn': 0,
+ 'aid': self._AID,
+ 'ssmix': 'a',
+ 'as': 'a1qwert123',
+ 'cp': 'cbfhckdckkde1',
+ }
+ self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160)))
+ return self._download_json(
+ 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
+ fatal=fatal, note=note, errnote=errnote, headers={
+ 'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
+ 'Accept': 'application/json',
+ }, query=real_query)
+
+ def _parse_aweme_video_app(self, aweme_detail):
+ aweme_id = aweme_detail['aweme_id']
+ video_info = aweme_detail['video']
+
+ def parse_url_key(url_key):
+ format_id, codec, res, bitrate = self._search_regex(
+ r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
+ 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
+ if not format_id:
+ return {}, None
+ return {
+ 'format_id': format_id,
+ 'vcodec': 'h265' if codec == 'bytevc1' else codec,
+ 'tbr': int_or_none(bitrate, scale=1000) or None,
+ 'quality': qualities(self.QUALITIES)(res),
+ }, res
+
+ known_resolutions = {}
+
+ def extract_addr(addr, add_meta={}):
+ parsed_meta, res = parse_url_key(addr.get('url_key', ''))
+ if res:
+ known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height'))
+ known_resolutions[res].setdefault('width', add_meta.get('width'))
+ parsed_meta.update(known_resolutions.get(res, {}))
+ add_meta.setdefault('height', int_or_none(res[:-1]))
+ return [{
+ 'url': url,
+ 'filesize': int_or_none(addr.get('data_size')),
'ext': 'mp4',
- 'height': height,
- 'width': width,
- 'http_headers': {
- 'Referer': 'https://www.tiktok.com/',
- }
- })
- self._sort_formats(formats)
+ 'acodec': 'aac',
+ 'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
+ **add_meta, **parsed_meta,
+ 'format_note': ' '.join(filter(None, (
+ add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else '')))
+ } for url in addr.get('url_list') or []]
- thumbnail = url_or_none(video.get('cover'))
- duration = float_or_none(video.get('duration'))
+ # Hack: Add direct video links first to prioritize them when removing duplicate formats
+ formats = []
+ if video_info.get('play_addr'):
+ formats.extend(extract_addr(video_info['play_addr'], {
+ 'format_id': 'play_addr',
+ 'format_note': 'Direct video',
+ 'vcodec': 'h265' if traverse_obj(
+ video_info, 'is_bytevc1', 'is_h265') else 'h264', # Always h264?
+ 'width': video_info.get('width'),
+ 'height': video_info.get('height'),
+ }))
+ if video_info.get('download_addr'):
+ formats.extend(extract_addr(video_info['download_addr'], {
+ 'format_id': 'download_addr',
+ 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
+ 'vcodec': 'h264',
+ 'width': video_info.get('width'),
+ 'height': video_info.get('height'),
+ 'preference': -2 if video_info.get('has_watermark') else -1,
+ }))
+ if video_info.get('play_addr_h264'):
+ formats.extend(extract_addr(video_info['play_addr_h264'], {
+ 'format_id': 'play_addr_h264',
+ 'format_note': 'Direct video',
+ 'vcodec': 'h264',
+ }))
+ if video_info.get('play_addr_bytevc1'):
+ formats.extend(extract_addr(video_info['play_addr_bytevc1'], {
+ 'format_id': 'play_addr_bytevc1',
+ 'format_note': 'Direct video',
+ 'vcodec': 'h265',
+ }))
+
+ for bitrate in video_info.get('bit_rate', []):
+ if bitrate.get('play_addr'):
+ formats.extend(extract_addr(bitrate['play_addr'], {
+ 'format_id': bitrate.get('gear_name'),
+ 'format_note': 'Playback video',
+ 'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000),
+ 'vcodec': 'h265' if traverse_obj(
+ bitrate, 'is_bytevc1', 'is_h265') else 'h264',
+ 'fps': bitrate.get('FPS'),
+ }))
- uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
- uploader_id = try_get(data, lambda x: x['author']['id'], compat_str)
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats, ('quality', 'codec', 'size', 'br'))
- timestamp = int_or_none(data.get('createTime'))
+ thumbnails = []
+ for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
+ 'origin_cover', 'dynamic_cover'):
+ cover = video_info.get(cover_id)
+ if cover:
+ for cover_url in cover['url_list']:
+ thumbnails.append({
+ 'id': cover_id,
+ 'url': cover_url,
+ })
- def stats(key):
- return int_or_none(try_get(
- data, lambda x: x['stats']['%sCount' % key]))
+ stats_info = aweme_detail.get('statistics', {})
+ author_info = aweme_detail.get('author', {})
+ music_info = aweme_detail.get('music', {})
+ user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
+ 'sec_uid', 'id', 'uid', 'unique_id',
+ expected_type=str_or_none, get_all=False))
- view_count = stats('play')
- like_count = stats('digg')
- comment_count = stats('comment')
- repost_count = stats('share')
+ contained_music_track = traverse_obj(
+ music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str)
+ contained_music_author = traverse_obj(
+ music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str)
- aweme_id = data.get('id') or video_id
+ is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle')
+ if is_generic_og_trackname:
+ music_track, music_author = contained_music_track or 'original sound', contained_music_author
+ else:
+ music_track, music_author = music_info.get('title'), music_info.get('author')
return {
'id': aweme_id,
- 'title': uploader or aweme_id,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'like_count': like_count,
- 'comment_count': comment_count,
- 'repost_count': repost_count,
+ 'title': aweme_detail['desc'],
+ 'description': aweme_detail['desc'],
+ 'view_count': int_or_none(stats_info.get('play_count')),
+ 'like_count': int_or_none(stats_info.get('digg_count')),
+ 'repost_count': int_or_none(stats_info.get('share_count')),
+ 'comment_count': int_or_none(stats_info.get('comment_count')),
+ 'uploader': str_or_none(author_info.get('unique_id')),
+ 'creator': str_or_none(author_info.get('nickname')),
+ 'uploader_id': str_or_none(author_info.get('uid')),
+ 'uploader_url': user_url,
+ 'track': music_track,
+ 'album': str_or_none(music_info.get('album')) or None,
+ 'artist': music_author,
+ 'timestamp': int_or_none(aweme_detail.get('create_time')),
'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000)
+ }
+
+ def _parse_aweme_video_web(self, aweme_detail, webpage_url):
+ video_info = aweme_detail['video']
+ author_info = traverse_obj(aweme_detail, 'author', 'authorInfo', default={})
+ music_info = aweme_detail.get('music') or {}
+ stats_info = aweme_detail.get('stats') or {}
+ user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
+ 'secUid', 'id', 'uid', 'uniqueId',
+ expected_type=str_or_none, get_all=False))
+
+ formats = []
+ play_url = video_info.get('playAddr')
+ width = video_info.get('width')
+ height = video_info.get('height')
+ if isinstance(play_url, str):
+ formats = [{
+ 'url': self._proto_relative_url(play_url),
+ 'ext': 'mp4',
+ 'width': width,
+ 'height': height,
+ }]
+ elif isinstance(play_url, list):
+ formats = [{
+ 'url': self._proto_relative_url(url),
+ 'ext': 'mp4',
+ 'width': width,
+ 'height': height,
+ } for url in traverse_obj(play_url, (..., 'src'), expected_type=url_or_none, default=[]) if url]
+
+ download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none)
+ if download_url:
+ formats.append({
+ 'format_id': 'download',
+ 'url': self._proto_relative_url(download_url),
+ 'ext': 'mp4',
+ 'width': width,
+ 'height': height,
+ })
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
+ if aweme_detail.get(thumbnail_name):
+ thumbnails = [{
+ 'url': self._proto_relative_url(aweme_detail[thumbnail_name]),
+ 'width': width,
+ 'height': height
+ }]
+
+ return {
+ 'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none),
+ 'title': aweme_detail.get('desc'),
+ 'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int),
+ 'view_count': int_or_none(stats_info.get('playCount')),
+ 'like_count': int_or_none(stats_info.get('diggCount')),
+ 'repost_count': int_or_none(stats_info.get('shareCount')),
+ 'comment_count': int_or_none(stats_info.get('commentCount')),
+ 'timestamp': int_or_none(aweme_detail.get('createTime')),
+ 'creator': str_or_none(author_info.get('nickname')),
+ 'uploader': str_or_none(author_info.get('uniqueId')),
+ 'uploader_id': str_or_none(author_info.get('id')),
+ 'uploader_url': user_url,
+ 'track': str_or_none(music_info.get('title')),
+ 'album': str_or_none(music_info.get('album')) or None,
+ 'artist': str_or_none(music_info.get('authorName')),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': str_or_none(aweme_detail.get('desc')),
+ 'http_headers': {
+ 'Referer': webpage_url
+ }
}
class TikTokIE(TikTokBaseIE):
- _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@[^/]+/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'
+
_TESTS = [{
- 'url': 'https://www.tiktok.com/@zureeal/video/6606727368545406213',
- 'md5': '163ceff303bb52de60e6887fe399e6cd',
+ 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
+ 'md5': '736bb7a466c6f0a6afeb597da1e6f5b7',
'info_dict': {
- 'id': '6606727368545406213',
+ 'id': '6748451240264420610',
'ext': 'mp4',
- 'title': 'Zureeal',
- 'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
- 'thumbnail': r're:^https?://.*',
- 'duration': 15,
- 'uploader': 'Zureeal',
- 'uploader_id': '188294915489964032',
- 'timestamp': 1538248586,
- 'upload_date': '20180929',
+ 'title': '#jassmanak #lehanga #leenabhushan',
+ 'description': '#jassmanak #lehanga #leenabhushan',
+ 'duration': 13,
+ 'height': 1024,
+ 'width': 576,
+ 'uploader': 'leenabhushan',
+ 'uploader_id': '6691488002098119685',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA_Eb4t1vodM1IuTy_cvp9CY22RAb59xqrO0Xtz9CYQJvgXaDvZxYnZYRzDWhhgJmy',
+ 'creator': 'facestoriesbyleenabh',
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20191016',
+ 'timestamp': 1571246252,
'view_count': int,
'like_count': int,
+ 'repost_count': int,
'comment_count': int,
+ }
+ }, {
+ 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
+ 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
+ 'info_dict': {
+ 'id': '6742501081818877190',
+ 'ext': 'mp4',
+ 'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
+ 'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
+ 'duration': 27,
+ 'height': 960,
+ 'width': 540,
+ 'uploader': 'patrox',
+ 'uploader_id': '18702747',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
+ 'creator': 'patroX',
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20190930',
+ 'timestamp': 1569860870,
+ 'view_count': int,
+ 'like_count': int,
'repost_count': int,
+ 'comment_count': int,
}
+ }, {
+ # Promoted content/ad
+ 'url': 'https://www.tiktok.com/@MS4wLjABAAAAAR29F6J2Ktu0Daw03BJyXPNoRQ-W7U5a0Mn3lVCq2rQhjOd_WNLclHUoFgwX8Eno/video/6932675057474981122',
+ 'only_matching': True,
}]
- def _real_initialize(self):
- # Setup session (will set necessary cookies)
- self._request_webpage(
- 'https://www.tiktok.com/', None, note='Setting up session')
+ def _extract_aweme_app(self, aweme_id):
+ aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
+ note='Downloading video details', errnote='Unable to download video details')['aweme_detail']
+ return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- page_props = self._parse_json(self._search_regex(
- r'<script[^>]+\bid=["\']__NEXT_DATA__[^>]+>\s*({.+?})\s*</script',
- webpage, 'data'), video_id)['props']['pageProps']
- data = try_get(page_props, lambda x: x['itemInfo']['itemStruct'], dict)
- if not data and page_props.get('statusCode') == 10216:
+
+ try:
+ return self._extract_aweme_app(video_id)
+ except ExtractorError as e:
+ self.report_warning(f'{e}; Retrying with webpage')
+
+        # If we only call this once, we get a 403 when downloading the video.
+ self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
+ json_string = self._search_regex(
+ r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
+ webpage, 'json_string', group='json_string_ld')
+ json_data = self._parse_json(json_string, video_id)
+ props_data = try_get(json_data, lambda x: x['props'], expected_type=dict)
+
+        # Check statusCode for success
+        status = traverse_obj(props_data, ('pageProps', 'statusCode'))
+ if status == 0:
+ return self._parse_aweme_video_web(props_data['pageProps']['itemInfo']['itemStruct'], url)
+ elif status == 10216:
raise ExtractorError('This video is private', expected=True)
- return self._extract_video(data, video_id)
+
+ raise ExtractorError('Video not available', video_id=video_id)
class TikTokUserIE(TikTokBaseIE):
- _VALID_URL = r'https://(?:www\.)?tiktok\.com/@(?P<id>[^/?#&]+)'
+ IE_NAME = 'tiktok:user'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
_TESTS = [{
- 'url': 'https://www.tiktok.com/@zureeal',
+ 'url': 'https://tiktok.com/@corgibobaa?lang=en',
+ 'playlist_mincount': 45,
+ 'info_dict': {
+ 'id': '6935371178089399301',
+ 'title': 'corgibobaa',
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'https://www.tiktok.com/@meme',
+ 'playlist_mincount': 593,
'info_dict': {
- 'id': '188294915489964032',
+ 'id': '79005827461758976',
+ 'title': 'meme',
},
- 'playlist_mincount': 24,
+ 'expected_warnings': ['Retrying']
}]
- _WORKING = False
- @classmethod
- def suitable(cls, url):
- return False if TikTokIE.suitable(url) else super(TikTokUserIE, cls).suitable(url)
+ r''' # TODO: Fix by adding _signature to api_url
+ def _entries(self, webpage, user_id, username):
+ secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username)
+ verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
+ if not verifyfp_cookie:
+ raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
+ api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
+ cursor = '0'
+ for page in itertools.count():
+ data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page)
+ for video in data_json.get('itemList', []):
+ video_id = video['id']
+ video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
+ yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc')))
+ if not data_json.get('hasMore'):
+ break
+ cursor = data_json['cursor']
+ '''
+
+ def _entries_api(self, webpage, user_id, username):
+ query = {
+ 'user_id': user_id,
+ 'count': 21,
+ 'max_cursor': 0,
+ 'min_cursor': 0,
+ 'retry_type': 'no_retry',
+ 'device_id': ''.join(random.choice(string.digits) for i in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
+ }
+
+ max_retries = self.get_param('extractor_retries', 3)
+ for page in itertools.count(1):
+ for retries in itertools.count():
+ try:
+ post_list = self._call_api('aweme/post', query, username,
+ note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
+ errnote='Unable to download user video list')
+ except ExtractorError as e:
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
+ self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
+ continue
+ raise
+ break
+ for video in post_list.get('aweme_list', []):
+ yield {
+ **self._parse_aweme_video_app(video),
+ 'ie_key': TikTokIE.ie_key(),
+ 'extractor': 'TikTok',
+ }
+ if not post_list.get('has_more'):
+ break
+ query['max_cursor'] = post_list['max_cursor']
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+ webpage = self._download_webpage(url, user_name, headers={
+ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
+ })
+ user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID')
+ return self.playlist_result(self._entries_api(webpage, user_id, user_name), user_id, user_name)
+
+
+class DouyinIE(TikTokIE):
+ _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.douyin.com/video/6961737553342991651',
+ 'md5': '10523312c8b8100f353620ac9dc8f067',
+ 'info_dict': {
+ 'id': '6961737553342991651',
+ 'ext': 'mp4',
+ 'title': '#杨超越 小小水手带你去远航❤️',
+ 'uploader': '杨超越',
+ 'upload_date': '20210513',
+ 'timestamp': 1620905839,
+ 'uploader_id': '110403406559',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'https://www.douyin.com/video/6982497745948921092',
+ 'md5': 'd78408c984b9b5102904cf6b6bc2d712',
+ 'info_dict': {
+ 'id': '6982497745948921092',
+ 'ext': 'mp4',
+ 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
+ 'uploader': '杨超越工作室',
+ 'upload_date': '20210708',
+ 'timestamp': 1625739481,
+ 'uploader_id': '408654318141572',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'https://www.douyin.com/video/6953975910773099811',
+ 'md5': '72e882e24f75064c218b76c8b713c185',
+ 'info_dict': {
+ 'id': '6953975910773099811',
+ 'ext': 'mp4',
+ 'title': '#一起看海 出现在你的夏日里',
+ 'uploader': '杨超越',
+ 'upload_date': '20210422',
+ 'timestamp': 1619098692,
+ 'uploader_id': '110403406559',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'https://www.douyin.com/video/6950251282489675042',
+ 'md5': 'b4db86aec367ef810ddd38b1737d2fed',
+ 'info_dict': {
+ 'id': '6950251282489675042',
+ 'ext': 'mp4',
+ 'title': '哈哈哈,成功了哈哈哈哈哈哈',
+ 'uploader': '杨超越',
+ 'upload_date': '20210412',
+ 'timestamp': 1618231483,
+ 'uploader_id': '110403406559',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'https://www.douyin.com/video/6963263655114722595',
+ 'md5': '1abe1c477d05ee62efb40bf2329957cf',
+ 'info_dict': {
+ 'id': '6963263655114722595',
+ 'ext': 'mp4',
+ 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
+ 'uploader': '杨超越',
+ 'upload_date': '20210517',
+ 'timestamp': 1621261163,
+ 'uploader_id': '110403406559',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ }
+ }]
+ _APP_VERSION = '9.6.0'
+ _MANIFEST_APP_VERSION = '960'
+ _APP_NAME = 'aweme'
+ _AID = 1128
+ _API_HOSTNAME = 'aweme.snssdk.com'
+ _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
def _real_extract(self, url):
- user_id = self._match_id(url)
- data = self._download_json(
- 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
- query={'_signature': '_'})
- entries = []
- for aweme in data['aweme_list']:
- try:
- entry = self._extract_video(aweme)
- except ExtractorError:
- continue
- entry['extractor_key'] = TikTokIE.ie_key()
- entries.append(entry)
- return self.playlist_result(entries, user_id)
+ video_id = self._match_id(url)
+
+ try:
+ return self._extract_aweme_app(video_id)
+ except ExtractorError as e:
+ self.report_warning(f'{e}; Retrying with webpage')
+
+ webpage = self._download_webpage(url, video_id)
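+        # The RENDER_DATA script tag holds percent-encoded JSON ("%7B...%7D" is "{...}"), hence the unquote below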
+ render_data_json = self._search_regex(
+ r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
+ webpage, 'render data', default=None)
+ if not render_data_json:
+ # TODO: Run verification challenge code to generate signature cookies
+            raise ExtractorError('Fresh cookies (not necessarily from a logged-in session) are needed')
+
+ render_data = self._parse_json(
+ render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
+ return self._parse_aweme_video_web(
+ traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url)
diff --git a/hypervideo_dl/extractor/tinypic.py b/hypervideo_dl/extractor/tinypic.py
index bc2def5..39056e5 100644
--- a/hypervideo_dl/extractor/tinypic.py
+++ b/hypervideo_dl/extractor/tinypic.py
@@ -28,7 +28,7 @@ class TinyPicIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id, 'Downloading page')
diff --git a/hypervideo_dl/extractor/tmz.py b/hypervideo_dl/extractor/tmz.py
index 3d1bf75..aee2273 100644
--- a/hypervideo_dl/extractor/tmz.py
+++ b/hypervideo_dl/extractor/tmz.py
@@ -1,111 +1,157 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-from .kaltura import KalturaIE
from ..utils import (
- int_or_none,
- unified_timestamp,
+ ExtractorError,
+ get_element_by_attribute,
)
class TMZIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://www.tmz.com/videos/0-cegprt2p/',
- 'md5': '31f9223e20eef55954973359afa61a20',
- 'info_dict': {
- 'id': 'P6YjLBLk',
- 'ext': 'mp4',
- 'title': "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
- 'description': 'md5:b714359fc18607715ebccbd2da8ff488',
- 'timestamp': 1467831837,
- 'upload_date': '20160706',
+ _VALID_URL = r"https?://(?:www\.)?tmz\.com/.*"
+ _TESTS = [
+ {
+ "url": "http://www.tmz.com/videos/0-cegprt2p/",
+ "info_dict": {
+ "id": "http://www.tmz.com/videos/0-cegprt2p/",
+ "ext": "mp4",
+ "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
+ "description": "Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.",
+ "timestamp": 1467831837,
+ "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "upload_date": "20160706",
+ },
},
- 'add_ie': [JWPlatformIE.ie_key()],
- }, {
- 'url': 'http://www.tmz.com/videos/0_okj015ty/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tmz.com/videos/2021-02-19-021921-floyd-mayweather-1043872/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url).replace('-', '_')
-
- webpage = self._download_webpage(url, video_id, fatal=False)
- if webpage:
- tmz_video_id = self._search_regex(
- r'nodeRef\s*:\s*["\']tmz:video:([\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12})',
- webpage, 'video id', default=None)
- video = self._download_json(
- 'https://www.tmz.com/_/video/%s' % tmz_video_id, video_id,
- fatal=False)
- if video:
- message = video['message']
- info = {
- '_type': 'url_transparent',
- 'title': message.get('title'),
- 'description': message.get('description'),
- 'timestamp': unified_timestamp(message.get('published_at')),
- 'duration': int_or_none(message.get('duration')),
- }
- jwplatform_id = message.get('jwplayer_media_id')
- if jwplatform_id:
- info.update({
- 'url': 'jwplatform:%s' % jwplatform_id,
- 'ie_key': JWPlatformIE.ie_key(),
- })
- else:
- kaltura_entry_id = message.get('kaltura_entry_id') or video_id
- kaltura_partner_id = message.get('kaltura_partner_id') or '591531'
- info.update({
- 'url': 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id),
- 'ie_key': KalturaIE.ie_key(),
- })
- return info
-
- return self.url_result(
- 'kaltura:591531:%s' % video_id, KalturaIE.ie_key(), video_id)
-
-
-class TMZArticleIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
- 'info_dict': {
- 'id': 'PAKZa97W',
- 'ext': 'mp4',
- 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
- 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
- 'timestamp': 1429466400,
- 'upload_date': '20150419',
+ {
+ "url": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
+ "info_dict": {
+ "id": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
+ "ext": "mp4",
+ "title": "Angry Bagel Shop Guy Says He Doesn't Trust Women",
+ "description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.",
+ "timestamp": 1562889485,
+ "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "upload_date": "20190711",
+ },
+ },
+ {
+ "url": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
+ "md5": "5429c85db8bde39a473a56ca8c4c5602",
+ "info_dict": {
+ "id": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
+ "ext": "mp4",
+ "title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake",
+ "description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
+ "timestamp": 1429467813,
+ "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "upload_date": "20150419",
+ },
},
- 'params': {
- 'skip_download': True,
+ {
+ "url": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
+ "info_dict": {
+ "id": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
+ "ext": "mp4",
+ "title": "Patti LaBelle -- Goes Nuclear On Stripping Fan",
+ "description": "Patti LaBelle made it known loud and clear last night ... NO "
+ "ONE gets on her stage and strips down.",
+ "timestamp": 1442683746,
+ "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "upload_date": "20150919",
+ },
},
- 'add_ie': [JWPlatformIE.ie_key()],
- }
+ {
+ "url": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
+ "info_dict": {
+ "id": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
+ "ext": "mp4",
+ "title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This",
+ "description": "Two pretty parts of this video with NBA Commish Adam Silver.",
+ "timestamp": 1454010989,
+ "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "upload_date": "20160128",
+ },
+ },
+ {
+ "url": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
+ "info_dict": {
+ "id": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
+ "ext": "mp4",
+ "title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!",
+ "description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.",
+ "timestamp": 1477500095,
+ "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "upload_date": "20161026",
+ },
+ },
+ {
+ "url": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
+ "info_dict": {
+ "id": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
+ "ext": "mp4",
+ "title": "Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist "
+ "Demonstrators",
+ "description": "Beverly Hills may be an omen of what's coming next week, "
+ "because things got crazy on the streets and cops started "
+ "swinging their billy clubs at both Anti-Fascist and Pro-Trump "
+ "demonstrators.",
+ "timestamp": 1604182772,
+ "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
+ "upload_date": "20201031",
+ },
+ },
+ {
+ "url": "https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/",
+ "info_dict": {
+ "id": "Dddb6IGe-ws",
+ "ext": "mp4",
+ "title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing",
+ "uploader": "ESNEWS",
+ "description": "md5:49675bc58883ccf80474b8aa701e1064",
+ "upload_date": "20201101",
+ "uploader_id": "ESNEWS",
+ },
+ },
+ {
+ "url": "https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/",
+ "info_dict": {
+ "id": "1329450007125225473",
+ "ext": "mp4",
+ "title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.",
+ "uploader": "TheMacLife",
+ "description": "md5:56e6009bbc3d12498e10d08a8e1f1c69",
+ "upload_date": "20201119",
+ "uploader_id": "Maclifeofficial",
+ "timestamp": 1605800556,
+ },
+ },
+ ]
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- tmz_url = self._search_regex(
- r'clickLink\s*\(\s*["\'](?P<url>%s)' % TMZIE._VALID_URL, webpage,
- 'video id', default=None, group='url')
- if tmz_url:
- return self.url_result(tmz_url, ie=TMZIE.ie_key())
-
- embedded_video_info = self._parse_json(self._html_search_regex(
- r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
- video_id)
- return self.url_result(
- 'http://www.tmz.com/videos/%s/' % embedded_video_info['id'],
- ie=TMZIE.ie_key())
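+        # TMZ pages embed JSON-LD metadata; the page URL doubles as the video ID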
+ webpage = self._download_webpage(url, url)
+ jsonld = self._search_json_ld(webpage, url)
+ if not jsonld or "url" not in jsonld:
+ # try to extract from YouTube Player API
+ # see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions
+ match_obj = re.search(r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)', webpage)
+ if match_obj:
+ res = self.url_result(match_obj.group("id"))
+ return res
+ # try to extract from twitter
+ blockquote_el = get_element_by_attribute("class", "twitter-tweet", webpage)
+ if blockquote_el:
+ matches = re.findall(
+ r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)',
+ blockquote_el)
+ if matches:
+ for _, match in matches:
+ if "/status/" in match:
+ res = self.url_result(match)
+ return res
+ raise ExtractorError("No video found!")
+        if "id" not in jsonld:
+ jsonld["id"] = url
+ return jsonld
diff --git a/hypervideo_dl/extractor/tnaflix.py b/hypervideo_dl/extractor/tnaflix.py
index b3573c6..d7617f7 100644
--- a/hypervideo_dl/extractor/tnaflix.py
+++ b/hypervideo_dl/extractor/tnaflix.py
@@ -73,7 +73,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
} for i in range(first, last + 1)]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
for display_id_key in ('display_id', 'display_id_2'):
if display_id_key in mobj.groupdict():
diff --git a/hypervideo_dl/extractor/toggle.py b/hypervideo_dl/extractor/toggle.py
index 270c84d..eb87349 100644
--- a/hypervideo_dl/extractor/toggle.py
+++ b/hypervideo_dl/extractor/toggle.py
@@ -7,7 +7,6 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
- ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
@@ -154,11 +153,10 @@ class ToggleIE(InfoExtractor):
})
if not formats:
for meta in (info.get('Metas') or []):
- if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
- raise ExtractorError(
- 'This video is DRM protected.', expected=True)
- # Most likely because geo-blocked
- raise ExtractorError('No downloadable videos found', expected=True)
+ if (not self.get_param('allow_unplayable_formats')
+ and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'):
+ self.report_drm(video_id)
+            # If there are no formats and no DRM, the video is most likely geo-blocked
self._sort_formats(formats)
thumbnails = []
diff --git a/hypervideo_dl/extractor/tokentube.py b/hypervideo_dl/extractor/tokentube.py
new file mode 100644
index 0000000..d636211
--- /dev/null
+++ b/hypervideo_dl/extractor/tokentube.py
@@ -0,0 +1,152 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_count,
+ unified_strdate,
+ js_to_json,
+ OnDemandPagedList,
+)
+
+
+class TokentubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tokentube\.net/(?:view\?[vl]=|[vl]/)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://tokentube.net/l/3236632011/Praise-A-Thon-Pastori-Chrisin-ja-Pastori-Bennyn-kanssa-27-8-2021',
+ 'info_dict': {
+ 'id': '3236632011',
+ 'ext': 'mp4',
+ 'title': 'Praise-A-Thon Pastori Chrisin ja Pastori Bennyn kanssa 27.8.2021',
+ 'description': '',
+ 'uploader': 'Pastori Chris - Rapsodia.fi',
+ 'upload_date': '20210827',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tokentube.net/v/3950239124/Linux-Ubuntu-Studio-perus-k%C3%A4ytt%C3%B6',
+ 'md5': '0e1f00421f501f5eada9890d38fcfb56',
+ 'info_dict': {
+ 'id': '3950239124',
+ 'ext': 'mp4',
+ 'title': 'Linux Ubuntu Studio perus käyttö',
+ 'description': 'md5:854ff1dc732ff708976de2880ea32050',
+ 'uploader': 'jyrilehtonen',
+ 'upload_date': '20210825',
+ },
+ }, {
+ 'url': 'https://tokentube.net/view?v=3582463289',
+ 'info_dict': {
+ 'id': '3582463289',
+ 'ext': 'mp4',
+ 'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??',
+ 'description': 'md5:cd92e620d7f5fa162e8410d0fc9a08be',
+ 'uploader': 'Voitontie',
+ 'upload_date': '20210428',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<h1\s*class=["\']title-text["\']>(.+?)</h1>', webpage, 'title')
+
+ data_json = self._html_search_regex(r'({["\']html5["\'].+?}}}+)', webpage, 'data json')
+ data_json = self._parse_json(js_to_json(data_json), video_id, fatal=False)
+
+        sources = (data_json or {}).get('sources') or self._parse_json(
+ self._html_search_regex(r'updateSrc\(([^\)]+)\)', webpage, 'sources'),
+ video_id, transform_source=js_to_json)
+
+ formats = [{
+ 'url': format.get('src'),
+ 'format_id': format.get('label'),
+ 'height': format.get('res'),
+ } for format in sources]
+
+ view_count = parse_count(self._html_search_regex(
+ r'<p\s*class=["\']views_counter["\']>\s*([\d\.,]+)\s*<span>views?</span></p>',
+ webpage, 'view_count', fatal=False))
+
+ like_count = parse_count(self._html_search_regex(
+ r'<div\s*class="sh_button\s*likes_count">\s*(\d+)\s*</div>',
+ webpage, 'like count', fatal=False))
+
+ dislike_count = parse_count(self._html_search_regex(
+ r'<div\s*class="sh_button\s*dislikes_count">\s*(\d+)\s*</div>',
+ webpage, 'dislike count', fatal=False))
+
+ upload_date = unified_strdate(self._html_search_regex(
+ r'<span\s*class="p-date">Published\s*on\s+([^<]+)',
+ webpage, 'upload date', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'<a\s*class="place-left"[^>]+>(.+?)</a>',
+ webpage, 'uploader', fatal=False)
+
+ description = self._html_search_meta('description', webpage)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'upload_date': upload_date,
+ 'description': description,
+ 'uploader': uploader,
+ }
+
+
+class TokentubeChannelIE(InfoExtractor):
+ _PAGE_SIZE = 20
+ IE_NAME = 'Tokentube:channel'
+ _VALID_URL = r'https?://(?:www\.)?tokentube\.net/channel/(?P<id>\d+)/[^/]+(?:/videos)?'
+ _TESTS = [{
+ 'url': 'https://tokentube.net/channel/3697658904/TokenTube',
+ 'info_dict': {
+ 'id': '3697658904',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ 'url': 'https://tokentube.net/channel/3353234420/Linux/videos',
+ 'info_dict': {
+ 'id': '3353234420',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://tokentube.net/channel/3475834195/Voitontie',
+ 'info_dict': {
+ 'id': '3475834195',
+ },
+ 'playlist_mincount': 150,
+ }]
+
+ def _fetch_page(self, channel_id, page):
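+        # OnDemandPagedList passes 0-based page indices, while the site API expects 1-based pages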
+ page += 1
+ videos_info = self._download_webpage(
+ f'https://tokentube.net/videos?p=0&m=1&sort=recent&u={channel_id}&page={page}',
+ channel_id, headers={'X-Requested-With': 'XMLHttpRequest'},
+ note=f'Downloading page {page}', fatal=False)
+        if videos_info and '</i> Sorry, no results were found.' not in videos_info:
+ for path, media_id in re.findall(
+ r'<a[^>]+\bhref=["\']([^"\']+/[lv]/(\d+)/\S+)["\'][^>]+>',
+ videos_info):
+ yield self.url_result(path, ie=TokentubeIE.ie_key(), video_id=media_id)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, channel_id), self._PAGE_SIZE)
+
+ return self.playlist_result(entries, channel_id)
diff --git a/hypervideo_dl/extractor/toongoggles.py b/hypervideo_dl/extractor/toongoggles.py
index b5ba1c0..df13d64 100644
--- a/hypervideo_dl/extractor/toongoggles.py
+++ b/hypervideo_dl/extractor/toongoggles.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -63,7 +62,7 @@ class ToonGogglesIE(InfoExtractor):
}
def _real_extract(self, url):
- show_id, episode_id = re.match(self._VALID_URL, url).groups()
+ show_id, episode_id = self._match_valid_url(url).groups()
if episode_id:
episode_data = self._call_api('search', episode_id, {
'filter': 'episode',
diff --git a/hypervideo_dl/extractor/toutv.py b/hypervideo_dl/extractor/toutv.py
index 44b022f..6c84c21 100644
--- a/hypervideo_dl/extractor/toutv.py
+++ b/hypervideo_dl/extractor/toutv.py
@@ -74,7 +74,7 @@ class TouTvIE(RadioCanadaIE):
})
# IsDrm does not necessarily mean the video is DRM protected (see
# https://github.com/ytdl-org/youtube-dl/issues/13994).
- if metadata.get('IsDrm'):
+ if not self.get_param('allow_unplayable_formats') and metadata.get('IsDrm'):
self.report_warning('This video is probably DRM protected.', path)
video_id = metadata['IdMedia']
details = metadata['Details']
diff --git a/hypervideo_dl/extractor/traileraddict.py b/hypervideo_dl/extractor/traileraddict.py
index 747370d..10100fb 100644
--- a/hypervideo_dl/extractor/traileraddict.py
+++ b/hypervideo_dl/extractor/traileraddict.py
@@ -20,7 +20,7 @@ class TrailerAddictIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
name = mobj.group('movie') + '/' + mobj.group('trailer_name')
webpage = self._download_webpage(url, name)
diff --git a/hypervideo_dl/extractor/trovo.py b/hypervideo_dl/extractor/trovo.py
index de0107a..ec55f41 100644
--- a/hypervideo_dl/extractor/trovo.py
+++ b/hypervideo_dl/extractor/trovo.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
import json
from .common import InfoExtractor
@@ -14,6 +15,7 @@ from ..utils import (
class TrovoBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
+ _HEADERS = {'Origin': 'https://trovo.live'}
def _extract_streamer_info(self, data):
streamer_info = data.get('streamerInfo') or {}
@@ -35,7 +37,7 @@ class TrovoIE(TrovoBaseIE):
'query': '''{
getLiveInfo(params: {userName: "%s"}) {
isLive
- programInfo {
+ programInfo {
coverUrl
id
streamInfo {
@@ -68,6 +70,7 @@ class TrovoIE(TrovoBaseIE):
'format_id': format_id,
'height': int_or_none(format_id[:-1]) if format_id else None,
'url': play_url,
+ 'http_headers': self._HEADERS,
})
self._sort_formats(formats)
@@ -153,7 +156,7 @@ class TrovoVodIE(TrovoBaseIE):
'protocol': 'm3u8_native',
'tbr': int_or_none(play_info.get('bitrate')),
'url': play_url,
- 'http_headers': {'Origin': 'https://trovo.live'},
+ 'http_headers': self._HEADERS,
})
self._sort_formats(formats)
@@ -192,3 +195,69 @@ class TrovoVodIE(TrovoBaseIE):
}
info.update(self._extract_streamer_info(vod_detail_info))
return info
+
+
+class TrovoChannelBaseIE(InfoExtractor):
+ def _get_vod_json(self, page, uid):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _entries(self, uid):
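+        # Page through the GraphQL VOD listing until it reports hasMore = false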
+ for page in itertools.count(1):
+ vod_json = self._get_vod_json(page, uid)
+ vods = vod_json.get('vodInfos', [])
+ for vod in vods:
+ yield self.url_result(
+ 'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')),
+ ie=TrovoVodIE.ie_key())
+ has_more = vod_json['hasMore']
+ if not has_more:
+ break
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
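+        # Resolve the channel name to its numeric streamer UID via the GraphQL endpoint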
+ uid = str(self._download_json('https://gql.trovo.live/', id, query={
+ 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id
+ })['data']['getLiveInfo']['streamerInfo']['uid'])
+ return self.playlist_result(self._entries(uid), playlist_id=uid)
+
+
+class TrovoChannelVodIE(TrovoChannelBaseIE):
+ _VALID_URL = r'trovovod:(?P<id>[^\s]+)'
+    IE_DESC = 'All VODs of a trovo.live channel; use the "trovovod:" prefix'
+
+ _TESTS = [{
+ 'url': 'trovovod:OneTappedYou',
+ 'playlist_mincount': 24,
+ 'info_dict': {
+ 'id': '100719456',
+ },
+ }]
+
+ _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}'
+ _TYPE = 'video'
+
+ def _get_vod_json(self, page, uid):
+ return self._download_json('https://gql.trovo.live/', uid, query={
+ 'query': self._QUERY % (page, uid)
+ })['data']['getChannelLtvVideoInfos']
+
+
+class TrovoChannelClipIE(TrovoChannelBaseIE):
+ _VALID_URL = r'trovoclip:(?P<id>[^\s]+)'
+    IE_DESC = 'All Clips of a trovo.live channel; use the "trovoclip:" prefix'
+
+ _TESTS = [{
+ 'url': 'trovoclip:OneTappedYou',
+ 'playlist_mincount': 29,
+ 'info_dict': {
+ 'id': '100719456',
+ },
+ }]
+
+ _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}'
+ _TYPE = 'clip'
+
+ def _get_vod_json(self, page, uid):
+ return self._download_json('https://gql.trovo.live/', uid, query={
+ 'query': self._QUERY % (page, uid)
+ })['data']['getChannelClipVideoInfos']
diff --git a/hypervideo_dl/extractor/trutv.py b/hypervideo_dl/extractor/trutv.py
index ce892c8..c09ff89 100644
--- a/hypervideo_dl/extractor/trutv.py
+++ b/hypervideo_dl/extractor/trutv.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .turner import TurnerBaseIE
from ..utils import (
@@ -27,7 +26,7 @@ class TruTVIE(TurnerBaseIE):
}
def _real_extract(self, url):
- series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
+ series_slug, clip_slug, video_id = self._match_valid_url(url).groups()
if video_id:
path = 'episode'
diff --git a/hypervideo_dl/extractor/tubitv.py b/hypervideo_dl/extractor/tubitv.py
index ebfb05c..2e9b325 100644
--- a/hypervideo_dl/extractor/tubitv.py
+++ b/hypervideo_dl/extractor/tubitv.py
@@ -7,13 +7,19 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
+ js_to_json,
sanitized_Request,
urlencode_postdata,
)
class TubiTvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
+ _VALID_URL = r'''(?x)
+ (?:
+ tubitv:|
+ https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/
+ )
+ (?P<id>[0-9]+)'''
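+    # The "tubitv:" prefix lets TubiTvShowIE queue episodes by numeric ID alone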
_LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv'
_GEO_COUNTRIES = ['US']
@@ -75,9 +81,13 @@ class TubiTvIE(InfoExtractor):
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
title = video_data['title']
- formats = self._extract_m3u8_formats(
- self._proto_relative_url(video_data['url']),
- video_id, 'mp4', 'm3u8_native')
+ formats = []
+ url = video_data['url']
+        # The URL can sometimes be empty; it is unclear whether this happens only for DRM-protected videos
+ if url:
+ formats = self._extract_m3u8_formats(
+ self._proto_relative_url(url),
+ video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
thumbnails = []
@@ -108,3 +118,28 @@ class TubiTvIE(InfoExtractor):
'uploader_id': video_data.get('publisher_id'),
'release_year': int_or_none(video_data.get('year')),
}
+
+
+class TubiTvShowIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true',
+ 'playlist_mincount': 390,
+ 'info_dict': {
+ 'id': 'the-joy-of-painting-with-bob-ross',
+ }
+ }]
+
+ def _entries(self, show_url, show_name):
+ show_webpage = self._download_webpage(show_url, show_name)
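+        # The full episode list is embedded in the page as a window.__data JavaScript object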
+ show_json = self._parse_json(self._search_regex(
+ r"window\.__data\s*=\s*({.+?});\s*</script>",
+ show_webpage, 'data',), show_name, transform_source=js_to_json)['video']
+ for episode_id in show_json['fullContentById'].keys():
+ yield self.url_result(
+ 'tubitv:%s' % episode_id,
+ ie=TubiTvIE.ie_key(), video_id=episode_id)
+
+ def _real_extract(self, url):
+ show_name = self._match_valid_url(url).group('show_name')
+ return self.playlist_result(self._entries(url, show_name), playlist_id=show_name)
diff --git a/hypervideo_dl/extractor/tumblr.py b/hypervideo_dl/extractor/tumblr.py
index ae584ad..adc3701 100644
--- a/hypervideo_dl/extractor/tumblr.py
+++ b/hypervideo_dl/extractor/tumblr.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -143,7 +142,7 @@ class TumblrIE(InfoExtractor):
self.report_warning('Login has probably failed')
def _real_extract(self, url):
- m_url = re.match(self._VALID_URL, url)
+ m_url = self._match_valid_url(url)
video_id = m_url.group('id')
blog = m_url.group('blog_name')
diff --git a/hypervideo_dl/extractor/turbo.py b/hypervideo_dl/extractor/turbo.py
index be3eaa5..f6bbf25 100644
--- a/hypervideo_dl/extractor/turbo.py
+++ b/hypervideo_dl/extractor/turbo.py
@@ -30,7 +30,7 @@ class TurboIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/turner.py b/hypervideo_dl/extractor/turner.py
index 81229a5..32125bc 100644
--- a/hypervideo_dl/extractor/turner.py
+++ b/hypervideo_dl/extractor/turner.py
@@ -144,7 +144,7 @@ class TurnerBaseIE(AdobePassIE):
m3u8_id=format_id or 'hls', fatal=False)
if '/secure/' in video_url and '?hdnea=' in video_url:
for f in m3u8_formats:
- f['_seekable'] = False
+ f['_ffmpeg_args'] = ['-seekable', '0']
formats.extend(m3u8_formats)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
@@ -221,6 +221,7 @@ class TurnerBaseIE(AdobePassIE):
}
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
+        is_live = (ap_data or {}).get('is_live')
streams_data = self._download_json(
'http://medium.ngtv.io/media/%s/tv' % media_id,
media_id)['media']['tv']
@@ -237,11 +238,11 @@ class TurnerBaseIE(AdobePassIE):
'http://token.ngtv.io/token/token_spe',
m3u8_url, media_id, ap_data or {}, tokenizer_query)
formats.extend(self._extract_m3u8_formats(
- m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', live=is_live, fatal=False))
duration = float_or_none(stream_data.get('totalRuntime'))
- if not chapters:
+ if not chapters and not is_live:
for chapter in stream_data.get('contentSegments', []):
start_time = float_or_none(chapter.get('start'))
chapter_duration = float_or_none(chapter.get('duration'))
diff --git a/hypervideo_dl/extractor/tv2.py b/hypervideo_dl/extractor/tv2.py
index 334b7d5..e085153 100644
--- a/hypervideo_dl/extractor/tv2.py
+++ b/hypervideo_dl/extractor/tv2.py
@@ -24,37 +24,34 @@ class TV2IE(InfoExtractor):
'url': 'http://www.tv2.no/v/916509/',
'info_dict': {
'id': '916509',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
'timestamp': 1431715610,
'upload_date': '20150515',
- 'duration': 156.967,
+ 'duration': 157,
'view_count': int,
'categories': list,
},
}]
- _API_DOMAIN = 'sumo.tv2.no'
- _PROTOCOLS = ('HDS', 'HLS', 'DASH')
+ _PROTOCOLS = ('HLS', 'DASH')
_GEO_COUNTRIES = ['NO']
def _real_extract(self, url):
video_id = self._match_id(url)
- api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
-
- asset = self._download_json(
- api_base + '.json', video_id,
- 'Downloading metadata JSON')['asset']
- title = asset.get('subtitle') or asset['title']
+ asset = self._download_json('https://sumo.tv2.no/rest/assets/' + video_id, video_id,
+ 'Downloading metadata JSON')
+ title = asset['title']
is_live = asset.get('live') is True
formats = []
format_urls = []
for protocol in self._PROTOCOLS:
try:
- data = self._download_json(
- api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
- video_id, 'Downloading play JSON')['playback']
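+                # The new play API appears to require a JSON device payload even for anonymous playback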
+ data = self._download_json('https://api.sumo.tv2.no/play/%s?stream=%s' % (video_id, protocol),
+                                           video_id, 'Downloading playback JSON',
+ headers={'content-type': 'application/json'},
+ data='{"device":{"id":"1-1-1","name":"Nettleser (HTML)"}}'.encode())['playback']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), video_id)['error']
@@ -65,18 +62,12 @@ class TV2IE(InfoExtractor):
self.raise_login_required()
raise ExtractorError(error['description'])
raise
- items = try_get(data, lambda x: x['items']['item'])
- if not items:
- continue
- if not isinstance(items, list):
- items = [items]
+ items = data.get('streams', [])
for item in items:
- if not isinstance(item, dict):
- continue
video_url = item.get('url')
if not video_url or video_url in format_urls:
continue
- format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+ format_id = '%s-%s' % (protocol.lower(), item.get('type'))
if not self._is_valid_url(video_url, video_id, format_id):
continue
format_urls.append(video_url)
@@ -99,17 +90,15 @@ class TV2IE(InfoExtractor):
formats.append({
'url': video_url,
'format_id': format_id,
- 'tbr': int_or_none(item.get('bitrate')),
- 'filesize': int_or_none(item.get('fileSize')),
})
if not formats and data.get('drmProtected'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ self.report_drm(video_id)
self._sort_formats(formats)
thumbnails = [{
- 'id': thumbnail.get('@type'),
- 'url': thumbnail.get('url'),
- } for _, thumbnail in (asset.get('imageVersions') or {}).items()]
+            'id': thumb_type,
+            'url': thumb_url,
+        } for thumb_type, thumb_url in (asset.get('images') or {}).items()]
return {
'id': video_id,
@@ -117,10 +106,10 @@ class TV2IE(InfoExtractor):
'title': self._live_title(title) if is_live else title,
'description': strip_or_none(asset.get('description')),
'thumbnails': thumbnails,
- 'timestamp': parse_iso8601(asset.get('createTime')),
+ 'timestamp': parse_iso8601(asset.get('live_broadcast_time') or asset.get('update_time')),
'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
'view_count': int_or_none(asset.get('views')),
- 'categories': asset.get('keywords', '').split(','),
+ 'categories': asset.get('tags', '').split(','),
'formats': formats,
'is_live': is_live,
}
@@ -170,7 +159,7 @@ class TV2ArticleIE(InfoExtractor):
return self.playlist_result(entries, playlist_id, title, description)
-class KatsomoIE(TV2IE):
+class KatsomoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
@@ -203,6 +192,93 @@ class KatsomoIE(TV2IE):
_PROTOCOLS = ('HLS', 'MPD')
_GEO_COUNTRIES = ['FI']
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
+
+ asset = self._download_json(
+ api_base + '.json', video_id,
+ 'Downloading metadata JSON')['asset']
+ title = asset.get('subtitle') or asset['title']
+ is_live = asset.get('live') is True
+
+ formats = []
+ format_urls = []
+ for protocol in self._PROTOCOLS:
+ try:
+ data = self._download_json(
+ api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
+ video_id, 'Downloading play JSON')['playback']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), video_id)['error']
+ error_code = error.get('code')
+ if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ elif error_code == 'SESSION_NOT_AUTHENTICATED':
+ self.raise_login_required()
+ raise ExtractorError(error['description'])
+ raise
+ items = try_get(data, lambda x: x['items']['item'])
+ if not items:
+ continue
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ video_url = item.get('url')
+ if not video_url or video_url in format_urls:
+ continue
+ format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
+ format_urls.append(video_url)
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'm3u8':
+ if not data.get('drmProtected'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, format_id, fatal=False))
+ elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+ pass
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'tbr': int_or_none(item.get('bitrate')),
+ 'filesize': int_or_none(item.get('fileSize')),
+ })
+ if not formats and data.get('drmProtected'):
+ self.report_drm(video_id)
+ self._sort_formats(formats)
+
+ thumbnails = [{
+ 'id': thumbnail.get('@type'),
+ 'url': thumbnail.get('url'),
+ } for _, thumbnail in (asset.get('imageVersions') or {}).items()]
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': strip_or_none(asset.get('description')),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(asset.get('createTime')),
+ 'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
+ 'view_count': int_or_none(asset.get('views')),
+ 'categories': asset.get('keywords', '').split(','),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
class MTVUutisetArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
diff --git a/hypervideo_dl/extractor/tv2hu.py b/hypervideo_dl/extractor/tv2hu.py
index 86017b7..f210435 100644
--- a/hypervideo_dl/extractor/tv2hu.py
+++ b/hypervideo_dl/extractor/tv2hu.py
@@ -2,61 +2,109 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ traverse_obj,
+ UnsupportedError,
+)
class TV2HuIE(InfoExtractor):
- IE_NAME = 'tv2.hu'
- _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:[^/]+/)+(?P<id>\d+)_[^/?#]+?\.html'
+ IE_NAME = 'tv2play.hu'
+ _VALID_URL = r'https?://(?:www\.)?tv2play\.hu/(?!szalag/)(?P<id>[^#&?]+)'
_TESTS = [{
- 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html',
- 'md5': '585e58e2e090f34603804bb2c48e98d8',
+ 'url': 'https://tv2play.hu/mintaapak/mintaapak_213_epizod_resz',
'info_dict': {
- 'id': '217679',
+ 'id': '249240',
'ext': 'mp4',
- 'title': 'Ezek megőrültek! - 1. adás 1. rész',
- 'upload_date': '20160826',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }, {
- 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html',
- 'only_matching': True
+ 'title': 'Mintaapák - 213. epizód',
+ 'series': 'Mintaapák',
+ 'duration': 2164,
+ 'description': 'md5:7350147e75485a59598e806c47967b07',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20210825',
+ 'season_number': None,
+ 'episode_number': 213,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
- 'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html',
- 'only_matching': True
+ 'url': 'https://tv2play.hu/taxi_2',
+ 'md5': '585e58e2e090f34603804bb2c48e98d8',
+ 'info_dict': {
+ 'id': '199363',
+ 'ext': 'mp4',
+ 'title': 'Taxi 2',
+ 'series': 'Taxi 2',
+ 'duration': 5087,
+ 'description': 'md5:47762155dc9a50241797ded101b1b08c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'release_date': '20210118',
+ 'season_number': None,
+ 'episode_number': None,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- json_url = self._search_regex(
- r'jsonUrl\s*=\s*"([^"]+)"', webpage, 'json url')
- json_data = self._download_json(json_url, video_id)
-
- formats = []
- for b in ('bitrates', 'backupBitrates'):
- bitrates = json_data.get(b, {})
- m3u8_url = bitrates.get('hls')
- if m3u8_url:
- formats.extend(self._extract_wowza_formats(
- m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp']))
-
- for mp4_url in bitrates.get('mp4', []):
- height = int_or_none(self._search_regex(
- r'\.(\d+)p\.mp4', mp4_url, 'height', default=None))
- formats.append({
- 'format_id': 'http' + ('-%d' % height if height else ''),
- 'url': mp4_url,
- 'height': height,
- 'width': int_or_none(height / 9.0 * 16.0 if height else None),
- })
+ id = self._match_id(url)
+ json_data = self._download_json(f'https://tv2play.hu/api/search/{id}', id)
+
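+        # The search endpoint returns either a single video or a "showpage" that only lists ribbon IDs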
+ if json_data['contentType'] == 'showpage':
+ ribbon_ids = traverse_obj(json_data, ('pages', ..., 'tabs', ..., 'ribbonIds'), get_all=False, expected_type=list)
+ entries = [self.url_result(f'https://tv2play.hu/szalag/{ribbon_id}',
+ ie=TV2HuSeriesIE.ie_key(), video_id=ribbon_id) for ribbon_id in ribbon_ids]
+ return self.playlist_result(entries, playlist_id=id)
+ elif json_data['contentType'] != 'video':
+ raise UnsupportedError(url)
+
+ video_id = str(json_data['id'])
+ player_id = json_data.get('playerId')
+ series_json = json_data.get('seriesInfo', {})
+
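+        # A second request exchanges the player ID for the actual stream-manifest URL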
+ video_json_url = self._download_json(f'https://tv2play.hu/api/streaming-url?playerId={player_id}', video_id)['url']
+ video_json = self._download_json(video_json_url, video_id)
+ m3u8_url = self._proto_relative_url(traverse_obj(video_json, ('bitrates', 'hls')))
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
self._sort_formats(formats)
return {
'id': video_id,
- 'title': self._og_search_title(webpage).strip(),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'upload_date': self._search_regex(
- r'/vod/(\d{8})/', json_url, 'upload_date', default=None),
+ 'title': json_data['title'],
+ 'series': json_data.get('seriesTitle'),
+ 'duration': json_data.get('length'),
+ 'description': json_data.get('description'),
+ 'thumbnail': 'https://tv2play.hu' + json_data.get('thumbnailUrl'),
+ 'release_date': json_data.get('uploadedAt').replace('.', ''),
+ 'season_number': series_json.get('seasonNr'),
+ 'episode_number': series_json.get('episodeNr'),
'formats': formats,
+ 'subtitles': subtitles,
}
+
+
+class TV2HuSeriesIE(InfoExtractor):
+ IE_NAME = 'tv2playseries.hu'
+ _VALID_URL = r'https?://(?:www\.)?tv2play\.hu/szalag/(?P<id>[^#&?]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv2play.hu/szalag/59?rendezes=nepszeruseg',
+ 'playlist_mincount': 284,
+ 'info_dict': {
+ 'id': '59',
+ }
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ json_data = self._download_json(f'https://tv2play.hu/api/ribbons/{id}/0?size=100000', id)
+ entries = []
+ for card in json_data.get('cards', []):
+ video_id = card.get('slug')
+ if video_id:
+ entries.append(self.url_result(f'https://tv2play.hu/{video_id}',
+ ie=TV2HuIE.ie_key(), video_id=video_id))
+
+ return self.playlist_result(entries, playlist_id=id)
diff --git a/hypervideo_dl/extractor/tv4.py b/hypervideo_dl/extractor/tv4.py
index b73bab9..4043e63 100644
--- a/hypervideo_dl/extractor/tv4.py
+++ b/hypervideo_dl/extractor/tv4.py
@@ -93,21 +93,34 @@ class TV4IE(InfoExtractor):
'device': 'browser',
'protocol': 'hls',
})['playbackItem']['manifestUrl']
- formats = self._extract_m3u8_formats(
+ formats = []
+ subtitles = {}
+
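+        # Collect formats from each manifest variant (HLS, DASH, HDS, MSS) and merge any subtitle tracks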
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
- formats.extend(self._extract_mpd_formats(
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
manifest_url.replace('.m3u8', '.mpd'),
- video_id, mpd_id='dash', fatal=False))
- formats.extend(self._extract_f4m_formats(
+ video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+
+ fmts = self._extract_f4m_formats(
manifest_url.replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- formats.extend(self._extract_ism_formats(
+ video_id, f4m_id='hds', fatal=False)
+ formats.extend(fmts)
+
+ fmts, subs = self._extract_ism_formats_and_subtitles(
re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
- video_id, ism_id='mss', fatal=False))
+ video_id, ism_id='mss', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
if not formats and info.get('is_geo_restricted'):
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
self._sort_formats(formats)
@@ -115,7 +128,7 @@ class TV4IE(InfoExtractor):
'id': video_id,
'title': title,
'formats': formats,
- # 'subtitles': subtitles,
+ 'subtitles': subtitles,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
diff --git a/hypervideo_dl/extractor/tv5mondeplus.py b/hypervideo_dl/extractor/tv5mondeplus.py
index b7fe082..a0832d2 100644
--- a/hypervideo_dl/extractor/tv5mondeplus.py
+++ b/hypervideo_dl/extractor/tv5mondeplus.py
@@ -7,6 +7,7 @@ from ..utils import (
extract_attributes,
int_or_none,
parse_duration,
+ try_get,
)
@@ -15,28 +16,28 @@ class TV5MondePlusIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
_TESTS = [{
# movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit',
- 'md5': '8cbde5ea7b296cf635073e27895e227f',
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
+ 'md5': '32fa0cde16a4480d1251502a66856d5f',
'info_dict': {
- 'id': '822a4756-0712-7329-1859-a13ac7fd1407',
- 'display_id': 'rendez-vous-a-atlit',
+ 'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
+ 'display_id': 'ceux-qui-travaillent',
'ext': 'mp4',
- 'title': 'Rendez-vous à Atlit',
- 'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb',
- 'upload_date': '20200130',
+ 'title': 'Ceux qui travaillent',
+ 'description': 'md5:570e8bb688036ace873b2d50d24c026d',
+ 'upload_date': '20210819',
},
}, {
# series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree',
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
'info_dict': {
- 'id': '0df7007c-4900-3936-c601-87a13a93a068',
- 'display_id': 'c-est-la-vie-ennemie-juree',
+ 'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
+ 'display_id': 'vestiaires-caro-actrice',
'ext': 'mp4',
- 'title': "C'est la vie - Ennemie jurée",
- 'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e',
- 'upload_date': '20200130',
- 'series': "C'est la vie",
- 'episode': 'Ennemie jurée',
+ 'title': "Vestiaires - Caro actrice",
+ 'description': 'md5:db15d2e1976641e08377f942778058ea',
+ 'upload_date': '20210819',
+ 'series': "Vestiaires",
+ 'episode': 'Caro actrice',
},
'params': {
'skip_download': True,
@@ -63,7 +64,7 @@ class TV5MondePlusIE(InfoExtractor):
webpage, 'video player loader'))
video_files = self._parse_json(
- vpl_data['data-broadcast'], display_id).get('files', [])
+ vpl_data['data-broadcast'], display_id)
formats = []
for video_file in video_files:
v_url = video_file.get('url')
@@ -81,6 +82,11 @@ class TV5MondePlusIE(InfoExtractor):
})
self._sort_formats(formats)
+ metadata = self._parse_json(
+ vpl_data['data-metadata'], display_id)
+ duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
+ or parse_duration(self._html_search_meta('duration', webpage)))
+
description = self._html_search_regex(
r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
'description', fatal=False)
@@ -109,7 +115,7 @@ class TV5MondePlusIE(InfoExtractor):
'title': title,
'description': description,
'thumbnail': vpl_data.get('data-image'),
- 'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
+ 'duration': duration,
'upload_date': upload_date,
'formats': formats,
'series': series,
diff --git a/hypervideo_dl/extractor/tv5unis.py b/hypervideo_dl/extractor/tv5unis.py
index eabdc22..398b85d 100644
--- a/hypervideo_dl/extractor/tv5unis.py
+++ b/hypervideo_dl/extractor/tv5unis.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -16,7 +15,7 @@ class TV5UnisBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['CA']
def _real_extract(self, url):
- groups = re.match(self._VALID_URL, url).groups()
+ groups = self._match_valid_url(url).groups()
product = self._download_json(
'https://api.tv5unis.ca/graphql', groups[0], query={
'query': '''{
diff --git a/hypervideo_dl/extractor/tver.py b/hypervideo_dl/extractor/tver.py
index a4a30b1..943b3eb 100644
--- a/hypervideo_dl/extractor/tver.py
+++ b/hypervideo_dl/extractor/tver.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -38,7 +37,7 @@ class TVerIE(InfoExtractor):
'https://tver.jp/api/access_token.php', None)['token']
def _real_extract(self, url):
- path, video_id = re.match(self._VALID_URL, url).groups()
+ path, video_id = self._match_valid_url(url).groups()
main = self._download_json(
'https://api.tver.jp/v4/' + path, video_id,
query={'token': self._TOKEN})['main']
diff --git a/hypervideo_dl/extractor/tvigle.py b/hypervideo_dl/extractor/tvigle.py
index 180259a..aa25ba0 100644
--- a/hypervideo_dl/extractor/tvigle.py
+++ b/hypervideo_dl/extractor/tvigle.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -54,7 +53,7 @@ class TvigleIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/tvland.py b/hypervideo_dl/extractor/tvland.py
index 7911441..9ebf57f 100644
--- a/hypervideo_dl/extractor/tvland.py
+++ b/hypervideo_dl/extractor/tvland.py
@@ -1,10 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
-from .spike import ParamountNetworkIE
+from .mtv import MTVServicesInfoExtractor
+# TODO: Remove this extractor; it is no longer used since the service moved to YouTube
-class TVLandIE(ParamountNetworkIE):
+
+class TVLandIE(MTVServicesInfoExtractor):
IE_NAME = 'tvland.com'
_VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://www.tvland.com/feeds/mrss/'
@@ -17,6 +19,7 @@ class TVLandIE(ParamountNetworkIE):
'title': 'The Dog',
},
'playlist_mincount': 5,
+ 'skip': '404 Not found',
}, {
'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6',
'md5': 'e2c6389401cf485df26c79c247b08713',
diff --git a/hypervideo_dl/extractor/tvnow.py b/hypervideo_dl/extractor/tvnow.py
index 9c8a8a0..b318184 100644
--- a/hypervideo_dl/extractor/tvnow.py
+++ b/hypervideo_dl/extractor/tvnow.py
@@ -7,10 +7,12 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
+ get_element_by_id,
int_or_none,
parse_iso8601,
parse_duration,
str_or_none,
+ try_get,
update_url_query,
urljoin,
)
@@ -67,7 +69,7 @@ class TVNowBaseIE(InfoExtractor):
if formats:
break
else:
- if info.get('isDrm'):
+ if not self.get_param('allow_unplayable_formats') and info.get('isDrm'):
raise ExtractorError(
'Video %s is DRM protected' % video_id, expected=True)
if info.get('geoblocked'):
@@ -167,7 +169,7 @@ class TVNowIE(TVNowBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = '%s/%s' % mobj.group(2, 3)
info = self._call_api(
@@ -194,7 +196,7 @@ class TVNowNewIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
show, episode = mobj.group('show', 'episode')
return self.url_result(
@@ -204,6 +206,86 @@ class TVNowNewIE(InfoExtractor):
ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
+class TVNowFilmIE(TVNowBaseIE):
+ _VALID_URL = r'''(?x)
+ (?P<base_url>https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:filme))/
+ (?P<title>[^/?$&]+)-(?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
+ 'info_dict': {
+ 'id': '1426690',
+ 'display_id': 'lord-of-war-haendler-des-todes',
+ 'ext': 'mp4',
+ 'title': 'Lord of War',
+ 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
+ 'timestamp': 1550010000,
+ 'upload_date': '20190212',
+ 'duration': 7016,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/the-machinist-12157',
+ 'info_dict': {
+ 'id': '328160',
+ 'display_id': 'the-machinist',
+ 'ext': 'mp4',
+ 'title': 'The Machinist',
+ 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
+ 'timestamp': 1496469720,
+ 'upload_date': '20170603',
+ 'duration': 5836,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
+ 'only_matching': True, # DRM protected
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('title')
+
+ webpage = self._download_webpage(url, display_id, fatal=False)
+ if not webpage:
+ raise ExtractorError('Cannot download "%s"' % url, expected=True)
+
+ json_text = get_element_by_id('now-web-state', webpage)
+ if not json_text:
+ raise ExtractorError('Cannot read video data', expected=True)
+
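+        # The serialized app state escapes double quotes as "&q;", so restore them before parsing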
+ json_data = self._parse_json(
+ json_text,
+ display_id,
+ transform_source=lambda x: x.replace('&q;', '"'),
+ fatal=False)
+ if not json_data:
+ raise ExtractorError('Cannot read video data', expected=True)
+
+ player_key = next(
+ (key for key in json_data.keys() if 'module/player' in key),
+ None)
+ page_key = next(
+ (key for key in json_data.keys() if 'page/filme' in key),
+ None)
+ movie_id = try_get(
+ json_data,
+ [
+ lambda x: x[player_key]['body']['id'],
+ lambda x: x[page_key]['body']['modules'][0]['id'],
+ lambda x: x[page_key]['body']['modules'][1]['id']],
+ int)
+ if not movie_id:
+ raise ExtractorError('Cannot extract movie ID', expected=True)
+
+ info = self._call_api(
+ 'movies/%d' % movie_id,
+ display_id,
+ query={'fields': ','.join(self._VIDEO_FIELDS)})
+
+ return self._extract_video(info, display_id)
+
+
class TVNowNewBaseIE(InfoExtractor):
def _call_api(self, path, video_id, query={}):
result = self._download_json(
@@ -342,9 +424,85 @@ class TVNowIE(TVNowNewBaseIE):
}
def _real_extract(self, url):
- display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id, video_id = self._match_valid_url(url).groups()
info = self._call_api('player/' + video_id, video_id)
return self._extract_video(info, video_id, display_id)
+
+
+class TVNowFilmIE(TVNowIE):
+ _VALID_URL = r'''(?x)
+ (?P<base_url>https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:filme))/
+ (?P<title>[^/?$&]+)-(?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
+ 'info_dict': {
+ 'id': '1426690',
+ 'display_id': 'lord-of-war-haendler-des-todes',
+ 'ext': 'mp4',
+ 'title': 'Lord of War',
+ 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
+ 'timestamp': 1550010000,
+ 'upload_date': '20190212',
+ 'duration': 7016,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/the-machinist-12157',
+ 'info_dict': {
+ 'id': '328160',
+ 'display_id': 'the-machinist',
+ 'ext': 'mp4',
+ 'title': 'The Machinist',
+ 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
+ 'timestamp': 1496469720,
+ 'upload_date': '20170603',
+ 'duration': 5836,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
+ 'only_matching': True, # DRM protected
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ display_id = mobj.group('title')
+
+ webpage = self._download_webpage(url, display_id, fatal=False)
+ if not webpage:
+ raise ExtractorError('Cannot download "%s"' % url, expected=True)
+
+ json_text = get_element_by_id('now-web-state', webpage)
+ if not json_text:
+ raise ExtractorError('Cannot read video data', expected=True)
+
+ json_data = self._parse_json(
+ json_text,
+ display_id,
+ transform_source=lambda x: x.replace('&q;', '"'),
+ fatal=False)
+ if not json_data:
+ raise ExtractorError('Cannot read video data', expected=True)
+
+ player_key = next(
+ (key for key in json_data.keys() if 'module/player' in key),
+ None)
+ page_key = next(
+ (key for key in json_data.keys() if 'page/filme' in key),
+ None)
+ movie_id = try_get(
+ json_data,
+ [
+ lambda x: x[player_key]['body']['id'],
+ lambda x: x[page_key]['body']['modules'][0]['id'],
+ lambda x: x[page_key]['body']['modules'][1]['id']],
+ int)
+ if not movie_id:
+ raise ExtractorError('Cannot extract movie ID', expected=True)
+
+ info = self._call_api('player/%d' % movie_id, display_id)
+ return self._extract_video(info, url, display_id)
"""
@@ -394,7 +552,7 @@ class TVNowSeasonIE(TVNowListBaseIE):
}]
def _real_extract(self, url):
- _, show_id, season_id = re.match(self._VALID_URL, url).groups()
+ _, show_id, season_id = self._match_valid_url(url).groups()
return self._extract_items(
url, show_id, season_id, {'season': season_id})
@@ -410,7 +568,7 @@ class TVNowAnnualIE(TVNowListBaseIE):
}]
def _real_extract(self, url):
- _, show_id, year, month = re.match(self._VALID_URL, url).groups()
+ _, show_id, year, month = self._match_valid_url(url).groups()
return self._extract_items(
url, show_id, '%s-%s' % (year, month), {
'year': int(year),
@@ -442,7 +600,7 @@ class TVNowShowIE(TVNowListBaseIE):
else super(TVNowShowIE, cls).suitable(url))
def _real_extract(self, url):
- base_url, show_id = re.match(self._VALID_URL, url).groups()
+ base_url, show_id = self._match_valid_url(url).groups()
result = self._call_api(
'teaserrow/format/navigation/' + show_id, show_id)
diff --git a/hypervideo_dl/extractor/tvp.py b/hypervideo_dl/extractor/tvp.py
index accff75..1e42b33 100644
--- a/hypervideo_dl/extractor/tvp.py
+++ b/hypervideo_dl/extractor/tvp.py
@@ -246,7 +246,7 @@ class TVPWebsiteIE(InfoExtractor):
video_id=video_id)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id, playlist_id = mobj.group('display_id', 'id')
return self.playlist_result(
self._entries(display_id, playlist_id), playlist_id)
diff --git a/hypervideo_dl/extractor/tvplay.py b/hypervideo_dl/extractor/tvplay.py
index 0d858c0..fbafb41 100644
--- a/hypervideo_dl/extractor/tvplay.py
+++ b/hypervideo_dl/extractor/tvplay.py
@@ -34,8 +34,8 @@ class TVPlayIE(InfoExtractor):
tvplay(?:\.skaties)?\.lv(?:/parraides)?|
(?:tv3play|play\.tv3)\.lt(?:/programos)?|
tv3play(?:\.tv3)?\.ee/sisu|
- (?:tv(?:3|6|8|10)play|viafree)\.se/program|
- (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
+ (?:tv(?:3|6|8|10)play)\.se/program|
+ (?:(?:tv3play|viasat4play|tv6play)\.no|(?:tv3play)\.dk)/programmer|
play\.nova(?:tv)?\.bg/programi
)
/(?:[^/]+/)+
@@ -224,10 +224,6 @@ class TVPlayIE(InfoExtractor):
'only_matching': True,
},
{
- 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
- 'only_matching': True,
- },
- {
'url': 'mtg:418113',
'only_matching': True,
}
@@ -298,7 +294,8 @@ class TVPlayIE(InfoExtractor):
if not formats and video.get('is_geo_blocked'):
self.raise_geo_restricted(
- 'This content might not be available in your country due to copyright reasons')
+ 'This content might not be available in your country due to copyright reasons',
+ metadata_available=True)
self._sort_formats(formats)
@@ -339,8 +336,8 @@ class ViafreeIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
- viafree\.(?P<country>dk|no|se)
- /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+)
+ viafree\.(?P<country>dk|no|se|fi)
+ /(?P<id>(?:program(?:mer)?|ohjelmat)?/(?:[^/]+/)+[^/?#&]+)
'''
_TESTS = [{
'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
@@ -359,6 +356,23 @@ class ViafreeIE(InfoExtractor):
'skip_download': True,
},
}, {
+ 'url': 'https://www.viafree.dk/programmer/humor/comedy-central-roast-of-charlie-sheen/film/1047660',
+ 'info_dict': {
+ 'id': '1047660',
+ 'ext': 'mp4',
+ 'title': 'Comedy Central Roast of Charlie Sheen - Comedy Central Roast of Charlie Sheen',
+ 'description': 'md5:ec956d941ae9fd7c65a48fd64951dc6d',
+ 'series': 'Comedy Central Roast of Charlie Sheen',
+ 'season_number': 1,
+ 'duration': 3747,
+ 'timestamp': 1608246060,
+ 'upload_date': '20201217'
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True
+ }
+ }, {
# with relatedClips
'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
'only_matching': True,
@@ -372,15 +386,17 @@ class ViafreeIE(InfoExtractor):
}, {
'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
'only_matching': True,
+ }, {
+ 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.viafree.fi/ohjelmat/entertainment/amazing-makeovers/kausi-7/jakso-2',
+ 'only_matching': True,
}]
_GEO_BYPASS = False
- @classmethod
- def suitable(cls, url):
- return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url)
-
def _real_extract(self, url):
- country, path = re.match(self._VALID_URL, url).groups()
+ country, path = self._match_valid_url(url).groups()
content = self._download_json(
'https://viafree-content.mtg-api.com/viafree-content/v1/%s/path/%s' % (country, path), path)
program = content['_embedded']['viafreeBlocks'][0]['_embedded']['program']
@@ -397,16 +413,16 @@ class ViafreeIE(InfoExtractor):
self.raise_geo_restricted(countries=[country])
raise
- formats = self._extract_m3u8_formats(stream_href, guid, 'mp4')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_href, guid, 'mp4')
self._sort_formats(formats)
episode = program.get('episode') or {}
-
return {
'id': guid,
'title': title,
'thumbnail': meta.get('image'),
'description': meta.get('description'),
'series': episode.get('seriesTitle'),
+ 'subtitles': subtitles,
'episode_number': int_or_none(episode.get('episodeNumber')),
'season_number': int_or_none(episode.get('seasonNumber')),
'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000),
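Unlike _extract_m3u8_formats, the _and_subtitles variant returns a pair, and the second element maps language codes to lists of subtitle entries. A sketch of the shape the caller above relies on (the concrete values are invented for illustration):

    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        stream_href, guid, 'mp4')
    # subtitles has the form {language: [entry, ...]}, e.g.:
    # {'sv': [{'ext': 'vtt', 'url': 'https://example.com/sv.vtt'}]}

The mapping can then be passed straight through as the 'subtitles' key of the info dict, as the hunk does.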
diff --git a/hypervideo_dl/extractor/twentyfourvideo.py b/hypervideo_dl/extractor/twentyfourvideo.py
index 74d1404..ae19e11 100644
--- a/hypervideo_dl/extractor/twentyfourvideo.py
+++ b/hypervideo_dl/extractor/twentyfourvideo.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -68,7 +67,7 @@ class TwentyFourVideoIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
host = mobj.group('host')
diff --git a/hypervideo_dl/extractor/twentythreevideo.py b/hypervideo_dl/extractor/twentythreevideo.py
index dc56091..e8cf5a1 100644
--- a/hypervideo_dl/extractor/twentythreevideo.py
+++ b/hypervideo_dl/extractor/twentythreevideo.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import int_or_none
@@ -27,7 +26,7 @@ class TwentyThreeVideoIE(InfoExtractor):
}]
def _real_extract(self, url):
- domain, query, photo_id = re.match(self._VALID_URL, url).groups()
+ domain, query, photo_id = self._match_valid_url(url).groups()
base_url = 'https://%s' % domain
photo_data = self._download_json(
base_url + '/api/photo/list?' + query, photo_id, query={
diff --git a/hypervideo_dl/extractor/twitcasting.py b/hypervideo_dl/extractor/twitcasting.py
index 6596eef..3acf1b1 100644
--- a/hypervideo_dl/extractor/twitcasting.py
+++ b/hypervideo_dl/extractor/twitcasting.py
@@ -1,23 +1,29 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
import re
from .common import InfoExtractor
+from ..downloader.websocket import has_websockets
from ..utils import (
clean_html,
float_or_none,
get_element_by_class,
get_element_by_id,
parse_duration,
+ qualities,
str_to_int,
+ try_get,
unified_timestamp,
urlencode_postdata,
+ urljoin,
+ ExtractorError,
)
class TwitCastingIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/(?:movie|twplayer)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
'md5': '745243cad58c4681dc752490f7540d7f',
@@ -57,19 +63,20 @@ class TwitCastingIE(InfoExtractor):
}]
def _real_extract(self, url):
- uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+ uploader_id, video_id = self._match_valid_url(url).groups()
- video_password = self._downloader.params.get('videopassword')
+ video_password = self.get_param('videopassword')
request_data = None
if video_password:
request_data = urlencode_postdata({
'password': video_password,
})
- webpage = self._download_webpage(url, video_id, data=request_data)
+ webpage = self._download_webpage(
+ url, video_id, data=request_data,
+ headers={'Origin': 'https://twitcasting.tv'})
- title = clean_html(get_element_by_id(
- 'movietitle', webpage)) or self._html_search_meta(
- ['og:title', 'twitter:title'], webpage, fatal=True)
+ title = (clean_html(get_element_by_id('movietitle', webpage))
+ or self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True))
video_js_data = {}
m3u8_url = self._search_regex(
@@ -77,13 +84,31 @@ class TwitCastingIE(InfoExtractor):
webpage, 'm3u8 url', group='url', default=None)
if not m3u8_url:
video_js_data = self._parse_json(self._search_regex(
- r"data-movie-playlist='(\[[^']+\])'",
- webpage, 'movie playlist'), video_id)[0]
- m3u8_url = video_js_data['source']['url']
+ r'data-movie-playlist=(["\'])(?P<url>(?:(?!\1).)+)',
+ webpage, 'movie playlist', group='url', default='[{}]'), video_id)
+ if isinstance(video_js_data, dict):
+ video_js_data = list(video_js_data.values())[0]
+ video_js_data = video_js_data[0]
+ m3u8_url = try_get(video_js_data, lambda x: x['source']['url'])
- # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ stream_server_data = self._download_json(
+ 'https://twitcasting.tv/streamserver.php?target=%s&mode=client' % uploader_id, video_id,
+ 'Downloading live info', fatal=False)
+
+ is_live = 'data-status="online"' in webpage
+ formats = []
+ if is_live and not m3u8_url:
+ m3u8_url = 'https://twitcasting.tv/%s/metastream.m3u8' % uploader_id
+ if is_live and has_websockets and stream_server_data:
+ qq = qualities(['base', 'mobilesource', 'main'])
+ for mode, ws_url in stream_server_data['llfmp4']['streams'].items():
+ formats.append({
+ 'url': ws_url,
+ 'format_id': 'ws-%s' % mode,
+ 'ext': 'mp4',
+ 'quality': qq(mode),
+                'protocol': 'websocket_frag',  # TwitCasting simply sends moof atoms directly over the WebSocket
+ })
thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
description = clean_html(get_element_by_id(
@@ -98,6 +123,11 @@ class TwitCastingIE(InfoExtractor):
r'data-toggle="true"[^>]+datetime="([^"]+)"',
webpage, 'datetime', None))
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live))
+ self._sort_formats(formats)
+
return {
'id': video_id,
'title': title,
@@ -108,4 +138,59 @@ class TwitCastingIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'formats': formats,
+ 'is_live': is_live,
}
+
+
+class TwitCastingLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://twitcasting.tv/ivetesangalo',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uploader_id = self._match_id(url)
+ self.to_screen(
+ 'Downloading live video of user {0}. '
+ 'Pass "https://twitcasting.tv/{0}/show" to download the history'.format(uploader_id))
+
+ webpage = self._download_webpage(url, uploader_id)
+ current_live = self._search_regex(
+ (r'data-type="movie" data-id="(\d+)">',
+ r'tw-sound-flag-open-link" data-id="(\d+)" style=',),
+ webpage, 'current live ID', default=None)
+ if not current_live:
+ raise ExtractorError('The user is not currently live')
+ return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))
+
+
+class TwitCastingUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/show/?(?:[#?]|$)'
+ _TESTS = [{
+ 'url': 'https://twitcasting.tv/noriyukicas/show',
+ 'only_matching': True,
+ }]
+
+ def _entries(self, uploader_id):
+ base_url = next_url = 'https://twitcasting.tv/%s/show' % uploader_id
+ for page_num in itertools.count(1):
+ webpage = self._download_webpage(
+ next_url, uploader_id, query={'filter': 'watchable'}, note='Downloading page %d' % page_num)
+ matches = re.finditer(
+ r'''(?isx)<a\s+class="tw-movie-thumbnail"\s*href="(?P<url>/[^/]+/movie/\d+)"\s*>.+?</a>''',
+ webpage)
+ for mobj in matches:
+ yield self.url_result(urljoin(base_url, mobj.group('url')))
+
+ next_url = self._search_regex(
+ r'<a href="(/%s/show/%d-\d+)[?"]' % (re.escape(uploader_id), page_num),
+ webpage, 'next url', default=None)
+ next_url = urljoin(base_url, next_url)
+ if not next_url:
+ return
+
+ def _real_extract(self, url):
+ uploader_id = self._match_id(url)
+ return self.playlist_result(
+ self._entries(uploader_id), uploader_id, '%s - Live History' % uploader_id)
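The WebSocket formats above rank stream modes with the qualities helper, which turns an ordered list of names into a scoring function; later names score higher and unknown names fall to the bottom. Roughly, and as far as I know matching the utils implementation:

    def qualities(quality_ids):
        """Map a quality name to its index in the list, or -1 if unknown."""
        def q(qid):
            try:
                return quality_ids.index(qid)
            except ValueError:
                return -1
        return q

    qq = qualities(['base', 'mobilesource', 'main'])
    qq('main')          # 2 -- preferred
    qq('mobilesource')  # 1
    qq('unknown')       # -1 -- sorted last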
diff --git a/hypervideo_dl/extractor/twitch.py b/hypervideo_dl/extractor/twitch.py
index a378bd6..be70bee 100644
--- a/hypervideo_dl/extractor/twitch.py
+++ b/hypervideo_dl/extractor/twitch.py
@@ -11,7 +11,6 @@ from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
- compat_urlparse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
)
@@ -23,6 +22,7 @@ from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
+ parse_qs,
qualities,
try_get,
unified_timestamp,
@@ -376,7 +376,7 @@ def _make_video_result(node):
return {
'_type': 'url_transparent',
'ie_key': TwitchVodIE.ie_key(),
- 'id': video_id,
+ 'id': 'v' + video_id,
'url': 'https://www.twitch.tv/videos/%s' % video_id,
'title': node.get('title'),
'thumbnail': node.get('previewThumbnailURL'),
@@ -571,7 +571,7 @@ class TwitchVideosIE(TwitchPlaylistBaseIE):
def _real_extract(self, url):
channel_name = self._match_id(url)
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
filter = qs.get('filter', ['all'])[0]
sort = qs.get('sort', ['time'])[0]
broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
@@ -647,7 +647,7 @@ class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
def _real_extract(self, url):
channel_name = self._match_id(url)
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
range = qs.get('range', ['7d'])[0]
clip = self._RANGE.get(range, self._DEFAULT_CLIP)
return self.playlist_result(
@@ -864,6 +864,7 @@ class TwitchClipsIE(TwitchBaseIE):
'md5': '761769e1eafce0ffebfb4089cb3847cd',
'info_dict': {
'id': '42850523',
+ 'display_id': 'FaintLightGullWholeWheat',
'ext': 'mp4',
'title': 'EA Play 2016 Live from the Novo Theatre',
'thumbnail': r're:^https?://.*\.jpg',
@@ -976,6 +977,7 @@ class TwitchClipsIE(TwitchBaseIE):
return {
'id': clip.get('id') or video_id,
+ 'display_id': video_id,
'title': clip.get('title') or video_id,
'formats': formats,
'duration': int_or_none(clip.get('durationSeconds')),
diff --git a/hypervideo_dl/extractor/twitter.py b/hypervideo_dl/extractor/twitter.py
index cfa7a73..485b781 100644
--- a/hypervideo_dl/extractor/twitter.py
+++ b/hypervideo_dl/extractor/twitter.py
@@ -37,9 +37,9 @@ class TwitterBaseIE(InfoExtractor):
def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
if not variant_url:
- return []
+ return [], {}
elif '.m3u8' in variant_url:
- return self._extract_m3u8_formats(
+ return self._extract_m3u8_formats_and_subtitles(
variant_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
else:
@@ -50,7 +50,7 @@ class TwitterBaseIE(InfoExtractor):
'tbr': tbr,
}
self._search_dimensions_in_video_url(f, variant_url)
- return [f]
+ return [f], {}
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
vmap_url = url_or_none(vmap_url)
@@ -58,17 +58,22 @@ class TwitterBaseIE(InfoExtractor):
return []
vmap_data = self._download_xml(vmap_url, video_id)
formats = []
+ subtitles = {}
urls = []
for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
video_variant.attrib['url'] = compat_urllib_parse_unquote(
video_variant.attrib['url'])
urls.append(video_variant.attrib['url'])
- formats.extend(self._extract_variant_formats(
- video_variant.attrib, video_id))
+ fmts, subs = self._extract_variant_formats(
+ video_variant.attrib, video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
if video_url not in urls:
- formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
- return formats
+ fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ return formats, subtitles
@staticmethod
def _search_dimensions_in_video_url(a_format, video_url):
@@ -475,8 +480,11 @@ class TwitterIE(TwitterBaseIE):
video_info = media.get('video_info') or {}
formats = []
+ subtitles = {}
for variant in video_info.get('variants', []):
- formats.extend(self._extract_variant_formats(variant, twid))
+ fmts, subs = self._extract_variant_formats(variant, twid)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ formats.extend(fmts)
self._sort_formats(formats)
thumbnails = []
@@ -495,6 +503,7 @@ class TwitterIE(TwitterBaseIE):
info.update({
'formats': formats,
+ 'subtitles': subtitles,
'thumbnails': thumbnails,
'duration': float_or_none(video_info.get('duration_millis'), 1000),
})
@@ -544,7 +553,7 @@ class TwitterIE(TwitterBaseIE):
is_amplify = card_name == 'amplify'
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
- formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+ formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
self._sort_formats(formats)
thumbnails = []
@@ -562,6 +571,7 @@ class TwitterIE(TwitterBaseIE):
info.update({
'formats': formats,
+ 'subtitles': subtitles,
'thumbnails': thumbnails,
'duration': int_or_none(get_binding_value(
'content_duration_seconds')),
@@ -667,3 +677,21 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
info['formats'] = self._extract_pscp_m3u8_formats(
m3u8_url, broadcast_id, m3u8_id, state, width, height)
return info
+
+
+class TwitterShortenerIE(TwitterBaseIE):
+ IE_NAME = 'twitter:shortener'
+    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
+ _BASE_URL = 'https://t.co/'
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ eid, id = mobj.group('eid', 'id')
+ if eid:
+ id = eid
+ url = self._BASE_URL + id
+ new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).geturl()
+        __UNSAFE_LINK = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
+        if new_url.startswith(__UNSAFE_LINK):
+            new_url = new_url.replace(__UNSAFE_LINK, '')
+ return self.url_result(new_url)
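The Twitter changes thread a subtitles dict through every variant extraction and combine the results with _merge_subtitles. The merge semantics assumed by this pattern are per-language list concatenation; a minimal sketch (not the upstream implementation, only the behavior the calling code depends on):

    def merge_subtitles(*dicts):
        # Combine {lang: [entries]} mappings, concatenating lists per language.
        merged = {}
        for d in dicts:
            for lang, entries in (d or {}).items():
                merged.setdefault(lang, []).extend(entries)
        return merged

    merge_subtitles({'en': [{'url': 'a.vtt'}]}, {'en': [{'url': 'b.vtt'}]})
    # -> {'en': [{'url': 'a.vtt'}, {'url': 'b.vtt'}]}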
diff --git a/hypervideo_dl/extractor/udemy.py b/hypervideo_dl/extractor/udemy.py
index bc5059b..74f638e 100644
--- a/hypervideo_dl/extractor/udemy.py
+++ b/hypervideo_dl/extractor/udemy.py
@@ -405,7 +405,7 @@ class UdemyIE(InfoExtractor):
if f.get('url'):
formats.append(f)
- self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/hypervideo_dl/extractor/ukcolumn.py b/hypervideo_dl/extractor/ukcolumn.py
new file mode 100644
index 0000000..d2626f0
--- /dev/null
+++ b/hypervideo_dl/extractor/ukcolumn.py
@@ -0,0 +1,72 @@
+from __future__ import unicode_literals
+
+from ..utils import (
+ unescapeHTML,
+ urljoin,
+ ExtractorError,
+)
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
+
+
+class UkColumnIE(InfoExtractor):
+ IE_NAME = 'ukcolumn'
+ _VALID_URL = r'(?i)https?://(?:www\.)?ukcolumn\.org(/index\.php)?/(?:video|ukcolumn-news)/(?P<id>[-a-z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.ukcolumn.org/ukcolumn-news/uk-column-news-28th-april-2021',
+ 'info_dict': {
+ 'id': '541632443',
+ 'ext': 'mp4',
+ 'title': 'UK Column News - 28th April 2021',
+ 'uploader_id': 'ukcolumn',
+ 'uploader': 'UK Column',
+ },
+ 'add_ie': [VimeoIE.ie_key()],
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ 'params': {
+ 'skip_download': 'Handled by Vimeo',
+ },
+ }, {
+ 'url': 'https://www.ukcolumn.org/video/insight-eu-military-unification',
+ 'info_dict': {
+ 'id': 'Fzbnb9t7XAw',
+ 'ext': 'mp4',
+ 'title': 'Insight: EU Military Unification',
+ 'uploader_id': 'ukcolumn',
+ 'description': 'md5:29a207965271af89baa0bc191f5de576',
+ 'uploader': 'UK Column',
+ 'upload_date': '20170514',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ 'params': {
+ 'skip_download': 'Handled by Youtube',
+ },
+ }, {
+ 'url': 'https://www.ukcolumn.org/index.php/ukcolumn-news/uk-column-news-30th-april-2021',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ oembed_url = urljoin(url, unescapeHTML(self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>/media/oembed\?url=.+?)\1',
+ webpage, 'OEmbed URL', group='url')))
+ oembed_webpage = self._download_webpage(
+ oembed_url, display_id, note='Downloading OEmbed page')
+
+ ie, video_url = YoutubeIE, YoutubeIE._extract_url(oembed_webpage)
+ if not video_url:
+ ie, video_url = VimeoIE, VimeoIE._extract_url(url, oembed_webpage)
+ if not video_url:
+ raise ExtractorError('No embedded video found')
+
+ return {
+ '_type': 'url_transparent',
+ 'title': self._og_search_title(webpage),
+ 'url': video_url,
+ 'ie_key': ie.ie_key(),
+ }
diff --git a/hypervideo_dl/extractor/umg.py b/hypervideo_dl/extractor/umg.py
index d815cd9..c1b65d1 100644
--- a/hypervideo_dl/extractor/umg.py
+++ b/hypervideo_dl/extractor/umg.py
@@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
- 'https://api.universal-music.de/graphql',
+ 'https://graphql.universal-music.de/',
video_id, query={
'query': '''{
universalMusic(channel:16) {
@@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor):
formats = []
def add_m3u8_format(format_id):
- m3u8_formats = self._extract_m3u8_formats(
+ formats.extend(self._extract_m3u8_formats(
hls_url_template % format_id, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal='False')
- if m3u8_formats and m3u8_formats[0].get('height'):
- formats.extend(m3u8_formats)
+ 'm3u8_native', m3u8_id='hls', fatal=False))
for f in video_data.get('formats', []):
f_url = f.get('url')
@@ -91,7 +89,7 @@ class UMGDeIE(InfoExtractor):
if not formats:
for format_id in (867, 836, 940):
add_m3u8_format(format_id)
- self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
+ self._sort_formats(formats)
return {
'id': video_id,
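Besides simplifying add_m3u8_format, the hunk above fixes a subtle bug: the old call passed fatal='False', and a non-empty string is truthy, so it behaved like fatal=True:

    bool('False')  # True  -- any non-empty string is truthy
    bool(False)    # False -- the corrected keyword argument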
diff --git a/hypervideo_dl/extractor/unistra.py b/hypervideo_dl/extractor/unistra.py
index a724cdb..685d74f 100644
--- a/hypervideo_dl/extractor/unistra.py
+++ b/hypervideo_dl/extractor/unistra.py
@@ -33,7 +33,7 @@ class UnistraIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/uol.py b/hypervideo_dl/extractor/uol.py
index 628adf2..4a2a97f 100644
--- a/hypervideo_dl/extractor/uol.py
+++ b/hypervideo_dl/extractor/uol.py
@@ -110,7 +110,6 @@ class UOLIE(InfoExtractor):
'format_id': format_id,
'url': f_url,
'quality': quality(format_id),
- 'preference': -1,
})
self._sort_formats(formats)
diff --git a/hypervideo_dl/extractor/uplynk.py b/hypervideo_dl/extractor/uplynk.py
index f06bf5b..9adb969 100644
--- a/hypervideo_dl/extractor/uplynk.py
+++ b/hypervideo_dl/extractor/uplynk.py
@@ -30,7 +30,7 @@ class UplynkIE(InfoExtractor):
def _extract_uplynk_info(self, uplynk_content_url):
path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
display_id = video_id or external_id
- formats = self._extract_m3u8_formats(
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
'http://content.uplynk.com/%s.m3u8' % path,
display_id, 'mp4', 'm3u8_native')
if session_id:
@@ -48,6 +48,7 @@ class UplynkIE(InfoExtractor):
'duration': float_or_none(asset.get('duration')),
'uploader_id': asset.get('owner'),
'formats': formats,
+ 'subtitles': subtitles,
}
def _real_extract(self, url):
@@ -60,7 +61,7 @@ class UplynkPreplayIE(UplynkIE):
_TEST = None
def _real_extract(self, url):
- path, external_id, video_id = re.match(self._VALID_URL, url).groups()
+ path, external_id, video_id = self._match_valid_url(url).groups()
display_id = video_id or external_id
preplay = self._download_json(url, display_id)
content_url = 'http://content.uplynk.com/%s.m3u8' % path
diff --git a/hypervideo_dl/extractor/urort.py b/hypervideo_dl/extractor/urort.py
index 8f6edab..020425f 100644
--- a/hypervideo_dl/extractor/urort.py
+++ b/hypervideo_dl/extractor/urort.py
@@ -44,7 +44,7 @@ class UrortIE(InfoExtractor):
'ext': f['FileType'],
'format_id': '%s-%s' % (f['FileType'], f.get('Quality', '')),
'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'],
- 'preference': 3 if f['FileType'] == 'mp3' else 2,
+ 'quality': 3 if f['FileType'] == 'mp3' else 2,
} for f in s['Files']]
self._sort_formats(formats)
e = {
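The urort hunk swaps 'preference' for 'quality'. As I understand the format-sorting conventions, 'quality' expresses the source's own ranking of its formats, while 'preference' overrides ordering globally, so 'quality' is the appropriate field here; with other attributes equal, the higher value wins:

    formats = [
        {'url': 'http://example.com/t.ogg', 'format_id': 'ogg-', 'quality': 2},
        {'url': 'http://example.com/t.mp3', 'format_id': 'mp3-', 'quality': 3},
    ]
    self._sort_formats(formats)  # mp3 sorts first, all else being equal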
diff --git a/hypervideo_dl/extractor/urplay.py b/hypervideo_dl/extractor/urplay.py
index d6c7914..753ffa4 100644
--- a/hypervideo_dl/extractor/urplay.py
+++ b/hypervideo_dl/extractor/urplay.py
@@ -56,13 +56,12 @@ class URPlayIE(InfoExtractor):
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
episode = urplayer_data['title']
- raw_streaming_info = urplayer_data['streamingInfo']['raw']
- host = self._download_json(
- 'http://streaming-loadbalancer.ur.se/loadbalancer.json',
- video_id)['redirect']
+ host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
formats = []
- for k, v in raw_streaming_info.items():
+ urplayer_streams = urplayer_data.get('streamingInfo', {})
+
+ for k, v in urplayer_streams.get('raw', {}).items():
if not (k in ('sd', 'hd') and isinstance(v, dict)):
continue
file_http = v.get('location')
@@ -72,6 +71,13 @@ class URPlayIE(InfoExtractor):
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
self._sort_formats(formats)
+ subtitles = {}
+        subs = urplayer_streams.get('sweComplete', {}).get('tt', {}).get('location')
+ if subs:
+ subtitles.setdefault('Svenska', []).append({
+ 'url': subs,
+ })
+
image = urplayer_data.get('image') or {}
thumbnails = []
for k, v in image.items():
@@ -92,6 +98,7 @@ class URPlayIE(InfoExtractor):
return {
'id': video_id,
+ 'subtitles': subtitles,
'title': '%s : %s' % (series_title, episode) if series_title else episode,
'description': urplayer_data.get('description'),
'thumbnails': thumbnails,
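The new subtitles block in urplay.py builds the standard mapping with setdefault. For a single Swedish track the result looks like this (URL invented for illustration):

    subtitles = {}
    subs = 'https://example.com/sweComplete.tt'  # hypothetical location value
    if subs:
        subtitles.setdefault('Svenska', []).append({'url': subs})
    # subtitles == {'Svenska': [{'url': 'https://example.com/sweComplete.tt'}]}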
diff --git a/hypervideo_dl/extractor/usanetwork.py b/hypervideo_dl/extractor/usanetwork.py
index e3784e5..d953e46 100644
--- a/hypervideo_dl/extractor/usanetwork.py
+++ b/hypervideo_dl/extractor/usanetwork.py
@@ -5,7 +5,7 @@ from .nbc import NBCIE
class USANetworkIE(NBCIE):
- _VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/[^/]+/video/[^/]+/(?P<id>\d+))'
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P<id>\d+))'
_TESTS = [{
'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
'info_dict': {
diff --git a/hypervideo_dl/extractor/ustream.py b/hypervideo_dl/extractor/ustream.py
index 1e29cbe..8b75879 100644
--- a/hypervideo_dl/extractor/ustream.py
+++ b/hypervideo_dl/extractor/ustream.py
@@ -165,7 +165,7 @@ class UstreamIE(InfoExtractor):
return formats
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
+ m = self._match_valid_url(url)
video_id = m.group('id')
# some sites use this embed format (see: https://github.com/ytdl-org/youtube-dl/issues/2990)
@@ -258,7 +258,7 @@ class UstreamChannelIE(InfoExtractor):
}
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
+ m = self._match_valid_url(url)
display_id = m.group('slug')
webpage = self._download_webpage(url, display_id)
channel_id = self._html_search_meta('ustream:channel_id', webpage)
diff --git a/hypervideo_dl/extractor/ustudio.py b/hypervideo_dl/extractor/ustudio.py
index 56509be..92509d1 100644
--- a/hypervideo_dl/extractor/ustudio.py
+++ b/hypervideo_dl/extractor/ustudio.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -29,7 +28,7 @@ class UstudioIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id, display_id = re.match(self._VALID_URL, url).groups()
+ video_id, display_id = self._match_valid_url(url).groups()
config = self._download_xml(
'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
@@ -83,7 +82,7 @@ class UstudioEmbedIE(InfoExtractor):
}
def _real_extract(self, url):
- uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+ uploader_id, video_id = self._match_valid_url(url).groups()
video_data = self._download_json(
'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
video_id)['videos'][0]
diff --git a/hypervideo_dl/extractor/utreon.py b/hypervideo_dl/extractor/utreon.py
new file mode 100644
index 0000000..4a25f0c
--- /dev/null
+++ b/hypervideo_dl/extractor/utreon.py
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ int_or_none,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class UtreonIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)(?:www\.)?utreon\.com/v/(?P<id>[a-zA-Z0-9_-]+)'
+ _TESTS = [{
+ 'url': 'https://utreon.com/v/z_I7ikQbuDw',
+ 'info_dict': {
+ 'id': 'z_I7ikQbuDw',
+ 'ext': 'mp4',
+ 'title': 'Freedom Friday meditation - Rising in the wind',
+ 'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
+ 'uploader': 'Heather Dawn Elemental Health',
+ 'thumbnail': 'https://data-1.utreon.com/v/MG/M2/NT/z_I7ikQbuDw/z_I7ikQbuDw_preview.jpg',
+ 'release_date': '20210723',
+ }
+ }, {
+ 'url': 'https://utreon.com/v/jerJw5EOOVU',
+ 'info_dict': {
+ 'id': 'jerJw5EOOVU',
+ 'ext': 'mp4',
+ 'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
+ 'description': 'md5:61ee6c2da98be51b04b969ca80273aaa',
+ 'uploader': 'Frases e Poemas Quotes and Poems',
+ 'thumbnail': 'https://data-1.utreon.com/v/Mz/Zh/ND/jerJw5EOOVU/jerJw5EOOVU_89af85470a4b16eededde7f8674c96d9_cover.jpg',
+ 'release_date': '20210723',
+ }
+ }, {
+ 'url': 'https://utreon.com/v/C4ZxXhYBBmE',
+ 'info_dict': {
+ 'id': 'C4ZxXhYBBmE',
+ 'ext': 'mp4',
+ 'title': 'Biden’s Capital Gains Tax Rate to Test World’s Highest',
+ 'description': 'md5:fb5a6c2e506f013cc76f133f673bc5c8',
+ 'uploader': 'Nomad Capitalist',
+ 'thumbnail': 'https://data-1.utreon.com/v/ZD/k1/Mj/C4ZxXhYBBmE/C4ZxXhYBBmE_628342076198c9c06dd6b2c665978584_cover.jpg',
+ 'release_date': '20210723',
+ }
+ }, {
+ 'url': 'https://utreon.com/v/Y-stEH-FBm8',
+ 'info_dict': {
+ 'id': 'Y-stEH-FBm8',
+ 'ext': 'mp4',
+ 'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
+ 'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
+ 'uploader': 'Merryweather Comics',
+ 'thumbnail': 'https://data-1.utreon.com/v/MT/E4/Zj/Y-stEH-FBm8/Y-stEH-FBm8_5290676a41a4a1096db133b09f54f77b_cover.jpg',
+ 'release_date': '20210718',
+ }},
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ 'https://api.utreon.com/v1/videos/' + video_id,
+ video_id)
+ videos_json = json_data['videos']
+ formats = [{
+ 'url': format_url,
+ 'format_id': format_key.split('_')[1],
+ 'height': int(format_key.split('_')[1][:-1]),
+ } for format_key, format_url in videos_json.items() if url_or_none(format_url)]
+ self._sort_formats(formats)
+ thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url')))
+ return {
+ 'id': video_id,
+ 'title': json_data['title'],
+ 'formats': formats,
+ 'description': str_or_none(json_data.get('description')),
+ 'duration': int_or_none(json_data.get('duration')),
+ 'uploader': str_or_none(try_get(json_data, lambda x: x['channel']['title'])),
+ 'thumbnail': thumbnail,
+ 'release_date': unified_strdate(json_data.get('published_datetime')),
+ }
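The Utreon format list assumes API keys shaped like url_720p: splitting on '_' yields the quality label, and stripping the trailing 'p' yields the height. A worked example with a made-up key:

    format_key = 'url_720p'           # hypothetical key from videos_json
    label = format_key.split('_')[1]  # '720p' -> used as format_id
    height = int(label[:-1])          # 720    -> used as height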
diff --git a/hypervideo_dl/extractor/varzesh3.py b/hypervideo_dl/extractor/varzesh3.py
index f474ed7..81313dc 100644
--- a/hypervideo_dl/extractor/varzesh3.py
+++ b/hypervideo_dl/extractor/varzesh3.py
@@ -2,12 +2,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlparse,
- compat_parse_qs,
-)
from ..utils import (
clean_html,
+ parse_qs,
remove_start,
)
@@ -59,7 +56,7 @@ class Varzesh3IE(InfoExtractor):
fb_sharer_url = self._search_regex(
r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php?[^"]+)"',
webpage, 'facebook sharer URL', fatal=False)
- sharer_params = compat_parse_qs(compat_urllib_parse_urlparse(fb_sharer_url).query)
+ sharer_params = parse_qs(fb_sharer_url)
thumbnail = sharer_params.get('p[images][0]', [None])[0]
video_id = self._search_regex(
diff --git a/hypervideo_dl/extractor/veo.py b/hypervideo_dl/extractor/veo.py
new file mode 100644
index 0000000..4e57a52
--- /dev/null
+++ b/hypervideo_dl/extractor/veo.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ mimetype2ext,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class VeoIE(InfoExtractor):
+ _VALID_URL = r'https?://app\.veo\.co/matches/(?P<id>[0-9A-Za-z-]+)'
+
+ _TESTS = [{
+ 'url': 'https://app.veo.co/matches/20201027-last-period/',
+ 'info_dict': {
+ 'id': '20201027-last-period',
+ 'ext': 'mp4',
+ 'title': 'Akidemy u11s v Bradford Boys u11s (Game 3)',
+ 'thumbnail': 're:https://c.veocdn.com/.+/thumbnail.jpg',
+ 'upload_date': '20201028',
+ 'timestamp': 1603847208,
+ 'duration': 1916,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ metadata = self._download_json(
+ 'https://app.veo.co/api/app/matches/%s' % video_id, video_id)
+
+ video_data = self._download_json(
+ 'https://app.veo.co/api/app/matches/%s/videos' % video_id, video_id, 'Downloading video data')
+
+ title = metadata.get('title')
+ thumbnail = url_or_none(metadata.get('thumbnail'))
+
+ timestamp = unified_timestamp(metadata.get('created'))
+ duration = int_or_none(metadata.get('duration'))
+ view_count = int_or_none(metadata.get('view_count'))
+
+ formats = []
+ for fmt in video_data:
+ mimetype = fmt.get('mime_type')
+ # skip configuration file for panoramic video
+ if mimetype == 'video/mp2t':
+ continue
+ height = int_or_none(fmt.get('height'))
+ bitrate = int_or_none(fmt.get('bit_rate'), scale=1000)
+ render_type = fmt.get('render_type')
+ formats.append({
+ 'url': url_or_none(fmt.get('url')),
+ 'format_id': '%s-%sp' % (render_type, height),
+ 'ext': mimetype2ext(mimetype),
+ 'width': int_or_none(fmt.get('width')),
+ 'height': height,
+ 'vbr': bitrate
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'duration': duration
+ }
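int_or_none(..., scale=1000) in the Veo format loop divides after converting, so a bit_rate reported in bits per second comes out in kbit/s for 'vbr', and a missing field stays None:

    int_or_none('2500000', scale=1000)  # -> 2500 (bps to kbps)
    int_or_none(None, scale=1000)       # -> None (missing field tolerated)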
diff --git a/hypervideo_dl/extractor/vesti.py b/hypervideo_dl/extractor/vesti.py
index 5ab7168..002047d 100644
--- a/hypervideo_dl/extractor/vesti.py
+++ b/hypervideo_dl/extractor/vesti.py
@@ -101,7 +101,7 @@ class VestiIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id, 'Downloading page')
diff --git a/hypervideo_dl/extractor/vevo.py b/hypervideo_dl/extractor/vevo.py
index 4ea9f1b..8a0f292 100644
--- a/hypervideo_dl/extractor/vevo.py
+++ b/hypervideo_dl/extractor/vevo.py
@@ -6,13 +6,13 @@ import json
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_urlparse,
compat_HTTPError,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
+ parse_qs,
)
@@ -38,117 +38,7 @@ class VevoIE(VevoBaseIE):
vevo:)
(?P<id>[^&?#]+)'''
- _TESTS = [{
- 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
- 'md5': '95ee28ee45e70130e3ab02b0f579ae23',
- 'info_dict': {
- 'id': 'GB1101300280',
- 'ext': 'mp4',
- 'title': 'Hurts - Somebody to Die For',
- 'timestamp': 1372057200,
- 'upload_date': '20130624',
- 'uploader': 'Hurts',
- 'track': 'Somebody to Die For',
- 'artist': 'Hurts',
- 'genre': 'Pop',
- },
- 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
- }, {
- 'note': 'v3 SMIL format',
- 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
- 'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
- 'info_dict': {
- 'id': 'USUV71302923',
- 'ext': 'mp4',
- 'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
- 'timestamp': 1392796919,
- 'upload_date': '20140219',
- 'uploader': 'Cassadee Pope',
- 'track': 'I Wish I Could Break Your Heart',
- 'artist': 'Cassadee Pope',
- 'genre': 'Country',
- },
- 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
- }, {
- 'note': 'Age-limited video',
- 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
- 'info_dict': {
- 'id': 'USRV81300282',
- 'ext': 'mp4',
- 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
- 'age_limit': 18,
- 'timestamp': 1372888800,
- 'upload_date': '20130703',
- 'uploader': 'Justin Timberlake',
- 'track': 'Tunnel Vision (Explicit)',
- 'artist': 'Justin Timberlake',
- 'genre': 'Pop',
- },
- 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
- }, {
- 'note': 'No video_info',
- 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
- 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
- 'info_dict': {
- 'id': 'USUV71503000',
- 'ext': 'mp4',
- 'title': 'K Camp ft. T.I. - Till I Die',
- 'age_limit': 18,
- 'timestamp': 1449468000,
- 'upload_date': '20151207',
- 'uploader': 'K Camp',
- 'track': 'Till I Die',
- 'artist': 'K Camp',
- 'genre': 'Hip-Hop',
- },
- 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
- }, {
- 'note': 'Featured test',
- 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
- 'md5': 'd28675e5e8805035d949dc5cf161071d',
- 'info_dict': {
- 'id': 'USUV71402190',
- 'ext': 'mp4',
- 'title': 'Lemaitre ft. LoLo - Wait',
- 'age_limit': 0,
- 'timestamp': 1413432000,
- 'upload_date': '20141016',
- 'uploader': 'Lemaitre',
- 'track': 'Wait',
- 'artist': 'Lemaitre',
- 'genre': 'Electronic',
- },
- 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
- }, {
- 'note': 'Only available via webpage',
- 'url': 'http://www.vevo.com/watch/GBUV71600656',
- 'md5': '67e79210613865b66a47c33baa5e37fe',
- 'info_dict': {
- 'id': 'GBUV71600656',
- 'ext': 'mp4',
- 'title': 'ABC - Viva Love',
- 'age_limit': 0,
- 'timestamp': 1461830400,
- 'upload_date': '20160428',
- 'uploader': 'ABC',
- 'track': 'Viva Love',
- 'artist': 'ABC',
- 'genre': 'Pop',
- },
- 'expected_warnings': ['Failed to download video versions info'],
- }, {
- # no genres available
- 'url': 'http://www.vevo.com/watch/INS171400764',
- 'only_matching': True,
- }, {
- # Another case available only via the webpage; using streams/streamsV3 formats
- # Geo-restricted to Netherlands/Germany
- 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
- 'only_matching': True,
- }, {
- 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
- 'only_matching': True,
- }]
+ _TESTS = []
_VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions
1: 'level3',
@@ -310,13 +200,6 @@ class VevoPlaylistIE(VevoBaseIE):
_VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
- 'info_dict': {
- 'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
- 'title': 'Best-Of: Birdman',
- },
- 'playlist_count': 10,
- }, {
'url': 'http://www.vevo.com/watch/genre/rock',
'info_dict': {
'id': 'rock',
@@ -324,33 +207,18 @@ class VevoPlaylistIE(VevoBaseIE):
},
'playlist_count': 20,
}, {
- 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
- 'md5': '32dcdfddddf9ec6917fc88ca26d36282',
- 'info_dict': {
- 'id': 'USCMV1100073',
- 'ext': 'mp4',
- 'title': 'Birdman - Y.U. MAD',
- 'timestamp': 1323417600,
- 'upload_date': '20111209',
- 'uploader': 'Birdman',
- 'track': 'Y.U. MAD',
- 'artist': 'Birdman',
- 'genre': 'Rap/Hip-Hop',
- },
- 'expected_warnings': ['Unable to download SMIL file'],
- }, {
'url': 'http://www.vevo.com/watch/genre/rock?index=0',
'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
playlist_id = mobj.group('id')
playlist_kind = mobj.group('kind')
webpage = self._download_webpage(url, playlist_id)
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ qs = parse_qs(url)
index = qs.get('index', [None])[0]
if index:
diff --git a/hypervideo_dl/extractor/vgtv.py b/hypervideo_dl/extractor/vgtv.py
index 22e99e8..b6131ff 100644
--- a/hypervideo_dl/extractor/vgtv.py
+++ b/hypervideo_dl/extractor/vgtv.py
@@ -165,7 +165,7 @@ class VGTVIE(XstreamIE):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
host = mobj.group('host')
appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
diff --git a/hypervideo_dl/extractor/vh1.py b/hypervideo_dl/extractor/vh1.py
index dff94a2..862c5c7 100644
--- a/hypervideo_dl/extractor/vh1.py
+++ b/hypervideo_dl/extractor/vh1.py
@@ -3,27 +3,29 @@ from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
+# TODO Remove - Reason: Outdated Site
+
class VH1IE(MTVServicesInfoExtractor):
IE_NAME = 'vh1.com'
_FEED_URL = 'http://www.vh1.com/feeds/mrss/'
_TESTS = [{
- 'url': 'http://www.vh1.com/episodes/0umwpq/hip-hop-squares-kent-jones-vs-nick-young-season-1-ep-120',
+ 'url': 'https://www.vh1.com/episodes/0aqivv/nick-cannon-presents-wild-n-out-foushee-season-16-ep-12',
'info_dict': {
- 'title': 'Kent Jones vs. Nick Young',
- 'description': 'Come to Play. Stay to Party. With Mike Epps, TIP, O’Shea Jackson Jr., T-Pain, Tisha Campbell-Martin and more.',
+ 'title': 'Fousheé',
+ 'description': 'Fousheé joins Team Evolutions fight against Nick and Team Revolution in Baby Daddy, Baby Mama; Kick Em Out the Classroom; Backseat of My Ride and Wildstyle; and Fousheé performs.',
},
'playlist_mincount': 4,
+ 'skip': '404 Not found',
}, {
# Clip
- 'url': 'http://www.vh1.com/video-clips/t74mif/scared-famous-scared-famous-extended-preview',
+ 'url': 'https://www.vh1.com/video-clips/e0sja0/nick-cannon-presents-wild-n-out-foushee-clap-for-him',
'info_dict': {
- 'id': '0a50c2d2-a86b-4141-9565-911c7e2d0b92',
+ 'id': 'a07563f7-a37b-4e7f-af68-85855c2c7cc3',
'ext': 'mp4',
- 'title': 'Scared Famous|October 9, 2017|1|NO-EPISODE#|Scared Famous + Extended Preview',
- 'description': 'md5:eff5551a274c473a29463de40f7b09da',
- 'upload_date': '20171009',
- 'timestamp': 1507574700,
+ 'title': 'Fousheé - "clap for him"',
+ 'description': 'Singer Fousheé hits the Wild N Out: In the Dark stage with a performance of the tongue-in-cheek track "clap for him" from her 2021 album "time machine."',
+ 'upload_date': '20210826',
},
'params': {
# m3u8 download
@@ -32,10 +34,3 @@ class VH1IE(MTVServicesInfoExtractor):
}]
_VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- mgid = self._extract_triforce_mgid(webpage)
- videos_info = self._get_videos_info(mgid)
- return videos_info
diff --git a/hypervideo_dl/extractor/vice.py b/hypervideo_dl/extractor/vice.py
index e374995..ca4d3ed 100644
--- a/hypervideo_dl/extractor/vice.py
+++ b/hypervideo_dl/extractor/vice.py
@@ -118,7 +118,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
return urls[0] if urls else None
def _real_extract(self, url):
- locale, video_id = re.match(self._VALID_URL, url).groups()
+ locale, video_id = self._match_valid_url(url).groups()
video = self._call_api('videos', 'id', video_id, locale, '''body
locked
@@ -225,7 +225,7 @@ class ViceShowIE(ViceBaseIE):
video['url'], ViceIE.ie_key(), video.get('id'))
def _real_extract(self, url):
- locale, display_id = re.match(self._VALID_URL, url).groups()
+ locale, display_id = self._match_valid_url(url).groups()
show = self._call_api('shows', 'slug', display_id, locale, '''dek
id
title''')[0]
@@ -302,7 +302,7 @@ class ViceArticleIE(ViceBaseIE):
}]
def _real_extract(self, url):
- locale, display_id = re.match(self._VALID_URL, url).groups()
+ locale, display_id = self._match_valid_url(url).groups()
article = self._call_api('articles', 'slug', display_id, locale, '''body
embed_code''')[0]
diff --git a/hypervideo_dl/extractor/viddler.py b/hypervideo_dl/extractor/viddler.py
index 6423584..ecc4824 100644
--- a/hypervideo_dl/extractor/viddler.py
+++ b/hypervideo_dl/extractor/viddler.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -75,7 +74,7 @@ class ViddlerIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id, secret = re.match(self._VALID_URL, url).groups()
+ video_id, secret = self._match_valid_url(url).groups()
query = {
'video_id': video_id,
diff --git a/hypervideo_dl/extractor/videa.py b/hypervideo_dl/extractor/videa.py
index ab2c15c..512ade7 100644
--- a/hypervideo_dl/extractor/videa.py
+++ b/hypervideo_dl/extractor/videa.py
@@ -11,7 +11,9 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_codecs,
+ parse_qs,
update_url_query,
+ urljoin,
xpath_element,
xpath_text,
)
@@ -45,10 +47,24 @@ class VideaIE(InfoExtractor):
},
}, {
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
- 'only_matching': True,
+ 'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
+ 'info_dict': {
+ 'id': 'jAHDWfWSJH5XuFhH',
+ 'ext': 'mp4',
+ 'title': 'Supercars előzés',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 64,
+ },
}, {
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
- 'only_matching': True,
+ 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+ 'info_dict': {
+ 'id': '8YfIAjxwWGwT8HVQ',
+ 'ext': 'mp4',
+ 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 21,
+ },
}, {
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
'only_matching': True,
@@ -95,9 +111,17 @@ class VideaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- query = {'v': video_id}
- player_page = self._download_webpage(
- 'https://videa.hu/player', video_id, query=query)
+
+ video_page = self._download_webpage(url, video_id)
+
+ if 'videa.hu/player' in url:
+ player_url = url
+ player_page = video_page
+ else:
+ player_url = self._search_regex(
+ r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
+ player_url = urljoin(url, player_url)
+ player_page = self._download_webpage(player_url, video_id)
nonce = self._search_regex(
r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
@@ -107,6 +131,7 @@ class VideaIE(InfoExtractor):
for i in range(0, 32):
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
+ query = parse_qs(player_url)
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
query['_s'] = random_seed
query['_t'] = result[:16]
@@ -127,7 +152,7 @@ class VideaIE(InfoExtractor):
sources = xpath_element(
info, './video_sources', 'sources', fatal=True)
hash_values = xpath_element(
- info, './hash_values', 'hash values', fatal=True)
+ info, './hash_values', 'hash values', fatal=False)
title = xpath_text(video, './title', fatal=True)
@@ -136,15 +161,16 @@ class VideaIE(InfoExtractor):
source_url = source.text
source_name = source.get('name')
source_exp = source.get('exp')
- if not (source_url and source_name and source_exp):
+ if not (source_url and source_name):
continue
- hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
- if not hash_value:
- continue
- source_url = update_url_query(source_url, {
- 'md5': hash_value,
- 'expires': source_exp,
- })
+ hash_value = None
+ if hash_values:
+ hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
+ if hash_value and source_exp:
+ source_url = update_url_query(source_url, {
+ 'md5': hash_value,
+ 'expires': source_exp,
+ })
f = parse_codecs(source.get('codecs'))
f.update({
'url': self._proto_relative_url(source_url),
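Instead of hardcoding {'v': video_id}, the videa.py hunk now derives the query from the player URL with parse_qs, which, like the stdlib helper it wraps, returns a list per key:

    query = parse_qs('http://videa.hu/player?v=8YfIAjxwWGwT8HVQ&autoplay=1')
    # -> {'v': ['8YfIAjxwWGwT8HVQ'], 'autoplay': ['1']}

The signing parameters _s and _t are then layered on top of whatever the player URL already carried.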
diff --git a/hypervideo_dl/extractor/videomore.py b/hypervideo_dl/extractor/videomore.py
index e0c10aa..17ef3b1 100644
--- a/hypervideo_dl/extractor/videomore.py
+++ b/hypervideo_dl/extractor/videomore.py
@@ -5,13 +5,11 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_parse_qs,
compat_str,
- compat_urllib_parse_urlparse,
)
from ..utils import (
- ExtractorError,
int_or_none,
+ parse_qs,
)
@@ -145,9 +143,9 @@ class VideomoreIE(InfoExtractor):
return mobj.group('url')
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('sid') or mobj.group('id')
- partner_id = mobj.group('partner_id') or compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('partner_id', [None])[0] or '97'
+ partner_id = mobj.group('partner_id') or parse_qs(url).get('partner_id', [None])[0] or '97'
item = self._download_json(
'https://siren.more.tv/player/config', video_id, query={
@@ -193,8 +191,8 @@ class VideomoreIE(InfoExtractor):
error = item.get('error')
if error:
if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
- self.raise_geo_restricted(countries=['RU'])
- raise ExtractorError(error, expected=True)
+ self.raise_geo_restricted(countries=['RU'], metadata_available=True)
+ self.raise_no_formats(error, expected=True)
self._sort_formats(formats)
return {
diff --git a/hypervideo_dl/extractor/vidio.py b/hypervideo_dl/extractor/vidio.py
index b1243e8..571448b 100644
--- a/hypervideo_dl/extractor/vidio.py
+++ b/hypervideo_dl/extractor/vidio.py
@@ -1,19 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
+ clean_html,
+ ExtractorError,
+ get_element_by_class,
int_or_none,
parse_iso8601,
+ smuggle_url,
str_or_none,
strip_or_none,
try_get,
+ unsmuggle_url,
+ urlencode_postdata,
)
-class VidioIE(InfoExtractor):
+class VidioBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.vidio.com/users/login'
+ _NETRC_MACHINE = 'vidio'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def is_logged_in():
+ res = self._download_json(
+ 'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
+ return bool(res.get('current_user'))
+
+ if is_logged_in():
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading log in page')
+
+ login_form = self._form_hidden_inputs("login-form", login_page)
+ login_form.update({
+ 'user[login]': username,
+ 'user[password]': password,
+ })
+ login_post, login_post_urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
+
+ if login_post_urlh.status == 401:
+ if get_element_by_class('onboarding-content-register-popup__title', login_post):
+ raise ExtractorError(
+                    'Unable to log in: The provided email has not been registered yet.', expected=True)
+
+ reason = get_element_by_class('onboarding-form__general-error', login_post) or get_element_by_class('onboarding-modal__title', login_post)
+            if reason and 'Akun terhubung ke' in reason:
+ raise ExtractorError(
+ 'Unable to log in: Your account is linked to a social media account. '
+ 'Use --cookies to provide account credentials instead', expected=True)
+ elif reason:
+ subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
+ raise ExtractorError(
+ 'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._api_key = self._download_json(
+ 'https://www.vidio.com/auth', None, data=b'')['api_key']
+ self._login()
+
+ def _call_api(self, url, video_id, note=None):
+ return self._download_json(url, video_id, note=note, headers={
+ 'Content-Type': 'application/vnd.api+json',
+ 'X-API-KEY': self._api_key,
+ })
+
+
+class VidioIE(VidioBaseIE):
_VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
@@ -41,24 +102,43 @@ class VidioIE(InfoExtractor):
}, {
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
'only_matching': True,
+ }, {
+ # Premier-exclusive video
+ 'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
+ 'only_matching': True
}]
- def _real_initialize(self):
- self._api_key = self._download_json(
- 'https://www.vidio.com/auth', None, data=b'')['api_key']
-
def _real_extract(self, url):
- video_id, display_id = re.match(self._VALID_URL, url).groups()
- data = self._download_json(
- 'https://api.vidio.com/videos/' + video_id, display_id, headers={
- 'Content-Type': 'application/vnd.api+json',
- 'X-API-KEY': self._api_key,
- })
+ match = self._match_valid_url(url).groupdict()
+ video_id, display_id = match.get('id'), match.get('display_id')
+ data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
video = data['videos'][0]
title = video['title'].strip()
+ is_premium = video.get('is_premium')
+
+ if is_premium:
+ sources = self._download_json(
+ 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id,
+ display_id, note='Downloading premier API JSON')
+ if not (sources.get('source') or sources.get('source_dash')):
+ self.raise_login_required('This video is only available for registered users with the appropriate subscription')
+
+ formats, subs = [], {}
+ if sources.get('source'):
+ hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
+ sources['source'], display_id, 'mp4', 'm3u8_native')
+ formats.extend(hls_formats)
+ subs.update(hls_subs)
+ if sources.get('source_dash'): # TODO: Find video example with source_dash
+ dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
+ sources['source_dash'], display_id, 'dash')
+ formats.extend(dash_formats)
+ subs.update(dash_subs)
+ else:
+ hls_url = data['clips'][0]['hls_url']
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ hls_url, display_id, 'mp4', 'm3u8_native')
- formats = self._extract_m3u8_formats(
- data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
@@ -76,6 +156,7 @@ class VidioIE(InfoExtractor):
'duration': int_or_none(video.get('duration')),
'like_count': get_count('likes'),
'formats': formats,
+ 'subtitles': subs,
'uploader': user.get('name'),
'timestamp': parse_iso8601(video.get('created_at')),
'uploader_id': username,
@@ -87,3 +168,128 @@ class VidioIE(InfoExtractor):
'comment_count': get_count('comments'),
'tags': video.get('tag_list'),
}
+
+
+class VidioPremierIE(VidioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
+ 'playlist_mincount': 14,
+ }, {
+ # Series with both free and premier-exclusive videos
+ 'url': 'https://www.vidio.com/premier/2567/sosmed',
+ 'only_matching': True,
+ }]
+
+ def _playlist_entries(self, playlist_url, display_id):
+ index = 1
+ while playlist_url:
+ playlist_json = self._call_api(playlist_url, display_id, 'Downloading API JSON page %s' % index)
+ for video_json in playlist_json.get('data', []):
+ link = video_json['links']['watchpage']
+ yield self.url_result(link, 'Vidio', video_json['id'])
+ playlist_url = try_get(playlist_json, lambda x: x['links']['next'])
+ index += 1
+
+ def _real_extract(self, url):
+ url, idata = unsmuggle_url(url, {})
+ playlist_id, display_id = self._match_valid_url(url).groups()
+
+ playlist_url = idata.get('url')
+ if playlist_url: # Smuggled data contains an API URL. Download only that playlist
+ playlist_id = idata['id']
+ return self.playlist_result(
+ self._playlist_entries(playlist_url, playlist_id),
+ playlist_id=playlist_id, playlist_title=idata.get('title'))
+
+ playlist_data = self._call_api('https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id)
+
+ return self.playlist_from_matches(
+ playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(),
+ getter=lambda data: smuggle_url(url, {
+ 'url': data['relationships']['videos']['links']['related'],
+ 'id': data['id'],
+ 'title': try_get(data, lambda x: x['attributes']['name'])
+ }))
+
+
+class VidioLiveIE(VidioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.vidio.com/live/204-sctv',
+ 'info_dict': {
+ 'id': '204',
+ 'title': 'SCTV',
+ 'uploader': 'SCTV',
+ 'uploader_id': 'sctv',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ # Premier-exclusive livestream
+ 'url': 'https://www.vidio.com/live/6362-tvn',
+ 'only_matching': True,
+ }, {
+ # DRM premier-exclusive livestream
+ 'url': 'https://www.vidio.com/live/6299-bein-1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).groups()
+ stream_data = self._call_api(
+ 'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
+ stream_meta = stream_data['livestreamings'][0]
+ user = stream_data.get('users', [{}])[0]
+
+ title = stream_meta.get('title')
+ username = user.get('username')
+
+ formats = []
+ if stream_meta.get('is_drm'):
+ if not self.get_param('allow_unplayable_formats'):
+ self.report_drm(video_id)
+ if stream_meta.get('is_premium'):
+ sources = self._download_json(
+ 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
+ display_id, note='Downloading premier API JSON')
+ if not (sources.get('source') or sources.get('source_dash')):
+ self.raise_login_required('This video is only available for registered users with the appropriate subscription')
+
+ if str_or_none(sources.get('source')):
+ token_json = self._download_json(
+ 'https://www.vidio.com/live/%s/tokens' % video_id,
+ display_id, note='Downloading HLS token JSON', data=b'')
+ formats.extend(self._extract_m3u8_formats(
+ sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native'))
+ if str_or_none(sources.get('source_dash')):
+ pass
+ else:
+ if stream_meta.get('stream_token_url'):
+ token_json = self._download_json(
+ 'https://www.vidio.com/live/%s/tokens' % video_id,
+ display_id, note='Downloading HLS token JSON', data=b'')
+ formats.extend(self._extract_m3u8_formats(
+ stream_meta['stream_token_url'] + '?' + token_json.get('token', ''),
+ display_id, 'mp4', 'm3u8_native'))
+ if stream_meta.get('stream_dash_url'):
+ pass
+ if stream_meta.get('stream_url'):
+ formats.extend(self._extract_m3u8_formats(
+ stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'is_live': True,
+ 'description': strip_or_none(stream_meta.get('description')),
+ 'thumbnail': stream_meta.get('image'),
+ 'like_count': int_or_none(stream_meta.get('like')),
+ 'dislike_count': int_or_none(stream_meta.get('dislike')),
+ 'formats': formats,
+ 'uploader': user.get('name'),
+ 'timestamp': parse_iso8601(stream_meta.get('start_time')),
+ 'uploader_id': username,
+ 'uploader_url': 'https://www.vidio.com/@' + username if username else None,
+ }
diff --git a/hypervideo_dl/extractor/vidzi.py b/hypervideo_dl/extractor/vidzi.py
new file mode 100644
index 0000000..42ea495
--- /dev/null
+++ b/hypervideo_dl/extractor/vidzi.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ decode_packed_codes,
+ js_to_json,
+ NO_DEFAULT,
+ PACKED_CODES_RE,
+)
+
+
+class VidziIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://vidzi.tv/cghql9yq6emu.html',
+ 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
+ 'info_dict': {
+ 'id': 'cghql9yq6emu',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vidzi.cc/cghql9yq6emu.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vidzi.si/rph9gztxj1et.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vidzi.nu/cghql9yq6emu.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://vidzi.tv/%s' % video_id, video_id)
+ title = self._html_search_regex(
+ r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
+
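+ # the jwplayer setup may be hidden inside P.A.C.K.E.R.-packed scripts; try the raw page first, then each unpacked script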
+ codes = [webpage]
+ codes.extend([
+ decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
+ for mobj in re.finditer(PACKED_CODES_RE, webpage)])
+ for num, code in enumerate(codes, 1):
+ jwplayer_data = self._parse_json(
+ self._search_regex(
+ r'setup\(([^)]+)\)', code, 'jwplayer data',
+ default=NO_DEFAULT if num == len(codes) else '{}'),
+ video_id, transform_source=lambda s: js_to_json(
+ re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
+ if jwplayer_data:
+ break
+
+ info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
+ info_dict['title'] = title
+
+ return info_dict
diff --git a/hypervideo_dl/extractor/vier.py b/hypervideo_dl/extractor/vier.py
index dbd5ba9..94aa350 100644
--- a/hypervideo_dl/extractor/vier.py
+++ b/hypervideo_dl/extractor/vier.py
@@ -135,7 +135,7 @@ class VierIE(InfoExtractor):
self._logged_in = True
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
embed_id = mobj.group('embed_id')
display_id = mobj.group('display_id') or embed_id
video_id = mobj.group('id') or embed_id
@@ -234,7 +234,7 @@ class VierVideosIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
program = mobj.group('program')
site = mobj.group('site')
diff --git a/hypervideo_dl/extractor/viewlift.py b/hypervideo_dl/extractor/viewlift.py
index d6b92b1..c3b2e86 100644
--- a/hypervideo_dl/extractor/viewlift.py
+++ b/hypervideo_dl/extractor/viewlift.py
@@ -92,7 +92,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
return mobj.group('url')
def _real_extract(self, url):
- domain, film_id = re.match(self._VALID_URL, url).groups()
+ domain, film_id = self._match_valid_url(url).groups()
site = domain.split('.')[-2]
if site in self._SITE_MAP:
site = self._SITE_MAP[site]
@@ -134,7 +134,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
if hls_url:
formats.extend(self._extract_m3u8_formats(
hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+ self._sort_formats(formats)
info = {
'id': film_id,
@@ -229,7 +229,7 @@ class ViewLiftIE(ViewLiftBaseIE):
return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
def _real_extract(self, url):
- domain, path, display_id = re.match(self._VALID_URL, url).groups()
+ domain, path, display_id = self._match_valid_url(url).groups()
site = domain.split('.')[-2]
if site in self._SITE_MAP:
site = self._SITE_MAP[site]
diff --git a/hypervideo_dl/extractor/viidea.py b/hypervideo_dl/extractor/viidea.py
index a0abbae..0da0681 100644
--- a/hypervideo_dl/extractor/viidea.py
+++ b/hypervideo_dl/extractor/viidea.py
@@ -117,7 +117,7 @@ class ViideaIE(InfoExtractor):
}]
def _real_extract(self, url):
- lecture_slug, explicit_part_id = re.match(self._VALID_URL, url).groups()
+ lecture_slug, explicit_part_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, lecture_slug)
diff --git a/hypervideo_dl/extractor/viki.py b/hypervideo_dl/extractor/viki.py
index 2e9cbf1..acb5ae5 100644
--- a/hypervideo_dl/extractor/viki.py
+++ b/hypervideo_dl/extractor/viki.py
@@ -1,38 +1,28 @@
# coding: utf-8
from __future__ import unicode_literals
-
-import base64
import hashlib
import hmac
-import itertools
import json
-import re
import time
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
- sanitized_Request,
- std_headers,
try_get,
)
class VikiBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
- _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
- _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
+ _API_URL_TEMPLATE = 'https://api.viki.io%s'
+ _DEVICE_ID = '86085977d' # used for android api
_APP = '100005a'
- _APP_VERSION = '6.0.0'
- _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
+ _APP_VERSION = '6.11.3'
+ _APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
_GEO_BYPASS = False
_NETRC_MACHINE = 'viki'
@@ -45,43 +35,57 @@ class VikiBaseIE(InfoExtractor):
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
}
- def _prepare_call(self, path, timestamp=None, post_data=None):
+ def _stream_headers(self, timestamp, sig):
+ return {
+ 'X-Viki-manufacturer': 'vivo',
+ 'X-Viki-device-model': 'vivo 1606',
+ 'X-Viki-device-os-ver': '6.0.1',
+ 'X-Viki-connection-type': 'WIFI',
+ 'X-Viki-carrier': '',
+ 'X-Viki-as-id': '100005a-1625321982-3932',
+ 'timestamp': str(timestamp),
+ 'signature': str(sig),
+ 'x-viki-app-ver': self._APP_VERSION
+ }
+
+ def _api_query(self, path, version=4, **kwargs):
path += '?' if '?' not in path else '&'
- if not timestamp:
- timestamp = int(time.time())
- query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+ query = f'/v{version}/{path}app={self._APP}'
if self._token:
query += '&token=%s' % self._token
+ return query + ''.join(f'&{name}={val}' for name, val in kwargs.items())
+
+ def _sign_query(self, path):
+ timestamp = int(time.time())
+ query = self._api_query(path, version=5)
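+ # sign the v5 query with HMAC-SHA1 and the app secret; '&t=<timestamp>' is part of the signed payload but is sent via headers rather than the URL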
sig = hmac.new(
- self._APP_SECRET.encode('ascii'),
- query.encode('ascii'),
- hashlib.sha1
- ).hexdigest()
- url = self._API_URL_TEMPLATE % (query, sig)
- return sanitized_Request(
- url, json.dumps(post_data).encode('utf-8')) if post_data else url
-
- def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
+ self._APP_SECRET.encode('ascii'), f'{query}&t={timestamp}'.encode('ascii'), hashlib.sha1).hexdigest()
+ return timestamp, sig, self._API_URL_TEMPLATE % query
+
+ def _call_api(
+ self, path, video_id, note='Downloading JSON metadata', data=None, query=None, fatal=True):
+ if query is None:
+ timestamp, sig, url = self._sign_query(path)
+ else:
+ url = self._API_URL_TEMPLATE % self._api_query(path, version=4)
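+ # POST requests send only the app-version header, signed (v5) GETs send the Android stream headers, and plain v4 queries send none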
resp = self._download_json(
- self._prepare_call(path, timestamp, post_data), video_id, note,
- headers={'x-viki-app-ver': self._APP_VERSION})
-
- error = resp.get('error')
- if error:
- if error == 'invalid timestamp':
- resp = self._download_json(
- self._prepare_call(path, int(resp['current_timestamp']), post_data),
- video_id, '%s (retry)' % note)
- error = resp.get('error')
- if error:
- self._raise_error(resp['error'])
+ url, video_id, note, fatal=fatal, query=query,
+ data=json.dumps(data).encode('utf-8') if data else None,
+ headers=({'x-viki-app-ver': self._APP_VERSION} if data
+ else self._stream_headers(timestamp, sig) if query is None
+ else None), expected_status=400) or {}
+ self._raise_error(resp.get('error'), fatal)
return resp
- def _raise_error(self, error):
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error),
- expected=True)
+ def _raise_error(self, error, fatal=True):
+ if error is None:
+ return
+ msg = '%s said: %s' % (self.IE_NAME, error)
+ if fatal:
+ raise ExtractorError(msg, expected=True)
+ else:
+ self.report_warning(msg)
def _check_errors(self, data):
for reason, status in (data.get('blocking') or {}).items():
@@ -90,9 +94,10 @@ class VikiBaseIE(InfoExtractor):
if reason == 'geo':
self.raise_geo_restricted(msg=message)
elif reason == 'paywall':
+ if try_get(data, lambda x: x['paywallable']['tvod']):
+ self._raise_error('This video is only available for rent or TVOD (Transactional Video On Demand)')
self.raise_login_required(message)
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, message), expected=True)
+ self._raise_error(message)
def _real_initialize(self):
self._login()
@@ -102,35 +107,38 @@ class VikiBaseIE(InfoExtractor):
if username is None:
return
- login_form = {
- 'login_id': username,
- 'password': password,
- }
-
- login = self._call_api(
- 'sessions.json', None,
- 'Logging in', post_data=login_form)
-
- self._token = login.get('token')
+ self._token = self._call_api(
+ 'sessions.json', None, 'Logging in', fatal=False,
+ data={'username': username, 'password': password}).get('token')
if not self._token:
- self.report_warning('Unable to get session token, login has probably failed')
+ self.report_warning('Login Failed: Unable to get session token')
@staticmethod
- def dict_selection(dict_obj, preferred_key, allow_fallback=True):
+ def dict_selection(dict_obj, preferred_key):
if preferred_key in dict_obj:
- return dict_obj.get(preferred_key)
-
- if not allow_fallback:
- return
-
- filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
- return filtered_dict[0] if filtered_dict else None
+ return dict_obj[preferred_key]
+ return (list(filter(None, dict_obj.values())) or [None])[0]
class VikiIE(VikiBaseIE):
IE_NAME = 'viki'
_VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
_TESTS = [{
+ 'note': 'Free non-DRM video with storyboards in MPD',
+ 'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1',
+ 'info_dict': {
+ 'id': '1175236v',
+ 'ext': 'mp4',
+ 'title': 'Choosing Spouse by Lottery - Episode 1',
+ 'timestamp': 1606463239,
+ 'age_limit': 13,
+ 'uploader': 'FCC',
+ 'upload_date': '20201127',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
'info_dict': {
'id': '1023585v',
@@ -147,7 +155,6 @@ class VikiIE(VikiBaseIE):
'format': 'bestvideo',
},
'skip': 'Blocked in the US',
- 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, {
# clip
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
@@ -199,7 +206,6 @@ class VikiIE(VikiBaseIE):
'params': {
'format': 'bestvideo',
},
- 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, {
# youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@@ -238,23 +244,14 @@ class VikiIE(VikiBaseIE):
'params': {
'format': 'bestvideo',
},
- 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
-
- resp = self._download_json(
- 'https://www.viki.com/api/videos/' + video_id,
- video_id, 'Downloading video JSON', headers={
- 'x-client-user-agent': std_headers['User-Agent'],
- 'x-viki-app-ver': '3.0.0',
- })
- video = resp['video']
-
+ video = self._call_api(f'videos/{video_id}.json', video_id, 'Downloading video JSON', query={})
self._check_errors(video)
- title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
+ title = try_get(video, lambda x: x['titles']['en'], str)
episode_number = int_or_none(video.get('number'))
if not title:
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
@@ -262,113 +259,46 @@ class VikiIE(VikiBaseIE):
container_title = self.dict_selection(container_titles, 'en')
title = '%s - %s' % (container_title, title)
- description = self.dict_selection(video.get('descriptions', {}), 'en')
-
- like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
-
- thumbnails = []
- for thumbnail_id, thumbnail in (video.get('images') or {}).items():
- thumbnails.append({
- 'id': thumbnail_id,
- 'url': thumbnail.get('url'),
- })
-
- subtitles = {}
- for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
- subtitles[subtitle_lang] = [{
- 'ext': subtitles_format,
- 'url': self._prepare_call(
- 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
- } for subtitles_format in ('srt', 'vtt')]
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': thumbnail['url'],
+ } for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
+
+ resp = self._call_api(
+ 'playback_streams/%s.json?drms=dt1,dt2&device_id=%s' % (video_id, self._DEVICE_ID),
+ video_id, 'Downloading video streams JSON')['main'][0]
+
+ stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
+ subtitles = dict((lang, [{
+ 'ext': ext,
+ 'url': self._API_URL_TEMPLATE % self._api_query(
+ f'videos/{video_id}/auth_subtitles/{lang}.{ext}', stream_id=stream_id)
+ } for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}).keys())
+
+ mpd_url = resp['url']
+ # 1080p is hidden in a second MPD whose URL appears inside the current manifest's <BaseURL>
+ mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
+ mpd_url = self._search_regex(
+ r'(?mi)<BaseURL>(http.+?\.mpd)', mpd_content, 'new manifest', default=mpd_url)
+ formats = self._extract_mpd_formats(mpd_url, video_id)
+ self._sort_formats(formats)
- result = {
+ return {
'id': video_id,
+ 'formats': formats,
'title': title,
- 'description': description,
+ 'description': self.dict_selection(video.get('descriptions', {}), 'en'),
'duration': int_or_none(video.get('duration')),
'timestamp': parse_iso8601(video.get('created_at')),
'uploader': video.get('author'),
'uploader_url': video.get('author_url'),
- 'like_count': like_count,
+ 'like_count': int_or_none(try_get(video, lambda x: x['likes']['count'])),
'age_limit': parse_age_limit(video.get('rating')),
'thumbnails': thumbnails,
'subtitles': subtitles,
'episode_number': episode_number,
}
- formats = []
-
- def add_format(format_id, format_dict, protocol='http'):
- # rtmps URLs does not seem to work
- if protocol == 'rtmps':
- return
- format_url = format_dict.get('url')
- if not format_url:
- return
- qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
- stream = qs.get('stream', [None])[0]
- if stream:
- format_url = base64.b64decode(stream).decode()
- if format_id in ('m3u8', 'hls'):
- m3u8_formats = self._extract_m3u8_formats(
- format_url, video_id, 'mp4',
- entry_protocol='m3u8_native',
- m3u8_id='m3u8-%s' % protocol, fatal=False)
- # Despite CODECS metadata in m3u8 all video-only formats
- # are actually video+audio
- for f in m3u8_formats:
- if '_drm/index_' in f['url']:
- continue
- if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
- f['acodec'] = None
- formats.append(f)
- elif format_id in ('mpd', 'dash'):
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, 'mpd-%s' % protocol, fatal=False))
- elif format_url.startswith('rtmp'):
- mobj = re.search(
- r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
- format_url)
- if not mobj:
- return
- formats.append({
- 'format_id': 'rtmp-%s' % format_id,
- 'ext': 'flv',
- 'url': mobj.group('url'),
- 'play_path': mobj.group('playpath'),
- 'app': mobj.group('app'),
- 'page_url': url,
- })
- else:
- formats.append({
- 'url': format_url,
- 'format_id': '%s-%s' % (format_id, protocol),
- 'height': int_or_none(self._search_regex(
- r'^(\d+)[pP]$', format_id, 'height', default=None)),
- })
-
- for format_id, format_dict in (resp.get('streams') or {}).items():
- add_format(format_id, format_dict)
- if not formats:
- streams = self._call_api(
- 'videos/%s/streams.json' % video_id, video_id,
- 'Downloading video streams JSON')
-
- if 'external' in streams:
- result.update({
- '_type': 'url_transparent',
- 'url': streams['external']['url'],
- })
- return result
-
- for format_id, stream_dict in streams.items():
- for protocol, format_dict in stream_dict.items():
- add_format(format_id, format_dict, protocol)
- self._sort_formats(formats)
-
- result['formats'] = formats
- return result
-
class VikiChannelIE(VikiBaseIE):
IE_NAME = 'viki:channel'
@@ -380,7 +310,7 @@ class VikiChannelIE(VikiBaseIE):
'title': 'Boys Over Flowers',
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
},
- 'playlist_mincount': 71,
+ 'playlist_mincount': 51,
}, {
'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
'info_dict': {
@@ -401,33 +331,35 @@ class VikiChannelIE(VikiBaseIE):
'only_matching': True,
}]
- _PER_PAGE = 25
+ _video_types = ('episodes', 'movies', 'clips', 'trailers')
+
+ def _entries(self, channel_id):
+ params = {
+ 'app': self._APP, 'token': self._token, 'only_ids': 'true',
+ 'direction': 'asc', 'sort': 'number', 'per_page': 30
+ }
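+ # page through every requested video type until the API stops reporting 'more'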
+ video_types = self._configuration_arg('video_types') or self._video_types
+ for video_type in video_types:
+ if video_type not in self._video_types:
+ self.report_warning(f'Unknown video_type: {video_type}')
+ page_num = 0
+ while True:
+ page_num += 1
+ params['page'] = page_num
+ res = self._call_api(
+ f'containers/{channel_id}/{video_type}.json', channel_id, query=params, fatal=False,
+ note='Downloading %s JSON page %d' % (video_type.title(), page_num))
+
+ for video_id in res.get('response') or []:
+ yield self.url_result(f'https://www.viki.com/videos/{video_id}', VikiIE.ie_key(), video_id)
+ if not res.get('more'):
+ break
def _real_extract(self, url):
channel_id = self._match_id(url)
-
- channel = self._call_api(
- 'containers/%s.json' % channel_id, channel_id,
- 'Downloading channel JSON')
-
+ channel = self._call_api('containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON')
self._check_errors(channel)
-
- title = self.dict_selection(channel['titles'], 'en')
-
- description = self.dict_selection(channel['descriptions'], 'en')
-
- entries = []
- for video_type in ('episodes', 'clips', 'movies'):
- for page_num in itertools.count(1):
- page = self._call_api(
- 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
- % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
- 'Downloading %s JSON page #%d' % (video_type, page_num))
- for video in page['response']:
- video_id = video['id']
- entries.append(self.url_result(
- 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
- if not page['pagination']['next']:
- break
-
- return self.playlist_result(entries, channel_id, title, description)
+ return self.playlist_result(
+ self._entries(channel_id), channel_id,
+ self.dict_selection(channel['titles'], 'en'),
+ self.dict_selection(channel['descriptions'], 'en'))
diff --git a/hypervideo_dl/extractor/vimeo.py b/hypervideo_dl/extractor/vimeo.py
index 6323219..9fb5475 100644
--- a/hypervideo_dl/extractor/vimeo.py
+++ b/hypervideo_dl/extractor/vimeo.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import base64
import functools
+import json
import re
import itertools
@@ -16,14 +17,16 @@ from ..compat import (
from ..utils import (
clean_html,
determine_ext,
+ dict_get,
ExtractorError,
- get_element_by_class,
js_to_json,
int_or_none,
merge_dicts,
OnDemandPagedList,
parse_filesize,
parse_iso8601,
+ parse_qs,
+ RegexNotFoundError,
sanitized_Request,
smuggle_url,
std_headers,
@@ -74,7 +77,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
raise ExtractorError('Unable to log in')
def _get_video_password(self):
- password = self._downloader.params.get('videopassword')
+ password = self.get_param('videopassword')
if password is None:
raise ExtractorError(
'This video is protected by a password, use the --video-password option',
@@ -118,18 +121,18 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _vimeo_sort_formats(self, formats):
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified. This lead to wrong sorting.
- self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
+ # But since hypervideo prefers 'res,fps' anyway, 'field_preference' is not needed
+ self._sort_formats(formats)
def _parse_config(self, config, video_id):
video_data = config['video']
video_title = video_data['title']
live_event = video_data.get('live_event') or {}
is_live = live_event.get('status') == 'started'
- request = config.get('request') or {}
formats = []
- config_files = video_data.get('files') or request.get('files') or {}
- for f in (config_files.get('progressive') or []):
+ config_files = video_data.get('files') or config['request'].get('files', {})
+ for f in config_files.get('progressive', []):
video_url = f.get('url')
if not video_url:
continue
@@ -145,7 +148,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
# TODO: fix handling of 308 status code returned for live archive manifest requests
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
- for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
+ for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
manifest_url = cdn_data.get('url')
if not manifest_url:
continue
@@ -181,25 +184,21 @@ class VimeoBaseInfoExtractor(InfoExtractor):
formats.append({
'format_id': 'live-archive-source',
'url': live_archive_source_url,
- 'preference': 1,
+ 'quality': 10,
})
- for f in formats:
- if f.get('vcodec') == 'none':
- f['preference'] = -50
- elif f.get('acodec') == 'none':
- f['preference'] = -40
-
subtitles = {}
- for tt in (request.get('text_tracks') or []):
- subtitles[tt['lang']] = [{
- 'ext': 'vtt',
- 'url': urljoin('https://vimeo.com', tt['url']),
- }]
+ text_tracks = config['request'].get('text_tracks')
+ if text_tracks:
+ for tt in text_tracks:
+ subtitles[tt['lang']] = [{
+ 'ext': 'vtt',
+ 'url': urljoin('https://vimeo.com', tt['url']),
+ }]
thumbnails = []
if not is_live:
- for key, thumb in (video_data.get('thumbs') or {}).items():
+ for key, thumb in video_data.get('thumbs', {}).items():
thumbnails.append({
'id': key,
'width': int_or_none(key),
@@ -252,9 +251,33 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'height': int_or_none(source_file.get('height')),
'filesize': parse_filesize(source_file.get('size')),
'format_id': source_name,
- 'preference': 1,
+ 'quality': 1,
}
+ jwt_response = self._download_json(
+ 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
+ if not jwt_response.get('jwt'):
+ return
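+ # the viewer JWT authorizes the private API call that exposes the original-quality 'source' download link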
+ headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
+ original_response = self._download_json(
+ f'https://api.vimeo.com/videos/{video_id}', video_id,
+ headers=headers, fatal=False) or {}
+ for download_data in original_response.get('download') or []:
+ download_url = download_data.get('link')
+ if not download_url or download_data.get('quality') != 'source':
+ continue
+ query = parse_qs(download_url)
+ return {
+ 'url': download_url,
+ 'ext': determine_ext(query.get('filename', [''])[0].lower()),
+ 'format_id': download_data.get('public_name', 'Original'),
+ 'width': int_or_none(download_data.get('width')),
+ 'height': int_or_none(download_data.get('height')),
+ 'fps': int_or_none(download_data.get('fps')),
+ 'filesize': int_or_none(download_data.get('size')),
+ 'quality': 1,
+ }
+
class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
@@ -290,7 +313,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'info_dict': {
'id': '56015672',
'ext': 'mp4',
- 'title': "hypervideo test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+ 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
'description': 'md5:2d3305bad981a06ff79f027f19865021',
'timestamp': 1355990239,
'upload_date': '20121220',
@@ -319,7 +342,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 1595,
'upload_date': '20130610',
'timestamp': 1370893156,
- 'license': 'by',
},
'params': {
'format': 'best[protocol=https]',
@@ -351,7 +373,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'info_dict': {
'id': '68375962',
'ext': 'mp4',
- 'title': 'hypervideo password protected test video',
+ 'title': 'youtube-dl password protected test video',
'timestamp': 1371200155,
'upload_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
@@ -362,7 +384,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
'params': {
'format': 'best[protocol=https]',
- 'videopassword': 'hypervideo',
+ 'videopassword': 'youtube-dl',
},
},
{
@@ -398,12 +420,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
'duration': 62,
- 'subtitles': {
- 'de': [{'ext': 'vtt'}],
- 'en': [{'ext': 'vtt'}],
- 'es': [{'ext': 'vtt'}],
- 'fr': [{'ext': 'vtt'}],
- },
}
},
{
@@ -436,6 +452,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
},
{
+ 'note': 'Contains original format not accessible in webpage',
+ 'url': 'https://vimeo.com/393756517',
+ 'md5': 'c464af248b592190a5ffbb5d33f382b0',
+ 'info_dict': {
+ 'id': '393756517',
+ 'ext': 'mov',
+ 'timestamp': 1582642091,
+ 'uploader_id': 'frameworkla',
+ 'title': 'Straight To Hell - Sabrina: Netflix',
+ 'uploader': 'Framework Studio',
+ 'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
+ 'upload_date': '20200225',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
# only available via https://vimeo.com/channels/tributes/6213729 and
# not via https://vimeo.com/6213729
'url': 'https://vimeo.com/channels/tributes/6213729',
@@ -484,7 +516,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'info_dict': {
'id': '68375962',
'ext': 'mp4',
- 'title': 'hypervideo password protected test video',
+ 'title': 'youtube-dl password protected test video',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
@@ -492,7 +524,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
'params': {
'format': 'best[protocol=https]',
- 'videopassword': 'hypervideo',
+ 'videopassword': 'youtube-dl',
},
},
{
@@ -513,6 +545,24 @@ class VimeoIE(VimeoBaseInfoExtractor):
'only_matching': True,
},
{
+ 'url': 'https://vimeo.com/showcase/3253534/video/119195465',
+ 'note': 'A video in a password protected album (showcase)',
+ 'info_dict': {
+ 'id': '119195465',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video \'ä"BaW_jenozKc',
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'user20132939',
+ 'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
+ 'upload_date': '20150209',
+ 'timestamp': 1423518307,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'youtube-dl',
+ },
+ },
+ {
# source file returns 403: Forbidden
'url': 'https://vimeo.com/7809605',
'only_matching': True,
@@ -576,36 +626,43 @@ class VimeoIE(VimeoBaseInfoExtractor):
def _real_initialize(self):
self._login()
- def _extract_from_api(self, video_id, unlisted_hash=None):
- token = self._download_json(
- 'https://vimeo.com/_rv/jwt', video_id, headers={
- 'X-Requested-With': 'XMLHttpRequest'
- })['token']
- api_url = 'https://api.vimeo.com/videos/' + video_id
- if unlisted_hash:
- api_url += ':' + unlisted_hash
- video = self._download_json(
- api_url, video_id, headers={
- 'Authorization': 'jwt ' + token,
- }, query={
- 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
- })
- info = self._parse_config(self._download_json(
- video['config_url'], video_id), video_id)
- self._vimeo_sort_formats(info['formats'])
- get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
- info.update({
- 'description': video.get('description'),
- 'license': video.get('license'),
- 'release_timestamp': get_timestamp('release'),
- 'timestamp': get_timestamp('created'),
- 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
- })
- connections = try_get(
- video, lambda x: x['metadata']['connections'], dict) or {}
- for k in ('comment', 'like'):
- info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
- return info
+ def _try_album_password(self, url):
+ album_id = self._search_regex(
+ r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
+ if not album_id:
+ return
+ viewer = self._download_json(
+ 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
+ if not viewer:
+ webpage = self._download_webpage(url, album_id)
+ viewer = self._parse_json(self._search_regex(
+ r'bootstrap_data\s*=\s*({.+?})</script>',
+ webpage, 'bootstrap data'), album_id)['viewer']
+ jwt = viewer['jwt']
+ album = self._download_json(
+ 'https://api.vimeo.com/albums/' + album_id,
+ album_id, headers={'Authorization': 'jwt ' + jwt},
+ query={'fields': 'description,name,privacy'})
+ if try_get(album, lambda x: x['privacy']['view']) == 'password':
+ password = self.get_param('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This album is protected by a password, use the --video-password option',
+ expected=True)
+ self._set_vimeo_cookie('vuid', viewer['vuid'])
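+ # verify the password with the viewer's XSRF token; a 401 response means the password was wrong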
+ try:
+ self._download_json(
+ 'https://vimeo.com/showcase/%s/auth' % album_id,
+ album_id, 'Verifying the password', data=urlencode_postdata({
+ 'password': password,
+ 'token': viewer['xsrft'],
+ }), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError('Wrong password', expected=True)
+ raise
def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
@@ -615,22 +672,52 @@ class VimeoIE(VimeoBaseInfoExtractor):
if 'Referer' not in headers:
headers['Referer'] = url
- mobj = re.match(self._VALID_URL, url).groupdict()
- video_id, unlisted_hash = mobj['id'], mobj.get('unlisted_hash')
+ # Extract ID from URL
+ video_id, unlisted_hash = self._match_valid_url(url).groups()
if unlisted_hash:
- return self._extract_from_api(video_id, unlisted_hash)
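+ # unlisted videos are fetched through the API using a JWT from the /_rv/jwt endpoint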
+ token = self._download_json(
+ 'https://vimeo.com/_rv/jwt', video_id, headers={
+ 'X-Requested-With': 'XMLHttpRequest'
+ })['token']
+ video = self._download_json(
+ 'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash),
+ video_id, headers={
+ 'Authorization': 'jwt ' + token,
+ }, query={
+ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
+ })
+ info = self._parse_config(self._download_json(
+ video['config_url'], video_id), video_id)
+ self._vimeo_sort_formats(info['formats'])
+ get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
+ info.update({
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'release_timestamp': get_timestamp('release'),
+ 'timestamp': get_timestamp('created'),
+ 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
+ })
+ connections = try_get(
+ video, lambda x: x['metadata']['connections'], dict) or {}
+ for k in ('comment', 'like'):
+ info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
+ return info
orig_url = url
is_pro = 'vimeopro.com/' in url
+ is_player = '://player.vimeo.com/video/' in url
if is_pro:
# some videos require portfolio_id to be present in player url
# https://github.com/ytdl-org/youtube-dl/issues/20070
url = self._extract_url(url, self._download_webpage(url, video_id))
if not url:
url = 'https://vimeo.com/' + video_id
+ elif is_player:
+ url = 'https://player.vimeo.com/video/' + video_id
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
+ self._try_album_password(url)
try:
# Retrieve video webpage to extract further information
webpage, urlh = self._download_webpage_handle(
@@ -647,25 +734,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
expected=True)
raise
- if '//player.vimeo.com/video/' in url:
- config = self._parse_json(self._search_regex(
- r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
- if config.get('view') == 4:
- config = self._verify_player_video_password(
- redirect_url, video_id, headers)
- info = self._parse_config(config, video_id)
- self._vimeo_sort_formats(info['formats'])
- return info
-
- if re.search(r'<form[^>]+?id="pw_form"', webpage):
- video_password = self._get_video_password()
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- webpage = self._verify_video_password(
- redirect_url, video_id, video_password, token, vuid)
+ # Now we begin extracting as much information as we can from what we
+ # retrieved. First we extract the information common to all extractors,
+ # and later we extract those that are Vimeo specific.
+ self.report_extraction(video_id)
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
- seed_status = vimeo_config.get('seed_status') or {}
+ seed_status = vimeo_config.get('seed_status', {})
if seed_status.get('state') == 'failed':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, seed_status['title']),
@@ -674,40 +750,70 @@ class VimeoIE(VimeoBaseInfoExtractor):
cc_license = None
timestamp = None
video_description = None
- info_dict = {}
- channel_id = self._search_regex(
- r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
- if channel_id:
- config_url = self._html_search_regex(
- r'\bdata-config-url="([^"]+)"', webpage, 'config URL')
- video_description = clean_html(get_element_by_class('description', webpage))
- info_dict.update({
- 'channel_id': channel_id,
- 'channel_url': 'https://vimeo.com/channels/' + channel_id,
- })
+ # Extract the config JSON
+ try:
+ try:
+ config_url = self._html_search_regex(
+ r' data-config-url="(.+?)"', webpage,
+ 'config URL', default=None)
+ if not config_url:
+ # Sometimes a new react-based page is served instead of the old one, which
+ # requires a different config URL extraction approach (see
+ # https://github.com/ytdl-org/youtube-dl/pull/7209)
+ page_config = self._parse_json(self._search_regex(
+ r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
+ webpage, 'page config'), video_id)
+ config_url = page_config['player']['config_url']
+ cc_license = page_config.get('cc_license')
+ timestamp = try_get(
+ page_config, lambda x: x['clip']['uploaded_on'],
+ compat_str)
+ video_description = clean_html(dict_get(
+ page_config, ('description', 'description_html_escaped')))
+ config = self._download_json(config_url, video_id)
+ except RegexNotFoundError:
+ # For pro videos or player.vimeo.com urls
+ # We try to find out which variable the config dict is assigned to
+ m_variable_name = re.search(r'(\w)\.video\.id', webpage)
+ if m_variable_name is not None:
+ config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
+ else:
+ config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
+ config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
+ config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
+ config = self._search_regex(config_re, webpage, 'info section',
+ flags=re.DOTALL)
+ config = json.loads(config)
+ except Exception as e:
+ if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
+ raise ExtractorError('The author has restricted access to this video; try the "--referer" option')
+
+ if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
+ if '_video_password_verified' in data:
+ raise ExtractorError('video password verification failed!')
+ video_password = self._get_video_password()
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ self._verify_video_password(
+ redirect_url, video_id, video_password, token, vuid)
+ return self._real_extract(
+ smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
+ else:
+ raise ExtractorError('Unable to extract info section',
+ cause=e)
else:
- page_config = self._parse_json(self._search_regex(
- r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
- webpage, 'page config', default='{}'), video_id, fatal=False)
- if not page_config:
- return self._extract_from_api(video_id)
- config_url = page_config['player']['config_url']
- cc_license = page_config.get('cc_license')
- clip = page_config.get('clip') or {}
- timestamp = clip.get('uploaded_on')
- video_description = clean_html(
- clip.get('description') or page_config.get('description_html_escaped'))
- config = self._download_json(config_url, video_id)
+ if config.get('view') == 4:
+ config = self._verify_player_video_password(redirect_url, video_id, headers)
+
video = config.get('video') or {}
vod = video.get('vod') or {}
def is_rented():
if '>You rented this title.<' in webpage:
return True
- if try_get(config, lambda x: x['user']['purchased']):
+ if config.get('user', {}).get('purchased'):
return True
- for purchase_option in (vod.get('purchase_options') or []):
+ for purchase_option in vod.get('purchase_options', []):
if purchase_option.get('purchased'):
return True
label = purchase_option.get('label_string')
@@ -722,10 +828,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
'https://player.vimeo.com/player/%s' % feature_id,
{'force_feature_id': True}), 'Vimeo')
+ # Extract video description
+ if not video_description:
+ video_description = self._html_search_regex(
+ r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
+ webpage, 'description', default=None)
if not video_description:
video_description = self._html_search_meta(
- ['description', 'og:description', 'twitter:description'],
- webpage, default=None)
+ 'description', webpage, default=None)
if not video_description and is_pro:
orig_webpage = self._download_webpage(
orig_url, video_id,
@@ -734,14 +844,25 @@ class VimeoIE(VimeoBaseInfoExtractor):
if orig_webpage:
video_description = self._html_search_meta(
'description', orig_webpage, default=None)
- if not video_description:
- self._downloader.report_warning('Cannot find video description')
+ if not video_description and not is_player:
+ self.report_warning('Cannot find video description')
+ # Extract upload date
if not timestamp:
timestamp = self._search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage,
'timestamp', default=None)
+ try:
+ view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
+ like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
+ comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
+ except RegexNotFoundError:
+ # This info is only available in vimeo.com/{id} urls
+ view_count = None
+ like_count = None
+ comment_count = None
+
formats = []
source_format = self._extract_original_format(
@@ -760,20 +881,31 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
webpage, 'license', default=None, group='license')
- info_dict.update({
+ channel_id = self._search_regex(
+ r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
+ channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
+
+ info_dict = {
'formats': formats,
'timestamp': unified_timestamp(timestamp),
'description': video_description,
'webpage_url': url,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
'license': cc_license,
- })
+ 'channel_id': channel_id,
+ 'channel_url': channel_url,
+ }
- return merge_dicts(info_dict, info_dict_config, json_ld)
+ info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
+
+ return info_dict
class VimeoOndemandIE(VimeoIE):
IE_NAME = 'vimeo:ondemand'
- _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
_TESTS = [{
# ondemand video not available via https://vimeo.com/id
'url': 'https://vimeo.com/ondemand/20704',
@@ -920,7 +1052,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
},
'playlist_count': 1,
'params': {
- 'videopassword': 'hypervideo',
+ 'videopassword': 'youtube-dl',
}
}]
_PAGE_SIZE = 100
@@ -967,7 +1099,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
query={'fields': 'description,name,privacy'})
hashed_pass = None
if try_get(album, lambda x: x['privacy']['view']) == 'password':
- password = self._downloader.params.get('videopassword')
+ password = self.get_param('videopassword')
if not password:
raise ExtractorError(
'This album is protected by a password, use the --video-password option',
@@ -1056,7 +1188,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
self._login()
def _real_extract(self, url):
- page_url, video_id = re.match(self._VALID_URL, url).groups()
+ page_url, video_id = self._match_valid_url(url).groups()
data = self._download_json(
page_url.replace('/review/', '/review/data/'), video_id)
if data.get('isLocked') is True:
diff --git a/hypervideo_dl/extractor/vine.py b/hypervideo_dl/extractor/vine.py
index 80b896b..07fce0d 100644
--- a/hypervideo_dl/extractor/vine.py
+++ b/hypervideo_dl/extractor/vine.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -88,6 +87,7 @@ class VineIE(InfoExtractor):
'format_id': format_id or 'standard',
'quality': quality,
})
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
username = data.get('username')
@@ -132,7 +132,7 @@ class VineUserIE(InfoExtractor):
return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
user = mobj.group('user')
u = mobj.group('u')
diff --git a/hypervideo_dl/extractor/viu.py b/hypervideo_dl/extractor/viu.py
index 3bd3752..1b34c52 100644
--- a/hypervideo_dl/extractor/viu.py
+++ b/hypervideo_dl/extractor/viu.py
@@ -1,16 +1,22 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from ..compat import (
compat_kwargs,
compat_str,
+ compat_urlparse,
+ compat_urllib_request,
)
from ..utils import (
ExtractorError,
int_or_none,
+ try_get,
+ smuggle_url,
+ unsmuggle_url,
)
@@ -168,7 +174,8 @@ class ViuPlaylistIE(ViuBaseIE):
class ViuOTTIE(InfoExtractor):
IE_NAME = 'viu:ott'
- _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
+ _NETRC_MACHINE = 'viu'
+ _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/(?P<lang_code>[a-z]{2}-[a-z]{2})/vod/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
'info_dict': {
@@ -179,6 +186,7 @@ class ViuOTTIE(InfoExtractor):
},
'params': {
'skip_download': 'm3u8 download',
+ 'noplaylist': True,
},
'skip': 'Geo-restricted to Singapore',
}, {
@@ -191,6 +199,19 @@ class ViuOTTIE(InfoExtractor):
},
'params': {
'skip_download': 'm3u8 download',
+ 'noplaylist': True,
+ },
+ 'skip': 'Geo-restricted to Hong Kong',
+ }, {
+ 'url': 'https://www.viu.com/ott/hk/zh-hk/vod/68776/%E6%99%82%E5%B0%9A%E5%AA%BD%E5%92%AA',
+ 'playlist_count': 12,
+ 'info_dict': {
+ 'id': '3916',
+ 'title': '時尚媽咪',
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ 'noplaylist': False,
},
'skip': 'Geo-restricted to Hong Kong',
}]
@@ -201,9 +222,51 @@ class ViuOTTIE(InfoExtractor):
'TH': 4,
'PH': 5,
}
+ _LANGUAGE_FLAG = {
+ 'zh-hk': 1,
+ 'zh-cn': 2,
+ 'en-us': 3,
+ }
+ _user_info = None
+
+ def _detect_error(self, response):
+ code = response.get('status', {}).get('code')
+ if code and code > 0:
+ message = try_get(response, lambda x: x['status']['message'])
+ raise ExtractorError('%s said: %s (%s)' % (
+ self.IE_NAME, message, code), expected=True)
+ return response['data']
+
+ def _raise_login_required(self):
+ raise ExtractorError(
+ 'This video requires login. '
+ 'Specify --username and --password or --netrc (machine: %s) '
+ 'to provide account credentials.' % self._NETRC_MACHINE,
+ expected=True)
+
+ def _login(self, country_code, video_id):
+ if not self._user_info:
+ username, password = self._get_login_info()
+ if username is None or password is None:
+ return
+
+ data = self._download_json(
+ compat_urllib_request.Request(
+ 'https://www.viu.com/ott/%s/index.php' % country_code, method='POST'),
+ video_id, 'Logging in', errnote=False, fatal=False,
+ query={'r': 'user/login'},
+ data=json.dumps({
+ 'username': username,
+ 'password': password,
+ 'platform_flag_label': 'web',
+ }).encode())
+ self._user_info = self._detect_error(data)['user']
+
+ return self._user_info
def _real_extract(self, url):
- country_code, video_id = re.match(self._VALID_URL, url).groups()
+ url, idata = unsmuggle_url(url, {})
+ country_code, lang_code, video_id = self._match_valid_url(url).groups()
query = {
'r': 'vod/ajax-detail',
@@ -223,20 +286,88 @@ class ViuOTTIE(InfoExtractor):
if not video_data:
raise ExtractorError('This video is not available in your region.', expected=True)
- stream_data = self._download_json(
- 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
- video_id, 'Downloading stream info', query={
- 'ccs_product_id': video_data['ccs_product_id'],
- }, headers={
- 'Referer': url,
- 'Origin': re.search(r'https?://[^/]+', url).group(0),
- })['data']['stream']
+ series_id = video_data.get('series_id')
+ if not self.get_param('noplaylist') and not idata.get('force_noplaylist'):
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % series_id)
+ series = product_data.get('series', {})
+ product = series.get('product')
+ if product:
+ entries = []
+ for entry in sorted(product, key=lambda x: int_or_none(x.get('number', 0))):
+ item_id = entry.get('product_id')
+ if not item_id:
+ continue
+ item_id = compat_str(item_id)
+ entries.append(self.url_result(
+ smuggle_url(
+ 'http://www.viu.com/ott/%s/%s/vod/%s/' % (country_code, lang_code, item_id),
+ {'force_noplaylist': True}), # prevent infinite recursion
+ 'ViuOTT',
+ item_id,
+ entry.get('synopsis', '').strip()))
+
+ return self.playlist_result(entries, series_id, series.get('name'), series.get('description'))
+
+ if self.get_param('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ duration_limit = False
+ query = {
+ 'ccs_product_id': video_data['ccs_product_id'],
+ 'language_flag_id': self._LANGUAGE_FLAG.get(lang_code.lower()) or '3',
+ }
+ headers = {
+ 'Referer': url,
+ 'Origin': url,
+ }
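+ # first try an anonymous stream request; on failure, either log in (member-only content) or fall back to the 3-minute preview and lift its duration cap below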
+ try:
+ stream_data = self._download_json(
+ 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
+ video_id, 'Downloading stream info', query=query, headers=headers)
+ stream_data = self._detect_error(stream_data)['stream']
+ except (ExtractorError, KeyError):
+ stream_data = None
+ if video_data.get('user_level', 0) > 0:
+ user = self._login(country_code, video_id)
+ if user:
+ query['identity'] = user['identity']
+ stream_data = self._download_json(
+ 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
+ video_id, 'Downloading stream info', query=query, headers=headers)
+ stream_data = self._detect_error(stream_data).get('stream')
+ else:
+ # preview is limited to 3min for non-members
+ # try to bypass the duration limit
+ duration_limit = True
+ query['duration'] = '180'
+ stream_data = self._download_json(
+ 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
+ video_id, 'Downloading stream info', query=query, headers=headers)
+ try:
+ stream_data = self._detect_error(stream_data)['stream']
+ except (ExtractorError, KeyError): # if still not working, give up
+ self._raise_login_required()
+
+ if not stream_data:
+ raise ExtractorError('Cannot get stream info', expected=True)
stream_sizes = stream_data.get('size', {})
formats = []
for vid_format, stream_url in stream_data.get('url', {}).items():
height = int_or_none(self._search_regex(
r's(\d+)p', vid_format, 'height', default=None))
+
+ # bypass preview duration limit
+ if duration_limit:
+ stream_url = compat_urlparse.urlparse(stream_url)
+ query = dict(compat_urlparse.parse_qsl(stream_url.query, keep_blank_values=True))
+ time_duration = int_or_none(video_data.get('time_duration'))
+ query.update({
+ 'duration': time_duration if time_duration and time_duration > 0 else '9999999',
+ 'duration_start': '0',
+ })
+ stream_url = stream_url._replace(query=compat_urlparse.urlencode(query)).geturl()
+
formats.append({
'format_id': vid_format,
'url': stream_url,
diff --git a/hypervideo_dl/extractor/vk.py b/hypervideo_dl/extractor/vk.py
index 6b3513e..d8a9b9a 100644
--- a/hypervideo_dl/extractor/vk.py
+++ b/hypervideo_dl/extractor/vk.py
@@ -308,7 +308,7 @@ class VKIE(VKBaseIE):
webpage)]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('videoid')
mv_data = {}
@@ -538,7 +538,7 @@ class VKUserVideosIE(VKBaseIE):
'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
def _real_extract(self, url):
- page_id, section = re.match(self._VALID_URL, url).groups()
+ page_id, section = self._match_valid_url(url).groups()
if not section:
section = 'all'
diff --git a/hypervideo_dl/extractor/vlive.py b/hypervideo_dl/extractor/vlive.py
index 42da34d..84f51a5 100644
--- a/hypervideo_dl/extractor/vlive.py
+++ b/hypervideo_dl/extractor/vlive.py
@@ -72,6 +72,13 @@ class VLiveIE(VLiveBaseIE):
# works only with gcc=KR
'url': 'https://www.vlive.tv/video/225019',
'only_matching': True,
+ }, {
+ 'url': 'https://www.vlive.tv/video/223906',
+ 'info_dict': {
+ 'id': '58',
+ 'title': 'RUN BTS!'
+ },
+ 'playlist_mincount': 120
}]
def _real_initialize(self):
@@ -105,10 +112,12 @@ class VLiveIE(VLiveBaseIE):
if not is_logged_in():
raise ExtractorError('Unable to log in', expected=True)
- def _call_api(self, path_template, video_id, fields=None):
+ def _call_api(self, path_template, video_id, fields=None, limit=None):
query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
if fields:
query['fields'] = fields
+ if limit:
+ query['limit'] = limit
try:
return self._download_json(
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
@@ -124,10 +133,34 @@ class VLiveIE(VLiveBaseIE):
post = self._call_api(
'post/v1.0/officialVideoPost-%s', video_id,
- 'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
-
- video = post['officialVideo']
-
+ 'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId},playlist{playlistSeq,totalCount,name}')
+
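+ # if the post belongs to a playlist, expand the whole playlist unless --no-playlist was given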
+ playlist = post.get('playlist')
+ if not playlist or self.get_param('noplaylist'):
+ if playlist:
+ self.to_screen(
+ 'Downloading just video %s because of --no-playlist'
+ % video_id)
+
+ video = post['officialVideo']
+ return self._get_vlive_info(post, video, video_id)
+ else:
+ playlist_name = playlist.get('name')
+ playlist_id = str_or_none(playlist.get('playlistSeq'))
+ playlist_count = str_or_none(playlist.get('totalCount'))
+
+ playlist = self._call_api(
+ 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count)
+
+ entries = []
+ for video_data in playlist['data']:
+ video = video_data.get('officialVideo')
+ video_id = str_or_none(video.get('videoSeq'))
+ entries.append(self._get_vlive_info(video_data, video, video_id))
+
+ return self.playlist_result(entries, playlist_id, playlist_name)
+
+ def _get_vlive_info(self, post, video, video_id):
def get_common_fields():
channel = post.get('channel') or {}
return {
@@ -145,9 +178,15 @@ class VLiveIE(VLiveBaseIE):
if video_type == 'VOD':
inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
vod_id = video['vodId']
- return merge_dicts(
+ info_dict = merge_dicts(
get_common_fields(),
self._extract_video_info(video_id, vod_id, inkey))
+ thumbnail = video.get('thumb')
+ if thumbnail:
+ if not info_dict.get('thumbnails') and info_dict.get('thumbnail'):
+ info_dict['thumbnails'] = [{'url': info_dict.pop('thumbnail')}]
+ info_dict.setdefault('thumbnails', []).append({'url': thumbnail, 'preference': 1})
+ return info_dict
elif video_type == 'LIVE':
status = video.get('status')
if status == 'ON_AIR':
@@ -316,13 +355,29 @@ class VLiveChannelIE(VLiveBaseIE):
for video in videos:
video_id = video.get('videoSeq')
- if not video_id:
+ video_type = video.get('videoType')
+
+ if not video_id or not video_type:
continue
video_id = compat_str(video_id)
- entries.append(
- self.url_result(
- 'http://www.vlive.tv/video/%s' % video_id,
- ie=VLiveIE.ie_key(), video_id=video_id))
+
+ if video_type == 'PLAYLIST':
+ first_video_id = try_get(
+ video,
+ lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int)
+
+ if not first_video_id:
+ continue
+
+ entries.append(
+ self.url_result(
+ 'http://www.vlive.tv/video/%s' % first_video_id,
+ ie=VLiveIE.ie_key(), video_id=first_video_id))
+ else:
+ entries.append(
+ self.url_result(
+ 'http://www.vlive.tv/video/%s' % video_id,
+ ie=VLiveIE.ie_key(), video_id=video_id))
return self.playlist_result(
entries, channel_code, channel_name)
diff --git a/hypervideo_dl/extractor/voicy.py b/hypervideo_dl/extractor/voicy.py
new file mode 100644
index 0000000..11ebe76
--- /dev/null
+++ b/hypervideo_dl/extractor/voicy.py
@@ -0,0 +1,147 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ traverse_obj,
+ unsmuggle_url,
+ unified_strdate,
+)
+
+import itertools
+
+
+class VoicyBaseIE(InfoExtractor):
+ def _extract_from_playlist_data(self, value):
+ voice_id = compat_str(value.get('PlaylistId'))
+ upload_date = unified_strdate(value.get('Published'), False)
+ items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
+ return {
+ '_type': 'multi_video',
+ 'entries': items,
+ 'id': voice_id,
+ 'title': compat_str(value.get('PlaylistName')),
+ 'uploader': value.get('SpeakerName'),
+ 'uploader_id': compat_str(value.get('SpeakerId')),
+ 'channel': value.get('ChannelName'),
+ 'channel_id': compat_str(value.get('ChannelId')),
+ 'upload_date': upload_date,
+ }
+
+ def _extract_single_article(self, entry):
+ formats = [{
+ 'url': entry['VoiceHlsFile'],
+ 'format_id': 'hls',
+ 'ext': 'm4a',
+ 'acodec': 'aac',
+ 'vcodec': 'none',
+ 'protocol': 'm3u8_native',
+ }, {
+ 'url': entry['VoiceFile'],
+ 'format_id': 'mp3',
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none',
+ }]
+ self._sort_formats(formats)
+ return {
+ 'id': compat_str(entry.get('ArticleId')),
+ 'title': entry.get('ArticleTitle'),
+ 'description': entry.get('MediaName'),
+ 'formats': formats,
+ }
+
+ def _call_api(self, url, video_id, **kwargs):
+ response = self._download_json(url, video_id, **kwargs)
+ if response.get('Status') != 0:
+ message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
+ if not message:
+ message = 'There was an error in the response: %s' % response.get('Status')
+ raise ExtractorError(message, expected=False)
+ return response.get('Value')
+
+
+class VoicyIE(VoicyBaseIE):
+ IE_NAME = 'voicy'
+ _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
+ ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
+ _TESTS = [{
+ 'url': 'https://voicy.jp/channel/1253/122754',
+ 'info_dict': {
+ 'id': '122754',
+ 'title': '1/21(木)声日記:ついに原稿終わった!!',
+ 'uploader': 'ちょまど@ ITエンジニアなオタク',
+ 'uploader_id': '7339',
+ },
+ 'playlist_mincount': 9,
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ assert mobj
+ voice_id = mobj.group('id')
+ channel_id = mobj.group('channel_id')
+ url, article_list = unsmuggle_url(url)
+ if not article_list:
+ article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
+ return self._extract_from_playlist_data(article_list)
+
+
+class VoicyChannelIE(VoicyBaseIE):
+ IE_NAME = 'voicy:channel'
+ _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
+ PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
+ _TESTS = [{
+ 'url': 'https://voicy.jp/channel/1253/',
+ 'info_dict': {
+ 'id': '7339',
+ 'title': 'ゆるふわ日常ラジオ #ちょまラジ',
+ 'uploader': 'ちょまど@ ITエンジニアなオタク',
+ 'uploader_id': '7339',
+ },
+ 'playlist_mincount': 54,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
+
+ def _entries(self, channel_id):
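+ # cursor pagination: each request passes the previous page's last item as the cursor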
+ pager = ''
+ for count in itertools.count(1):
+ article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note='Paging #%d' % count)
+ playlist_data = article_list.get('PlaylistData')
+ if not playlist_data:
+ break
+ yield from playlist_data
+ last = playlist_data[-1]
+ pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ articles = self._entries(channel_id)
+
+ first_article = next(articles, None)
+ title = traverse_obj(first_article, ('ChannelName', ), expected_type=compat_str)
+ speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=compat_str)
+ if not title and speaker_name:
+ title = 'Uploads from %s' % speaker_name
+ if not title:
+ title = 'Uploads from channel ID %s' % channel_id
+
+ articles = itertools.chain([first_article], articles) if first_article else articles
+
+ playlist = (
+ self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key())
+ for value in articles)
+ return {
+ '_type': 'playlist',
+ 'entries': playlist,
+ 'id': channel_id,
+ 'title': title,
+ 'channel': speaker_name,
+ 'channel_id': channel_id,
+ }
diff --git a/hypervideo_dl/extractor/voot.py b/hypervideo_dl/extractor/voot.py
index 751b21e..e2944ec 100644
--- a/hypervideo_dl/extractor/voot.py
+++ b/hypervideo_dl/extractor/voot.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
@@ -11,7 +12,17 @@ from ..utils import (
class VootIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?voot\.com/(?:[^/]+/)+(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ (?:
+ voot:|
+ (?:https?://)(?:www\.)?voot\.com/?
+ (?:
+ movies/[^/]+/|
+ (?:shows|kids)/(?:[^/]+/){4}
+ )
+ )
+ (?P<id>\d{3,})
+ '''
_GEO_COUNTRIES = ['IN']
_TESTS = [{
'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
@@ -22,7 +33,6 @@ class VootIE(InfoExtractor):
'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
'timestamp': 1472162937,
'upload_date': '20160825',
- 'duration': 1146,
'series': 'Ishq Ka Rang Safed',
'season_number': 1,
'episode': 'Is this the end of Kamini?',
@@ -44,7 +54,6 @@ class VootIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
media_info = self._download_json(
'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
query={
@@ -82,7 +91,6 @@ class VootIE(InfoExtractor):
episode = value
elif key == 'EpisodeNo':
episode_number = int_or_none(value)
-
return {
'extractor_key': 'Kaltura',
'id': entry_id,
@@ -98,3 +106,45 @@ class VootIE(InfoExtractor):
'like_count': int_or_none(media.get('like_counter')),
'formats': formats,
}
+
+
+class VootSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',
+ 'playlist_mincount': 442,
+ 'info_dict': {
+ 'id': '100002',
+ },
+ }, {
+ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003',
+ 'playlist_mincount': 341,
+ 'info_dict': {
+ 'id': '100003',
+ },
+ }]
+ _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common'
+ _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}'
+
+ def _entries(self, show_id):
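+ # walk each season in order, then page through its episodes until a page comes back empty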
+ show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id)
+ for season in show_json.get('result', []):
+ page_num = 1
+ season_id = try_get(season, lambda x: x['id'], compat_str)
+ season_json = self._download_json(self._SEASON_API.format(season_id, page_num),
+ video_id=season_id,
+ note='Downloading JSON metadata page %d' % page_num)
+ episodes_json = season_json.get('result', [])
+ while episodes_json:
+ page_num += 1
+ for episode in episodes_json:
+ video_id = episode.get('id')
+ yield self.url_result(
+ 'voot:%s' % video_id, ie=VootIE.ie_key(), video_id=video_id)
+ episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num),
+ video_id=season_id,
+ note='Downloading JSON metadata page %d' % page_num)['result']
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
diff --git a/hypervideo_dl/extractor/vrt.py b/hypervideo_dl/extractor/vrt.py
index 4220252..10dc94a 100644
--- a/hypervideo_dl/extractor/vrt.py
+++ b/hypervideo_dl/extractor/vrt.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -52,16 +51,16 @@ class VRTIE(InfoExtractor):
}
def _real_extract(self, url):
- site, display_id = re.match(self._VALID_URL, url).groups()
+ site, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
attrs = extract_attributes(self._search_regex(
- r'(<[^>]+class="vrtvideo"[^>]*>)', webpage, 'vrt video'))
+ r'(<[^>]+class="vrtvideo( [^"]*)?"[^>]*>)', webpage, 'vrt video'))
- asset_id = attrs['data-videoid']
- publication_id = attrs.get('data-publicationid')
+ asset_id = attrs['data-video-id']
+ publication_id = attrs.get('data-publication-id')
if publication_id:
asset_id = publication_id + '$' + asset_id
- client = attrs.get('data-client') or self._CLIENT_MAP[site]
+ client = attrs.get('data-client-code') or self._CLIENT_MAP[site]
title = strip_or_none(get_element_by_class(
'vrt-title', webpage) or self._html_search_meta(
diff --git a/hypervideo_dl/extractor/vrv.py b/hypervideo_dl/extractor/vrv.py
index 6e51469..4196021 100644
--- a/hypervideo_dl/extractor/vrv.py
+++ b/hypervideo_dl/extractor/vrv.py
@@ -19,6 +19,7 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ traverse_obj,
)
@@ -217,7 +218,7 @@ class VRVIE(VRVBaseIE):
})
thumbnails = []
- for thumbnail in video_data.get('images', {}).get('thumbnails', []):
+ for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []:
thumbnail_url = thumbnail.get('source')
if not thumbnail_url:
continue
diff --git a/hypervideo_dl/extractor/vube.py b/hypervideo_dl/extractor/vube.py
index 8ce3a6b..1c8f80a 100644
--- a/hypervideo_dl/extractor/vube.py
+++ b/hypervideo_dl/extractor/vube.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import (
@@ -8,7 +7,6 @@ from ..compat import (
)
from ..utils import (
int_or_none,
- ExtractorError,
)
@@ -99,7 +97,7 @@ class VubeIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
video = self._download_json(
@@ -125,13 +123,13 @@ class VubeIE(InfoExtractor):
})
formats.append(fmt)
- self._sort_formats(formats)
-
if not formats and video.get('vst') == 'dmca':
- raise ExtractorError(
+ self.raise_no_formats(
'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
expected=True)
+ self._sort_formats(formats)
+
title = video['title']
description = video.get('description')
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
diff --git a/hypervideo_dl/extractor/vupload.py b/hypervideo_dl/extractor/vupload.py
new file mode 100644
index 0000000..9846aba
--- /dev/null
+++ b/hypervideo_dl/extractor/vupload.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_filesize,
+ extract_attributes,
+ int_or_none,
+)
+
+
+class VuploadIE(InfoExtractor):
+ _VALID_URL = r'https://vupload\.com/v/(?P<id>[a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://vupload.com/v/u28d0pl2tphy',
+ 'md5': '9b42a4a193cca64d80248e58527d83c8',
+ 'info_dict': {
+ 'id': 'u28d0pl2tphy',
+ 'ext': 'mp4',
+ 'description': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
+ 'title': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
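+ # the media token is a pipe-delimited 60-character string embedded in the page source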
+ video_e = self._html_search_regex(r'\|([a-z0-9]{60})\|', webpage, 'video')
+ video_url = f'https://wurize.megaupload.to/{video_e}/v.mp4'
+ duration = parse_duration(self._html_search_regex(
+ r'<i\s*class=["\']fad\s*fa-clock["\']></i>\s*([\d:]+)\s*</div>', webpage, 'duration', fatal=False))
+ filesize_approx = parse_filesize(self._html_search_regex(
+ r'<i\s*class=["\']fad\s*fa-save["\']></i>\s*([^<]+)\s*</div>', webpage, 'filesize', fatal=False))
+ extra_video_info = extract_attributes(self._html_search_regex(
+ r'(<video[^>]+>)', webpage, 'video_info', fatal=False))
+ description = self._html_search_meta('description', webpage)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'duration': duration,
+ 'filesize_approx': filesize_approx,
+ 'width': int_or_none(extra_video_info.get('width')),
+ 'height': int_or_none(extra_video_info.get('height')),
+ 'format_id': extra_video_info.get('height', '') + 'p',
+ 'title': title,
+ 'description': description,
+ }
diff --git a/hypervideo_dl/extractor/vvvvid.py b/hypervideo_dl/extractor/vvvvid.py
index bc196f8..3faa90f 100644
--- a/hypervideo_dl/extractor/vvvvid.py
+++ b/hypervideo_dl/extractor/vvvvid.py
@@ -98,7 +98,7 @@ class VVVVIDIE(InfoExtractor):
}
def _real_extract(self, url):
- show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
+ show_id, season_id, video_id = self._match_valid_url(url).groups()
response = self._download_info(
show_id, 'season/%s' % season_id,
@@ -246,7 +246,7 @@ class VVVVIDShowIE(VVVVIDIE):
}]
def _real_extract(self, url):
- base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()
+ base_url, show_id, show_title = self._match_valid_url(url).groups()
seasons = self._download_info(
show_id, 'seasons/', show_title)
diff --git a/hypervideo_dl/extractor/vzaar.py b/hypervideo_dl/extractor/vzaar.py
index b7d02fc..54f88bb 100644
--- a/hypervideo_dl/extractor/vzaar.py
+++ b/hypervideo_dl/extractor/vzaar.py
@@ -70,7 +70,7 @@ class VzaarIE(InfoExtractor):
f = {
'url': source_url,
'format_id': 'http',
- 'preference': 1,
+ 'quality': 1,
}
if 'audio' in source_url:
f.update({
diff --git a/hypervideo_dl/extractor/wakanim.py b/hypervideo_dl/extractor/wakanim.py
index f9a2395..c956d61 100644
--- a/hypervideo_dl/extractor/wakanim.py
+++ b/hypervideo_dl/extractor/wakanim.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
merge_dicts,
urljoin,
)
@@ -41,12 +40,13 @@ class WakanimIE(InfoExtractor):
m3u8_url = urljoin(url, self._search_regex(
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url',
group='url'))
- # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls
- encryption = self._search_regex(
- r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
- m3u8_url, 'encryption', default=None)
- if encryption and encryption in ('cenc', 'cbcs-aapl'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if not self.get_param('allow_unplayable_formats'):
+ # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls
+ encryption = self._search_regex(
+ r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
+ m3u8_url, 'encryption', default=None)
+ if encryption in ('cenc', 'cbcs-aapl'):
+ self.report_drm(video_id)
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
diff --git a/hypervideo_dl/extractor/walla.py b/hypervideo_dl/extractor/walla.py
index cbb5486..00f081b 100644
--- a/hypervideo_dl/extractor/walla.py
+++ b/hypervideo_dl/extractor/walla.py
@@ -34,7 +34,7 @@ class WallaIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/wat.py b/hypervideo_dl/extractor/wat.py
index f1bccc2..9ff4523 100644
--- a/hypervideo_dl/extractor/wat.py
+++ b/hypervideo_dl/extractor/wat.py
@@ -69,25 +69,30 @@ class WatIE(InfoExtractor):
title = video_info['title']
formats = []
+ subtitles = {}
def extract_formats(manifest_urls):
for f, f_url in manifest_urls.items():
if not f_url:
continue
if f in ('dash', 'mpd'):
- formats.extend(self._extract_mpd_formats(
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
- video_id, mpd_id='dash', fatal=False))
+ video_id, mpd_id='dash', fatal=False)
elif f == 'hls':
- formats.extend(self._extract_m3u8_formats(
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
f_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ else:
+ continue
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
delivery = video_data.get('delivery') or {}
extract_formats({delivery.get('format'): delivery.get('url')})
if not formats:
if delivery.get('drm'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ self.report_drm(video_id)
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False)
if manifest_urls:
@@ -103,4 +108,5 @@ class WatIE(InfoExtractor):
video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])),
'duration': int_or_none(video_info.get('duration')),
'formats': formats,
+ 'subtitles': subtitles,
}
diff --git a/hypervideo_dl/extractor/watchbox.py b/hypervideo_dl/extractor/watchbox.py
index 5a4e46e..7469fe9 100644
--- a/hypervideo_dl/extractor/watchbox.py
+++ b/hypervideo_dl/extractor/watchbox.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -63,7 +62,7 @@ class WatchBoxIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
kind, video_id = mobj.group('kind', 'id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/watchindianporn.py b/hypervideo_dl/extractor/watchindianporn.py
index fadc539..a868191 100644
--- a/hypervideo_dl/extractor/watchindianporn.py
+++ b/hypervideo_dl/extractor/watchindianporn.py
@@ -27,7 +27,7 @@ class WatchIndianPornIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/wdr.py b/hypervideo_dl/extractor/wdr.py
index 2903d18..f54aa6f 100644
--- a/hypervideo_dl/extractor/wdr.py
+++ b/hypervideo_dl/extractor/wdr.py
@@ -44,17 +44,25 @@ class WDRIE(InfoExtractor):
tracker_data = metadata['trackerData']
title = tracker_data['trackerClipTitle']
-
media_resource = metadata['mediaResource']
formats = []
+ subtitles = {}
# check if the metadata contains a direct URL to a file
for kind, media in media_resource.items():
- if not isinstance(media, dict):
+ if kind == 'captionsHash':
+ for ext, url in media.items():
+ subtitles.setdefault('de', []).append({
+ 'url': url,
+ 'ext': ext,
+ })
continue
+
if kind not in ('dflt', 'alt'):
continue
+ if not isinstance(media, dict):
+ continue
for tag_name, medium_url in media.items():
if tag_name not in ('videoURL', 'audioURL'):
@@ -86,7 +94,6 @@ class WDRIE(InfoExtractor):
self._sort_formats(formats)
- subtitles = {}
caption_url = media_resource.get('captionURL')
if caption_url:
subtitles['de'] = [{
@@ -233,7 +240,7 @@ class WDRPageIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
@@ -335,7 +342,7 @@ class WDRMobileIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
return {
'id': mobj.group('id'),
'title': mobj.group('title'),
diff --git a/hypervideo_dl/extractor/whowatch.py b/hypervideo_dl/extractor/whowatch.py
new file mode 100644
index 0000000..f8bc2e7
--- /dev/null
+++ b/hypervideo_dl/extractor/whowatch.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+ try_get,
+ ExtractorError,
+)
+from ..compat import compat_str
+
+
+class WhoWatchIE(InfoExtractor):
+ IE_NAME = 'whowatch'
+ _VALID_URL = r'https?://whowatch\.tv/viewer/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://whowatch.tv/viewer/21450171',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ self._download_webpage(url, video_id)
+ metadata = self._download_json('https://api.whowatch.tv/lives/%s' % video_id, video_id)
+ live_data = self._download_json('https://api.whowatch.tv/lives/%s/play' % video_id, video_id)
+
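+ # try_get with a None subject: the lambdas close over the JSON and ignore their argument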
+ title = try_get(None, (
+ lambda x: live_data['share_info']['live_title'][1:-1],
+ lambda x: metadata['live']['title'],
+ ), compat_str)
+
+ hls_url = live_data.get('hls_url')
+ if not hls_url:
+ raise ExtractorError(live_data.get('error_message') or 'The user is offline.', expected=True)
+
+ QUALITIES = qualities(['low', 'medium', 'high', 'veryhigh'])
+ formats = []
+
+ for i, fmt in enumerate(live_data.get('streams') or []):
+ name = fmt.get('quality') or fmt.get('name') or compat_str(i)
+ hls_url = fmt.get('hls_url')
+ rtmp_url = fmt.get('rtmp_url')
+ audio_only = fmt.get('audio_only')
+ quality = QUALITIES(fmt.get('quality'))
+
+ if hls_url:
+ hls_fmts = self._extract_m3u8_formats(
+ hls_url, video_id, ext='mp4', m3u8_id='hls-%s' % name, quality=quality)
+ formats.extend(hls_fmts)
+ else:
+ hls_fmts = []
+
+ # the RTMP URL for audio_only is the same as the high format, so skip it
+ if rtmp_url and not audio_only:
+ formats.append({
+ 'url': rtmp_url,
+ 'format_id': 'rtmp-%s' % name,
+ 'ext': 'mp4',
+ 'protocol': 'rtmp_ffmpeg', # ffmpeg can handle this stream, while rtmpdump can't
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ 'quality': quality,
+ 'format_note': fmt.get('label'),
+ # note: HLS and RTMP currently have the same resolution, so reusing it is acceptable
+ 'width': try_get(hls_fmts, lambda x: x[0]['width'], int),
+ 'height': try_get(hls_fmts, lambda x: x[0]['height'], int),
+ })
+
+ # This contains the same formats as the above manifests and is used only as a fallback
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, ext='mp4', m3u8_id='hls'))
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats)
+
+ uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], compat_str)
+ if uploader_url:
+ uploader_url = 'https://whowatch.tv/profile/%s' % uploader_url
+ uploader_id = compat_str(try_get(metadata, lambda x: x['live']['user']['id'], int))
+ uploader = try_get(metadata, lambda x: x['live']['user']['name'], compat_str)
+ thumbnail = try_get(metadata, lambda x: x['live']['latest_thumbnail_url'], compat_str)
+ timestamp = int_or_none(try_get(metadata, lambda x: x['live']['started_at'], int), scale=1000)
+ view_count = try_get(metadata, lambda x: x['live']['total_view_count'], int)
+ comment_count = try_get(metadata, lambda x: x['live']['comment_count'], int)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ 'uploader': uploader,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'is_live': True,
+ }
diff --git a/hypervideo_dl/extractor/wimtv.py b/hypervideo_dl/extractor/wimtv.py
new file mode 100644
index 0000000..ea953bf
--- /dev/null
+++ b/hypervideo_dl/extractor/wimtv.py
@@ -0,0 +1,163 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ parse_duration,
+ urlencode_postdata,
+ ExtractorError,
+)
+
+
+class WimTVIE(InfoExtractor):
+ _player = None
+ _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
+ _VALID_URL = r'''(?x)
+ https?://platform\.wim\.tv/
+ (?:
+ (?:embed/)?\?
+ |\#/webtv/.+?/
+ )
+ (?P<type>vod|live|cast)[=/]
+ (?P<id>%s).*?''' % _UUID_RE
+ _TESTS = [{
+ # vod stream
+ 'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a',
+ 'md5': 'db29fb32-bade-47b6-a3a6-cb69fe80267a',
+ 'info_dict': {
+ 'id': 'db29fb32-bade-47b6-a3a6-cb69fe80267a',
+ 'ext': 'mp4',
+ 'title': 'AMA SUPERCROSS 2020 - R2 ST. LOUIS',
+ 'duration': 6481,
+ 'thumbnail': r're:https?://.+?/thumbnail/.+?/720$'
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # live stream
+ 'url': 'https://platform.wim.tv/embed/?live=28e22c22-49db-40f3-8c37-8cbb0ff44556&autostart=true',
+ 'info_dict': {
+ 'id': '28e22c22-49db-40f3-8c37-8cbb0ff44556',
+ 'ext': 'mp4',
+ 'title': 'Streaming MSmotorTV',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://platform.wim.tv/#/webtv/automotornews/vod/422492b6-539e-474d-9c6b-68c9d5893365',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://platform.wim.tv/#/webtv/renzoarborechannel/cast/f47e0d15-5b45-455e-bf0d-dba8ffa96365',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe[^>]+src=["\'](?P<url>%s)' % WimTVIE._VALID_URL,
+ webpage)]
+
+ def _real_initialize(self):
+ if not self._player:
+ self._get_player_data()
+
+ def _get_player_data(self):
+ msg_id = 'Player data'
+ self._player = {}
+
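+ # scrape the app auth token and thumbnail host settings from the platform's JS files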
+ datas = [{
+ 'url': 'https://platform.wim.tv/common/libs/player/wimtv/wim-rest.js',
+ 'vars': [{
+ 'regex': r'appAuth = "(.+?)"',
+ 'variable': 'app_auth',
+ }]
+ }, {
+ 'url': 'https://platform.wim.tv/common/config/endpointconfig.js',
+ 'vars': [{
+ 'regex': r'PRODUCTION_HOSTNAME_THUMB = "(.+?)"',
+ 'variable': 'thumb_server',
+ }, {
+ 'regex': r'PRODUCTION_HOSTNAME_THUMB\s*\+\s*"(.+?)"',
+ 'variable': 'thumb_server_path',
+ }]
+ }]
+
+ for data in datas:
+ temp = self._download_webpage(data['url'], msg_id)
+ for var in data['vars']:
+ val = self._search_regex(var['regex'], temp, msg_id)
+ if not val:
+ raise ExtractorError('%s not found' % var['variable'])
+ self._player[var['variable']] = val
+
+ def _generate_token(self):
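+ # OAuth2 client-credentials flow, using the scraped appAuth value as the Basic credential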
+ json = self._download_json(
+ 'https://platform.wim.tv/wimtv-server/oauth/token', 'Token generation',
+ headers={'Authorization': 'Basic %s' % self._player['app_auth']},
+ data=urlencode_postdata({'grant_type': 'client_credentials'}))
+ token = json.get('access_token')
+ if not token:
+ raise ExtractorError('access token not generated')
+ return token
+
+ def _generate_thumbnail(self, thumb_id, width='720'):
+ if not thumb_id or not self._player.get('thumb_server'):
+ return None
+ if not self._player.get('thumb_server_path'):
+ self._player['thumb_server_path'] = ''
+ return '%s%s/asset/thumbnail/%s/%s' % (
+ self._player['thumb_server'],
+ self._player['thumb_server_path'],
+ thumb_id, width)
+
+ def _real_extract(self, url):
+ urlc = self._match_valid_url(url).groupdict()
+ video_id = urlc['id']
+ stream_type = is_live = None
+ if urlc['type'] in {'live', 'cast'}:
+ stream_type = urlc['type'] + '/channel'
+ is_live = True
+ else:
+ stream_type = 'vod'
+ is_live = False
+ token = self._generate_token()
+ json = self._download_json(
+ 'https://platform.wim.tv/wimtv-server/api/public/%s/%s/play' % (
+ stream_type, video_id), video_id,
+ headers={'Authorization': 'Bearer %s' % token,
+ 'Content-Type': 'application/json'},
+ data=bytes('{}', 'utf-8'))
+
+ formats = []
+ for src in json.get('srcs') or []:
+ if src.get('mimeType') == 'application/x-mpegurl':
+ formats.extend(
+ self._extract_m3u8_formats(
+ src.get('uniqueStreamer'), video_id, 'mp4'))
+ if src.get('mimeType') == 'video/flash':
+ formats.append({
+ 'format_id': 'rtmp',
+ 'url': src.get('uniqueStreamer'),
+ 'ext': determine_ext(src.get('uniqueStreamer'), 'flv'),
+ 'rtmp_live': is_live,
+ })
+ json = json.get('resource')
+ thumb = self._generate_thumbnail(json.get('thumbnailId'))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': json.get('title') or json.get('name'),
+ 'duration': parse_duration(json.get('duration')),
+ 'formats': formats,
+ 'thumbnail': thumb,
+ 'is_live': is_live,
+ }
diff --git a/hypervideo_dl/extractor/wistia.py b/hypervideo_dl/extractor/wistia.py
index ae32a0a..a170966 100644
--- a/hypervideo_dl/extractor/wistia.py
+++ b/hypervideo_dl/extractor/wistia.py
@@ -62,7 +62,7 @@ class WistiaBaseIE(InfoExtractor):
'format_id': format_id,
'url': aurl,
'tbr': int_or_none(a.get('bitrate')) or None,
- 'preference': 1 if atype == 'original' else None,
+ 'quality': 1 if atype == 'original' else None,
}
if display_name == 'Audio':
f.update({
diff --git a/hypervideo_dl/extractor/xboxclips.py b/hypervideo_dl/extractor/xboxclips.py
index 25f487e..9bac982 100644
--- a/hypervideo_dl/extractor/xboxclips.py
+++ b/hypervideo_dl/extractor/xboxclips.py
@@ -4,14 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
int_or_none,
month_by_abbreviation,
parse_filesize,
+ parse_qs,
)
@@ -37,7 +34,7 @@ class XboxClipsIE(InfoExtractor):
video_id = self._match_id(url)
if '/video.php' in url:
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ qs = parse_qs(url)
url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0])
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/xfileshare.py b/hypervideo_dl/extractor/xfileshare.py
index df9efa9..cd97c77 100644
--- a/hypervideo_dl/extractor/xfileshare.py
+++ b/hypervideo_dl/extractor/xfileshare.py
@@ -98,7 +98,7 @@ class XFileShareIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- host, video_id = re.match(self._VALID_URL, url).groups()
+ host, video_id = self._match_valid_url(url).groups()
url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/xhamster.py b/hypervideo_dl/extractor/xhamster.py
index f73b977..9d4ed47 100644
--- a/hypervideo_dl/extractor/xhamster.py
+++ b/hypervideo_dl/extractor/xhamster.py
@@ -120,7 +120,7 @@ class XHamsterIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('id_2')
display_id = mobj.group('display_id') or mobj.group('display_id_2')
@@ -231,7 +231,7 @@ class XHamsterIE(InfoExtractor):
'Referer': standard_url,
},
})
- self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+ self._sort_formats(formats)
categories_list = video.get('categories')
if isinstance(categories_list, list):
@@ -245,6 +245,8 @@ class XHamsterIE(InfoExtractor):
else:
categories = None
+ uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
+
return {
'id': video_id,
'display_id': display_id,
@@ -253,6 +255,8 @@ class XHamsterIE(InfoExtractor):
'timestamp': int_or_none(video.get('created')),
'uploader': try_get(
video, lambda x: x['author']['name'], compat_str),
+ 'uploader_url': uploader_url,
+ 'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
'thumbnail': video.get('thumbURL'),
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('views')),
@@ -352,6 +356,7 @@ class XHamsterIE(InfoExtractor):
'description': description,
'upload_date': upload_date,
'uploader': uploader,
+ 'uploader_id': uploader.lower() if uploader else None,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
diff --git a/hypervideo_dl/extractor/ximalaya.py b/hypervideo_dl/extractor/ximalaya.py
index a912e54..802d1bb 100644
--- a/hypervideo_dl/extractor/ximalaya.py
+++ b/hypervideo_dl/extractor/ximalaya.py
@@ -198,7 +198,7 @@ class XimalayaAlbumIE(XimalayaBaseIE):
def _real_extract(self, url):
self.scheme = scheme = 'https' if url.startswith('https') else 'http'
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
uid, playlist_id = mobj.group('uid'), mobj.group('id')
webpage = self._download_webpage(self._TEMPLATE_URL % (scheme, uid, playlist_id), playlist_id,
diff --git a/hypervideo_dl/extractor/xnxx.py b/hypervideo_dl/extractor/xnxx.py
index ac1ccc4..dd4fb54 100644
--- a/hypervideo_dl/extractor/xnxx.py
+++ b/hypervideo_dl/extractor/xnxx.py
@@ -54,7 +54,7 @@ class XNXXIE(InfoExtractor):
if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
- preference=1, m3u8_id='hls', fatal=False))
+ quality=1, m3u8_id='hls', fatal=False))
else:
format_id = mobj.group('id')
if format_id:
diff --git a/hypervideo_dl/extractor/xstream.py b/hypervideo_dl/extractor/xstream.py
index 76c91bd..792843d 100644
--- a/hypervideo_dl/extractor/xstream.py
+++ b/hypervideo_dl/extractor/xstream.py
@@ -93,7 +93,7 @@ class XstreamIE(InfoExtractor):
formats.append({
'url': link.get('href'),
'format_id': link.get('rel'),
- 'preference': 1,
+ 'quality': 1,
})
thumbnails = [{
@@ -112,7 +112,7 @@ class XstreamIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
partner_id = mobj.group('partner_id')
video_id = mobj.group('id')
diff --git a/hypervideo_dl/extractor/xtube.py b/hypervideo_dl/extractor/xtube.py
index 7246409..abd3191 100644
--- a/hypervideo_dl/extractor/xtube.py
+++ b/hypervideo_dl/extractor/xtube.py
@@ -40,22 +40,6 @@ class XTubeIE(InfoExtractor):
'age_limit': 18,
}
}, {
- # FLV videos with duplicated formats
- 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
- 'md5': 'a406963eb349dd43692ec54631efd88b',
- 'info_dict': {
- 'id': '9299752',
- 'display_id': 'A-Super-Run-Part-1-YT',
- 'ext': 'flv',
- 'title': 'A Super Run - Part 1 (YT)',
- 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
- 'uploader': 'tshirtguy59',
- 'duration': 579,
- 'view_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- },
- }, {
# new URL schema
'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
'only_matching': True,
@@ -71,7 +55,7 @@ class XTubeIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/xxxymovies.py b/hypervideo_dl/extractor/xxxymovies.py
index e34ebe3..0d53601 100644
--- a/hypervideo_dl/extractor/xxxymovies.py
+++ b/hypervideo_dl/extractor/xxxymovies.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -29,7 +28,7 @@ class XXXYMoviesIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
diff --git a/hypervideo_dl/extractor/yahoo.py b/hypervideo_dl/extractor/yahoo.py
index a17b10d..53556de 100644
--- a/hypervideo_dl/extractor/yahoo.py
+++ b/hypervideo_dl/extractor/yahoo.py
@@ -22,6 +22,7 @@ from ..utils import (
)
from .brightcove import BrightcoveNewIE
+from .youtube import YoutubeIE
class YahooIE(InfoExtractor):
@@ -38,6 +39,7 @@ class YahooIE(InfoExtractor):
'timestamp': 1369812016,
'upload_date': '20130529',
},
+ 'skip': 'No longer exists',
}, {
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
'md5': '7993e572fac98e044588d0b5260f4352',
@@ -50,6 +52,7 @@ class YahooIE(InfoExtractor):
'timestamp': 1406838636,
'upload_date': '20140731',
},
+ 'skip': 'Unfortunately, this video is not available in your region',
}, {
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
'md5': '71298482f7c64cbb7fa064e4553ff1c1',
@@ -61,7 +64,8 @@ class YahooIE(InfoExtractor):
'duration': 97,
'timestamp': 1414489862,
'upload_date': '20141028',
- }
+ },
+ 'skip': 'No longer exists',
}, {
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '88e209b417f173d86186bef6e4d1f160',
@@ -120,6 +124,7 @@ class YahooIE(InfoExtractor):
'season_number': 6,
'episode_number': 1,
},
+ 'skip': 'No longer exists',
}, {
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
@@ -156,7 +161,7 @@ class YahooIE(InfoExtractor):
'id': '352CFDOQrKg',
'ext': 'mp4',
'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
- 'description': 'md5:35b61e94c2ae214bc965ff4245f80d11',
+ 'description': 'md5:7fe8e3d5806f96002e55f190d1d94479',
'uploader': 'The Voice',
'uploader_id': 'NBCTheVoice',
'upload_date': '20191029',
@@ -165,7 +170,7 @@ class YahooIE(InfoExtractor):
'params': {
'playlistend': 2,
},
- 'expected_warnings': ['HTTP Error 404'],
+ 'expected_warnings': ['HTTP Error 404', 'Ignoring subtitle tracks'],
}, {
'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
'only_matching': True,
@@ -239,7 +244,7 @@ class YahooIE(InfoExtractor):
'm3u8_native', m3u8_id='hls', fatal=False))
if not formats and msg == 'geo restricted':
- self.raise_geo_restricted()
+ self.raise_geo_restricted(metadata_available=True)
self._sort_formats(formats)
@@ -274,18 +279,19 @@ class YahooIE(InfoExtractor):
}
def _real_extract(self, url):
- url, country, display_id = re.match(self._VALID_URL, url).groups()
+ url, country, display_id = self._match_valid_url(url).groups()
if not country:
country = 'us'
else:
country = country.split('-')[0]
- item = self._download_json(
+ items = self._download_json(
'https://%s.yahoo.com/caas/content/article' % country, display_id,
'Downloading content JSON metadata', query={
'url': url
- })['items'][0]['data']['partnerData']
+ })['items'][0]
+ item = items['data']['partnerData']
if item.get('type') != 'video':
entries = []
@@ -299,9 +305,19 @@ class YahooIE(InfoExtractor):
for e in (item.get('body') or []):
if e.get('type') == 'videoIframe':
iframe_url = e.get('url')
- if not iframe_url:
- continue
+ if iframe_url:
+ entries.append(self.url_result(iframe_url))
+
+ if item.get('type') == 'storywithleadvideo':
+ iframe_url = try_get(item, lambda x: x['meta']['player']['url'])
+ if iframe_url:
entries.append(self.url_result(iframe_url))
+ else:
+ self.report_warning("Yahoo didn't provide an iframe url for this storywithleadvideo")
+
+ if items.get('markup'):
+ entries.extend(
+ self.url_result(yt_url) for yt_url in YoutubeIE._extract_urls(items['markup']))
return self.playlist_result(
entries, item.get('uuid'),
@@ -318,35 +334,19 @@ class YahooSearchIE(SearchInfoExtractor):
IE_NAME = 'screen.yahoo:search'
_SEARCH_KEY = 'yvsearch'
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
- entries = []
+ def _search_results(self, query):
for pagenum in itertools.count(0):
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
info = self._download_json(result_url, query,
note='Downloading results page ' + str(pagenum + 1))
- m = info['m']
- results = info['results']
-
- for (i, r) in enumerate(results):
- if (pagenum * 30) + i >= n:
- break
- mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
- e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
- entries.append(e)
- if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
+ yield from (self.url_result(result['rurl']) for result in info['results'])
+ if info['m']['last'] >= info['m']['total'] - 1:
break
- return {
- '_type': 'playlist',
- 'id': query,
- 'entries': entries,
- }
-
class YahooGyaOPlayerIE(InfoExtractor):
IE_NAME = 'yahoo:gyao:player'
- _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
'info_dict': {
@@ -368,6 +368,9 @@ class YahooGyaOPlayerIE(InfoExtractor):
}, {
'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/episode/5fa1226c-ef8d-4e93-af7a-fd92f4e30597',
+ 'only_matching': True,
}]
_GEO_BYPASS = False
@@ -508,7 +511,7 @@ class YahooJapanNewsIE(InfoExtractor):
return formats
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host')
display_id = mobj.group('id') or host
diff --git a/hypervideo_dl/extractor/yandexdisk.py b/hypervideo_dl/extractor/yandexdisk.py
index 6fcd8ee..c15f3a4 100644
--- a/hypervideo_dl/extractor/yandexdisk.py
+++ b/hypervideo_dl/extractor/yandexdisk.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
@@ -57,7 +56,7 @@ class YandexDiskIE(InfoExtractor):
}]
def _real_extract(self, url):
- domain, video_id = re.match(self._VALID_URL, url).groups()
+ domain, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id)
store = self._parse_json(self._search_regex(
diff --git a/hypervideo_dl/extractor/yandexmusic.py b/hypervideo_dl/extractor/yandexmusic.py
index 0b86c71..8e94f1f 100644
--- a/hypervideo_dl/extractor/yandexmusic.py
+++ b/hypervideo_dl/extractor/yandexmusic.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import hashlib
import itertools
-import re
from .common import InfoExtractor
from ..compat import compat_str
@@ -109,7 +108,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
track = self._call_api(
@@ -291,7 +290,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
tld = mobj.group('tld')
album_id = mobj.group('id')
@@ -342,7 +341,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
tld = mobj.group('tld')
user = mobj.group('user')
playlist_id = mobj.group('id')
@@ -381,7 +380,7 @@ class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
})
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
tld = mobj.group('tld')
artist_id = mobj.group('id')
data = self._call_artist(tld, url, artist_id)
@@ -410,7 +409,7 @@ class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
_ARTIST_WHAT = 'tracks'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
tld = mobj.group('tld')
artist_id = mobj.group('id')
data = self._call_artist(tld, url, artist_id)
@@ -440,7 +439,7 @@ class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
_ARTIST_WHAT = 'albums'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
tld = mobj.group('tld')
artist_id = mobj.group('id')
data = self._call_artist(tld, url, artist_id)
diff --git a/hypervideo_dl/extractor/yandexvideo.py b/hypervideo_dl/extractor/yandexvideo.py
index 6a166ec..9974d65 100644
--- a/hypervideo_dl/extractor/yandexvideo.py
+++ b/hypervideo_dl/extractor/yandexvideo.py
@@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
+import re
+
from .common import InfoExtractor
from ..utils import (
determine_ext,
@@ -142,3 +145,88 @@ class YandexVideoIE(InfoExtractor):
'release_year': int_or_none(content.get('release_year')),
'formats': formats,
}
+
+
+class ZenYandexIE(InfoExtractor):
+ _VALID_URL = r'https?://zen\.yandex\.ru/media/(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-(?P<id>[a-z0-9-]+)'
+ _TESTS = [{
+ 'url': 'https://zen.yandex.ru/media/popmech/izverjenie-vulkana-iz-spichek-zreliscnyi-opyt-6002240ff8b1af50bb2da5e3',
+ 'info_dict': {
+ 'id': '6002240ff8b1af50bb2da5e3',
+ 'ext': 'mp4',
+ 'title': 'Извержение вулкана из спичек: зрелищный опыт',
+ 'description': 'md5:053ad3c61b5596d510c9a199dc8ee633',
+ 'thumbnail': 'https://avatars.mds.yandex.net/get-zen-pub-og/3558619/pub_6002240ff8b1af50bb2da5e3_600bad814d953e4132a30b5e/orig',
+ 'uploader': 'Популярная механика',
+ },
+ }, {
+ 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
+ 'info_dict': {
+ 'id': '60c7c443da18892ebfe85ed7',
+ 'ext': 'mp4',
+ 'title': 'ВОТ ЭТО Focus. Деды Морозы на гидроциклах',
+ 'description': 'md5:8684912f6086f298f8078d4af0e8a600',
+ 'thumbnail': 'https://avatars.mds.yandex.net/get-zen-pub-og/4410519/pub_60c7c443da18892ebfe85ed7_60c7c48e060a163121f42cc3/orig',
+ 'uploader': 'AcademeG DailyStream'
+ },
+ }, {
+ 'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/novyi-samsung-fold-3-moskvich-barahlit-612f93b7f8d48e7e945792a2?from=channel&rid=2286618386.482.1630817595976.42360',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ data_json = self._parse_json(self._search_regex(r'w\._data\s?=\s?({.+?});', webpage, 'metadata'), video_id)
+ stream_json = try_get(data_json, lambda x: x['publication']['content']['gifContent'], dict)
+ stream_url = stream_json.get('stream') or try_get(stream_json, lambda x: x['streams']['url'])
+ formats = self._extract_m3u8_formats(stream_url, video_id)
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': try_get(data_json, (lambda x: x['og']['title'], lambda x: x['publication']['content']['preview']['title'])),
+ 'uploader': data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']),
+ 'description': try_get(data_json, lambda x: x['og']['description']),
+ 'thumbnail': try_get(data_json, lambda x: x['og']['imageUrl']),
+ 'formats': formats,
+ }
+
+
+class ZenYandexChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://zen\.yandex\.ru/(?!media)(?:id/)?(?P<id>[a-z0-9-_]+)'
+ _TESTS = [{
+ 'url': 'https://zen.yandex.ru/tok_media',
+ 'info_dict': {
+ 'id': 'tok_media',
+ },
+ 'playlist_mincount': 169,
+ }, {
+ 'url': 'https://zen.yandex.ru/id/606fd806cc13cb3c58c05cf5',
+ 'info_dict': {
+ 'id': '606fd806cc13cb3c58c05cf5',
+ },
+ 'playlist_mincount': 657,
+ }]
+
+ def _entries(self, channel_id, url):
+ webpage = self._download_webpage(url, channel_id)
+ data_json = self._parse_json(re.findall(r'var\s?data\s?=\s?({.+?})\s?;', webpage)[-1], channel_id)
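+ # the feed state is nested under a versioned '__serverState__*' key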
+ for key in data_json.keys():
+ if key.startswith('__serverState__'):
+ data_json = data_json[key]
+ items = list(try_get(data_json, lambda x: x['feed']['items'], dict).values())
+ more = try_get(data_json, lambda x: x['links']['more']) or None
+ for page in itertools.count(1):
+ for item in items:
+ video_id = item.get('publication_id') or item.get('publicationId')
+ video_url = item.get('link')
+ yield self.url_result(video_url, ie=ZenYandexIE.ie_key(), video_id=video_id.split(':')[-1])
+ if not more:
+ break
+ data_json = self._download_json(more, channel_id, note='Downloading Page %d' % page)
+ items = data_json.get('items', [])
+ more = try_get(data_json, lambda x: x['more']['link']) or None
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ return self.playlist_result(self._entries(channel_id, url), playlist_id=channel_id)
diff --git a/hypervideo_dl/extractor/youjizz.py b/hypervideo_dl/extractor/youjizz.py
index 88aabd2..5f5fbf2 100644
--- a/hypervideo_dl/extractor/youjizz.py
+++ b/hypervideo_dl/extractor/youjizz.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -32,7 +31,7 @@ class YouJizzIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('embed_id')
webpage = self._download_webpage(url, video_id)
diff --git a/hypervideo_dl/extractor/youku.py b/hypervideo_dl/extractor/youku.py
index 880c896..b505799 100644
--- a/hypervideo_dl/extractor/youku.py
+++ b/hypervideo_dl/extractor/youku.py
@@ -160,7 +160,7 @@ class YoukuIE(InfoExtractor):
'client_ts': time.time() / 1000,
}
- video_password = self._downloader.params.get('videopassword')
+ video_password = self.get_param('videopassword')
if video_password:
basic_data_params['password'] = video_password
diff --git a/hypervideo_dl/extractor/youporn.py b/hypervideo_dl/extractor/youporn.py
index 7084d3d..5feb568 100644
--- a/hypervideo_dl/extractor/youporn.py
+++ b/hypervideo_dl/extractor/youporn.py
@@ -74,7 +74,7 @@ class YouPornIE(InfoExtractor):
webpage)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
diff --git a/hypervideo_dl/extractor/youtube.py b/hypervideo_dl/extractor/youtube.py
index 2272a02..dc5ee63 100644
--- a/hypervideo_dl/extractor/youtube.py
+++ b/hypervideo_dl/extractor/youtube.py
@@ -2,11 +2,17 @@
from __future__ import unicode_literals
+import base64
+import calendar
+import copy
+import datetime
+import hashlib
import itertools
import json
import os.path
import random
import re
+import time
import traceback
from .common import InfoExtractor, SearchInfoExtractor
@@ -22,231 +28,250 @@ from ..compat import (
)
from ..jsinterp import JSInterpreter
from ..utils import (
- ExtractorError,
+ bytes_to_intlist,
clean_html,
+ datetime_from_str,
dict_get,
+ error_to_compat_str,
+ ExtractorError,
float_or_none,
+ format_field,
int_or_none,
+ intlist_to_bytes,
+ is_html,
mimetype2ext,
+ network_exceptions,
+ orderedSet,
parse_codecs,
+ parse_count,
parse_duration,
+ parse_iso8601,
+ parse_qs,
qualities,
+ remove_end,
remove_start,
smuggle_url,
str_or_none,
str_to_int,
+ traverse_obj,
try_get,
unescapeHTML,
unified_strdate,
unsmuggle_url,
update_url_query,
url_or_none,
- urlencode_postdata,
urljoin,
+ variadic,
)
-def parse_qs(url):
- return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+# any clients starting with _ cannot be explicitly requested by the user
+INNERTUBE_CLIENTS = {
+ 'web': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20210622.10.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
+ },
+ 'web_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_EMBEDDED_PLAYER',
+ 'clientVersion': '1.20210620.0.1',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
+ },
+ 'web_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
+ 'INNERTUBE_HOST': 'music.youtube.com',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_REMIX',
+ 'clientVersion': '1.20210621.00.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
+ },
+ 'web_creator': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB_CREATOR',
+ 'clientVersion': '1.20210621.00.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
+ },
+ 'android': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID',
+ 'clientVersion': '16.20',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'android_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_EMBEDDED_PLAYER',
+ 'clientVersion': '16.20',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'android_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
+ 'INNERTUBE_HOST': 'music.youtube.com',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_MUSIC',
+ 'clientVersion': '4.32',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'android_creator': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_CREATOR',
+ 'clientVersion': '21.24.100',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ # ios has HLS live streams
+ # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
+ 'ios': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS',
+ 'clientVersion': '16.20',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'ios_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_MESSAGES_EXTENSION',
+ 'clientVersion': '16.20',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'ios_music': {
+ 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
+ 'INNERTUBE_HOST': 'music.youtube.com',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_MUSIC',
+ 'clientVersion': '4.32',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ 'ios_creator': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'IOS_CREATOR',
+ 'clientVersion': '21.24.100',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
+ 'REQUIRE_JS_PLAYER': False
+ },
+ # mweb has 'ultralow' formats
+ # See: https://github.com/hypervideo/hypervideo/pull/557
+ 'mweb': {
+ 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MWEB',
+ 'clientVersion': '2.20210721.07.00',
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
+ },
+}
+
+
+def build_innertube_clients():
+ third_party = {
+ 'embedUrl': 'https://google.com', # Can be any valid URL
+ }
+ base_clients = ('android', 'web', 'ios', 'mweb')
+ priority = qualities(base_clients[::-1])
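+ # reversed, so android ranks highest, then web, ios, mweb; derived clients get a penalty below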
+
+ for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
+ ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
+ ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
+ ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
+ ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
+ ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
+
+ if client in base_clients:
+ INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
+ agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
+ agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
+ agegate_ytcfg['priority'] -= 1
+ elif client.endswith('_embedded'):
+ ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
+ ytcfg['priority'] -= 2
+ else:
+ ytcfg['priority'] -= 3
+
+
+build_innertube_clients()
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
- _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
- _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
- _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
- _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
- _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
+ _RESERVED_NAMES = (
+ r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
+ r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
+ r'browse|oembed|get_video_info|iframe_api|s/player|'
+ r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
+
+ _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
_NETRC_MACHINE = 'youtube'
+
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
- _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
-
def _login(self):
"""
Attempt to log in to YouTube.
- True is returned if successful or skipped.
- False is returned if login failed.
-
If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
"""
- username, password = self._get_login_info()
- # No authentication to be performed
- if username is None:
- if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
- raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
- return True
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None,
- note='Downloading login page',
- errnote='unable to fetch login page', fatal=False)
- if login_page is False:
- return
-
- login_form = self._hidden_inputs(login_page)
-
- def req(url, f_req, note, errnote):
- data = login_form.copy()
- data.update({
- 'pstMsg': 1,
- 'checkConnection': 'youtube',
- 'checkedDomains': 'youtube',
- 'hl': 'en',
- 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
- 'f.req': json.dumps(f_req),
- 'flowName': 'GlifWebSignIn',
- 'flowEntry': 'ServiceLogin',
- # TODO: reverse actual botguard identifier generation algo
- 'bgRequest': '["identifier",""]',
- })
- return self._download_json(
- url, None, note=note, errnote=errnote,
- transform_source=lambda s: re.sub(r'^[^[]*', '', s),
- fatal=False,
- data=urlencode_postdata(data), headers={
- 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
- 'Google-Accounts-XSRF': 1,
- })
-
- def warn(message):
- self._downloader.report_warning(message)
-
- lookup_req = [
- username,
- None, [], None, 'US', None, None, 2, False, True,
- [
- None, None,
- [2, 1, None, 1,
- 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
- None, [], 4],
- 1, [None, None, []], None, None, None, True
- ],
- username,
- ]
-
- lookup_results = req(
- self._LOOKUP_URL, lookup_req,
- 'Looking up account info', 'Unable to look up account info')
-
- if lookup_results is False:
- return False
-
- user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
- if not user_hash:
- warn('Unable to extract user hash')
- return False
-
- challenge_req = [
- user_hash,
- None, 1, None, [1, None, None, None, [password, None, True]],
- [
- None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
- 1, [None, None, []], None, None, None, True
- ]]
-
- challenge_results = req(
- self._CHALLENGE_URL, challenge_req,
- 'Logging in', 'Unable to log in')
- if challenge_results is False:
- return
-
- login_res = try_get(challenge_results, lambda x: x[0][5], list)
- if login_res:
- login_msg = try_get(login_res, lambda x: x[5], compat_str)
- warn(
- 'Unable to login: %s' % 'Invalid password'
- if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
- return False
-
- res = try_get(challenge_results, lambda x: x[0][-1], list)
- if not res:
- warn('Unable to extract result entry')
- return False
-
- login_challenge = try_get(res, lambda x: x[0][0], list)
- if login_challenge:
- challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
- if challenge_str == 'TWO_STEP_VERIFICATION':
- # SEND_SUCCESS - TFA code has been successfully sent to phone
- # QUOTA_EXCEEDED - reached the limit of TFA codes
- status = try_get(login_challenge, lambda x: x[5], compat_str)
- if status == 'QUOTA_EXCEEDED':
- warn('Exceeded the limit of TFA codes, try later')
- return False
-
- tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
- if not tl:
- warn('Unable to extract TL')
- return False
-
- tfa_code = self._get_tfa_info('2-step verification code')
-
- if not tfa_code:
- warn(
- 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
- '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
- return False
-
- tfa_code = remove_start(tfa_code, 'G-')
-
- tfa_req = [
- user_hash, None, 2, None,
- [
- 9, None, None, None, None, None, None, None,
- [None, tfa_code, True, 2]
- ]]
-
- tfa_results = req(
- self._TFA_URL.format(tl), tfa_req,
- 'Submitting TFA code', 'Unable to submit TFA code')
-
- if tfa_results is False:
- return False
-
- tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
- if tfa_res:
- tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
- warn(
- 'Unable to finish TFA: %s' % 'Invalid TFA code'
- if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
- return False
-
- check_cookie_url = try_get(
- tfa_results, lambda x: x[0][-1][2], compat_str)
- else:
- CHALLENGES = {
- 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
- 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
- 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
- }
- challenge = CHALLENGES.get(
- challenge_str,
- '%s returned error %s.' % (self.IE_NAME, challenge_str))
- warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
- return False
- else:
- check_cookie_url = try_get(res, lambda x: x[2], compat_str)
-
- if not check_cookie_url:
- warn('Unable to extract CheckCookie URL')
- return False
-
- check_cookie_results = self._download_webpage(
- check_cookie_url, None, 'Checking cookie', fatal=False)
-
- if check_cookie_results is False:
- return False
-
- if 'https://myaccount.google.com/' not in check_cookie_results:
- warn('Unable to log in')
- return False
-
- return True
+ if (self._LOGIN_REQUIRED
+ and self.get_param('cookiefile') is None
+ and self.get_param('cookiesfrombrowser') is None):
+ self.raise_login_required(
+ 'Login details are needed to download this content', method='cookies')
+ username, password = self._get_login_info()
+ if username:
+ self.report_warning(f'Cannot log in to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
def _initialize_consent(self):
cookies = self._get_cookies('https://www.youtube.com/')
@@ -265,73 +290,402 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _real_initialize(self):
self._initialize_consent()
- if self._downloader is None:
- return
- if not self._login():
- return
-
- _DEFAULT_API_DATA = {
- 'context': {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': '2.20201021.03.00',
- }
- },
- }
+ self._login()
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
- def _call_api(self, ep, query, video_id, fatal=True):
- data = self._DEFAULT_API_DATA.copy()
- data.update(query)
+ def _get_default_ytcfg(self, client='web'):
+ return copy.deepcopy(INNERTUBE_CLIENTS[client])
+
+ def _get_innertube_host(self, client='web'):
+ return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
+
+ def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
+ # try_get but with fallback to default ytcfg client values when present
+ _func = lambda y: try_get(y, getter, expected_type)
+ return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
+
+ def _extract_client_name(self, ytcfg, default_client='web'):
+ return self._ytcfg_get_safe(
+ ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
+
+ def _extract_client_version(self, ytcfg, default_client='web'):
+ return self._ytcfg_get_safe(
+ ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
+
+ def _extract_api_key(self, ytcfg=None, default_client='web'):
+ return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
+
+ def _extract_context(self, ytcfg=None, default_client='web'):
+ _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
+ context = _get_context(ytcfg)
+ if context:
+ return context
+
+ context = _get_context(self._get_default_ytcfg(default_client))
+ if not ytcfg:
+ return context
+
+ # Recreate the client context (required)
+ context['client'].update({
+ 'clientVersion': self._extract_client_version(ytcfg, default_client),
+ 'clientName': self._extract_client_name(ytcfg, default_client),
+ })
+ visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
+ if visitor_data:
+ context['client']['visitorData'] = visitor_data
+ return context
+
+ _SAPISID = None
+
+ def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
+ time_now = round(time.time())
+ if self._SAPISID is None:
+ yt_cookies = self._get_cookies('https://www.youtube.com')
+ # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
+ # See: https://github.com/hypervideo/hypervideo/issues/393
+ sapisid_cookie = dict_get(
+ yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
+ if sapisid_cookie and sapisid_cookie.value:
+ self._SAPISID = sapisid_cookie.value
+ self.write_debug('Extracted SAPISID cookie')
+ # A SAPISID cookie is required; set it from __Secure-3PAPISID if missing
+ if not yt_cookies.get('SAPISID'):
+ self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
+ self._set_cookie(
+ '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
+ else:
+ self._SAPISID = False
+ if not self._SAPISID:
+ return None
+ # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
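+ # The hash input is '<unix time> <SAPISID> <origin>'; the resulting value
+ # looks like 'SAPISIDHASH 1634567890_<40-char hex SHA-1 digest>' (timestamp illustrative)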
+ sapisidhash = hashlib.sha1(
+ f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
+ return f'SAPISIDHASH {time_now}_{sapisidhash}'
+
+ def _call_api(self, ep, query, video_id, fatal=True, headers=None,
+ note='Downloading API JSON', errnote='Unable to download API page',
+ context=None, api_key=None, api_hostname=None, default_client='web'):
+
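+ # POSTs the JSON body to https://<host>/youtubei/v1/<ep>?key=<api key>,
+ # merging the innertube client context into the request payload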
+ data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
+ data.update(query)
+ real_headers = self.generate_api_headers(default_client=default_client)
+ real_headers.update({'content-type': 'application/json'})
+ if headers:
+ real_headers.update(headers)
return self._download_json(
- 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
- note='Downloading API JSON', errnote='Unable to download API page',
- data=json.dumps(data).encode('utf8'), fatal=fatal,
- headers={'content-type': 'application/json'},
- query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
+ 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
+ video_id=video_id, fatal=fatal, note=note, errnote=errnote,
+ data=json.dumps(data).encode('utf8'), headers=real_headers,
+ query={'key': api_key or self._extract_api_key()})
+
+ def extract_yt_initial_data(self, item_id, webpage, fatal=True):
+ data = self._search_regex(
+ (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
+ self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
+ if data:
+ return self._parse_json(data, item_id, fatal=fatal)
- def _extract_yt_initial_data(self, video_id, webpage):
- return self._parse_json(
- self._search_regex(
- (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
- self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
- video_id)
+ @staticmethod
+ def _extract_session_index(*data):
+ """
+ Index of current account in account list.
+ See: https://github.com/hypervideo/hypervideo/pull/519
+ """
+ for ytcfg in data:
+ session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
+ if session_index is not None:
+ return session_index
+
+ # Deprecated?
+ def _extract_identity_token(self, ytcfg=None, webpage=None):
+ if ytcfg:
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+ if token:
+ return token
+ if webpage:
+ return self._search_regex(
+ r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'identity token', default=None, fatal=False)
+
+ @staticmethod
+ def _extract_account_syncid(*args):
+ """
+ Extract syncId required to download private playlists of secondary channels
+ @params response and/or ytcfg
+ """
+ for data in args:
+ # ytcfg includes channel_syncid if on secondary channel
+ delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
+ if delegated_sid:
+ return delegated_sid
+ sync_ids = (try_get(
+ data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
+ lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
+ if len(sync_ids) >= 2 and sync_ids[1]:
+ # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
+ # and just "user_syncid||" for primary channel. We only want the channel_syncid
+ return sync_ids[0]
- def _extract_ytcfg(self, video_id, webpage):
+ @staticmethod
+ def _extract_visitor_data(*args):
+ """
+ Extracts visitorData from an API response or ytcfg
+ Appears to be used to track session state
+ """
+ return traverse_obj(
+ args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
+ expected_type=compat_str, get_all=False)
+
+ @property
+ def is_authenticated(self):
+ return bool(self._generate_sapisidhash_header())
+
+ def extract_ytcfg(self, video_id, webpage):
+ if not webpage:
+ return {}
return self._parse_json(
self._search_regex(
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False) or {}
+ def generate_api_headers(
+ self, *, ytcfg=None, account_syncid=None, session_index=None,
+ visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
+
+ origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
+ headers = {
+ 'X-YouTube-Client-Name': compat_str(
+ self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
+ 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
+ 'Origin': origin,
+ 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
+ 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
+ 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
+ }
+ if session_index is None:
+ session_index = self._extract_session_index(ytcfg)
+ if account_syncid or session_index is not None:
+ headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
+
+ auth = self._generate_sapisidhash_header(origin)
+ if auth is not None:
+ headers['Authorization'] = auth
+ headers['X-Origin'] = origin
+ return {h: v for h, v in headers.items() if v is not None}
+
+ @staticmethod
+ def _build_api_continuation_query(continuation, ctp=None):
+ query = {
+ 'continuation': continuation
+ }
+ # TODO: Inconsistency with clickTrackingParams.
+ # Currently we have a fixed ctp contained within context (from ytcfg)
+ # and a ctp in root query for continuation.
+ if ctp:
+ query['clickTracking'] = {'clickTrackingParams': ctp}
+ return query
+
+ @classmethod
+ def _extract_next_continuation_data(cls, renderer):
+ next_continuation = try_get(
+ renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
+ lambda x: x['continuation']['reloadContinuationData']), dict)
+ if not next_continuation:
+ return
+ continuation = next_continuation.get('continuation')
+ if not continuation:
+ return
+ ctp = next_continuation.get('clickTrackingParams')
+ return cls._build_api_continuation_query(continuation, ctp)
+
+ @classmethod
+ def _extract_continuation_ep_data(cls, continuation_ep: dict):
+ if isinstance(continuation_ep, dict):
+ continuation = try_get(
+ continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ if not continuation:
+ return
+ ctp = continuation_ep.get('clickTrackingParams')
+ return cls._build_api_continuation_query(continuation, ctp)
+
+ @classmethod
+ def _extract_continuation(cls, renderer):
+ next_continuation = cls._extract_next_continuation_data(renderer)
+ if next_continuation:
+ return next_continuation
+
+ contents = []
+ for key in ('contents', 'items'):
+ contents.extend(try_get(renderer, lambda x: x[key], list) or [])
+
+ for content in contents:
+ if not isinstance(content, dict):
+ continue
+ continuation_ep = try_get(
+ content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
+ lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
+ dict)
+ continuation = cls._extract_continuation_ep_data(continuation_ep)
+ if continuation:
+ return continuation
+
+ @classmethod
+ def _extract_alerts(cls, data):
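+ # Yields (alert_type, message) pairs, e.g. ('ERROR', 'Video unavailable') (message illustrative)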
+ for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
+ if not isinstance(alert_dict, dict):
+ continue
+ for alert in alert_dict.values():
+ alert_type = alert.get('type')
+ if not alert_type:
+ continue
+ message = cls._get_text(alert, 'text')
+ if message:
+ yield alert_type, message
+
+ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
+ errors = []
+ warnings = []
+ for alert_type, alert_message in alerts:
+ if alert_type.lower() == 'error' and fatal:
+ errors.append([alert_type, alert_message])
+ else:
+ warnings.append([alert_type, alert_message])
+
+ for alert_type, alert_message in (warnings + errors[:-1]):
+ self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
+ if errors:
+ raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
+
+ def _extract_and_report_alerts(self, data, *args, **kwargs):
+ return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
+
+ def _extract_badges(self, renderer: dict):
+ badges = set()
+ for badge in try_get(renderer, lambda x: x['badges'], list) or []:
+ label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
+ if label:
+ badges.add(label.lower())
+ return badges
+
+ @staticmethod
+ def _get_text(data, *path_list, max_runs=None):
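+ # Returns the first text found along the given traverse_obj paths, taking
+ # either a 'simpleText' value or the joined 'text' of the 'runs' fragments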
+ for path in path_list or [None]:
+ if path is None:
+ obj = [data]
+ else:
+ obj = traverse_obj(data, path, default=[])
+ if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
+ obj = [obj]
+ for item in obj:
+ text = try_get(item, lambda x: x['simpleText'], compat_str)
+ if text:
+ return text
+ runs = try_get(item, lambda x: x['runs'], list) or []
+ if not runs and isinstance(item, list):
+ runs = item
+
+ runs = runs[:min(len(runs), max_runs or len(runs))]
+ text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
+ if text:
+ return text
+
+ def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
+ ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
+ default_client='web'):
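+ # Calls the innertube API via _call_api, retrying up to the extractor_retries
+ # parameter (default 3) on intermittent network errors and on 200 OK responses
+ # that are missing all of check_get_keys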
+ response = None
+ last_error = None
+ count = -1
+ retries = self.get_param('extractor_retries', 3)
+ if check_get_keys is None:
+ check_get_keys = []
+ while count < retries:
+ count += 1
+ if last_error:
+ self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
+ try:
+ response = self._call_api(
+ ep=ep, fatal=True, headers=headers,
+ video_id=item_id, query=query,
+ context=self._extract_context(ytcfg, default_client),
+ api_key=self._extract_api_key(ytcfg, default_client),
+ api_hostname=api_hostname, default_client=default_client,
+ note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
+ except ExtractorError as e:
+ if isinstance(e.cause, network_exceptions):
+ if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
+ e.cause.seek(0)
+ yt_error = try_get(
+ self._parse_json(e.cause.read().decode(), item_id, fatal=False),
+ lambda x: x['error']['message'], compat_str)
+ if yt_error:
+ self._report_alerts([('ERROR', yt_error)], fatal=False)
+ # Downloading the page may result in an intermittent 5xx HTTP error
+ # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+ # We also want to catch all other network exceptions since errors in later pages can be troublesome
+ # See https://github.com/hypervideo/hypervideo/issues/507#issuecomment-880188210
+ if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ last_error = error_to_compat_str(e.cause or e.msg)
+ if count < retries:
+ continue
+ if fatal:
+ raise
+ else:
+ self.report_warning(error_to_compat_str(e))
+ return
+
+ else:
+ try:
+ self._extract_and_report_alerts(response, only_once=True)
+ except ExtractorError as e:
+ # YouTube servers may return errors we want to retry on in a 200 OK response
+ # See: https://github.com/hypervideo/hypervideo/issues/839
+ if 'unknown error' in e.msg.lower():
+ last_error = e.msg
+ continue
+ if fatal:
+ raise
+ self.report_warning(error_to_compat_str(e))
+ return
+ if not check_get_keys or dict_get(response, check_get_keys):
+ break
+ # YouTube sometimes sends incomplete data
+ # See: https://github.com/ytdl-org/youtube-dl/issues/28194
+ last_error = 'Incomplete data received'
+ if count >= retries:
+ if fatal:
+ raise ExtractorError(last_error)
+ else:
+ self.report_warning(last_error)
+ return
+ return response
+
+ @staticmethod
+ def is_music_url(url):
+ return re.match(r'https?://music\.youtube\.com/', url) is not None
+
def _extract_video(self, renderer):
- video_id = renderer['videoId']
- title = try_get(
- renderer,
- (lambda x: x['title']['runs'][0]['text'],
- lambda x: x['title']['simpleText']), compat_str)
- description = try_get(
- renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
- compat_str)
- duration = parse_duration(try_get(
- renderer, lambda x: x['lengthText']['simpleText'], compat_str))
- view_count_text = try_get(
- renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+ video_id = renderer.get('videoId')
+ title = self._get_text(renderer, 'title')
+ description = self._get_text(renderer, 'descriptionSnippet')
+ duration = parse_duration(self._get_text(
+ renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
+ view_count_text = self._get_text(renderer, 'viewCountText') or ''
view_count = str_to_int(self._search_regex(
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
'view count', default=None))
- uploader = try_get(
- renderer,
- (lambda x: x['ownerText']['runs'][0]['text'],
- lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
+
+ uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
+
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
'id': video_id,
- 'url': video_id,
+ 'url': f'https://www.youtube.com/watch?v={video_id}',
'title': title,
'description': description,
'duration': duration,
@@ -347,13 +701,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?redirect\.invidious\.io',
r'(?:(?:www|dev)\.)?invidio\.us',
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
+ r'(?:www\.)?invidious\.pussthecat\.org',
+ r'(?:www\.)?invidious\.zee\.li',
+ r'(?:www\.)?invidious\.ethibox\.fr',
+ r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
+ # youtube-dl invidious instances list
r'(?:(?:www|no)\.)?invidiou\.sh',
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
r'(?:www\.)?invidious\.kabi\.tk',
- r'(?:www\.)?invidious\.13ad\.de',
r'(?:www\.)?invidious\.mastodon\.host',
r'(?:www\.)?invidious\.zapashcanon\.fr',
- r'(?:www\.)?invidious\.kavin\.rocks',
+ r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
r'(?:www\.)?invidious\.tinfoil-hat\.net',
r'(?:www\.)?invidious\.himiko\.cloud',
r'(?:www\.)?invidious\.reallyancient\.tech',
@@ -380,6 +738,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.toot\.koeln',
r'(?:www\.)?invidious\.fdn\.fr',
r'(?:www\.)?watch\.nettohikari\.com',
+ r'(?:www\.)?invidious\.namazso\.eu',
+ r'(?:www\.)?invidious\.silkky\.cloud',
+ r'(?:www\.)?invidious\.exonip\.de',
+ r'(?:www\.)?invidious\.riverside\.rocks',
+ r'(?:www\.)?invidious\.blamefran\.net',
+ r'(?:www\.)?invidious\.moomoo\.de',
+ r'(?:www\.)?ytb\.trom\.tf',
+ r'(?:www\.)?yt\.cyberhost\.uk',
r'(?:www\.)?kgg2m7yk5aybusll\.onion',
r'(?:www\.)?qklhadlycap4cnod\.onion',
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
@@ -388,6 +754,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
+ r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
+ r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
+ r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
+ r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
)
_VALID_URL = r"""(?x)^
(
@@ -402,7 +772,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
- (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
+ (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
|(?: # or the v= param in all its forms
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #!
@@ -421,7 +791,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)? # all until now is optional -> you can pass the naked ID
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
- $""" % {
+ (?:\#|$)""" % {
'invidious': '|'.join(_INVIDIOUS_SITES),
}
_PLAYER_INFO_RE = (
@@ -429,7 +799,116 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
- _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+ _formats = {
+ '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
+ '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
+ '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
+ '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+
+
+ # 3D videos
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+
+ # Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
+
+ # DASH mp4 video
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
+
+ # Dash mp4 audio
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
+
+ # Dash webm
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+
+ # Dash webm audio
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
+
+ # Dash webm audio with opus inside
+ '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
+ '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
+ '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
+
+ # RTMP (unnamed)
+ '_rtmp': {'protocol': 'rtmp'},
+
+ # av01 video only formats sometimes served with "unknown" codecs
+ '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
+ '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
+ '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
+ '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
+ '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
+ '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
+ '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
+ '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
+ }
+ _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_GEO_BYPASS = False
@@ -440,16 +919,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'BaW_jenozKc',
'ext': 'mp4',
- 'title': 'hypervideo test video "\'/\\ä↭𝕐',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for hypervideo.\n\nFor more information, contact phihag@phihag.de .',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
- 'tags': ['hypervideo'],
+ 'tags': ['youtube-dl'],
'duration': 10,
'view_count': int,
'like_count': int,
@@ -480,14 +959,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'BaW_jenozKc',
'ext': 'mp4',
- 'title': 'hypervideo test video "\'/\\ä↭𝕐',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for hypervideo.\n\nFor more information, contact phihag@phihag.de .',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
- 'tags': ['hypervideo'],
+ 'tags': ['youtube-dl'],
'duration': 10,
'view_count': int,
'like_count': int,
@@ -535,23 +1014,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '141/bestaudio[ext=m4a]',
},
},
- # Controversy video
- {
- 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
- 'info_dict': {
- 'id': 'T4XJQO3qol8',
- 'ext': 'mp4',
- 'duration': 219,
- 'upload_date': '20100909',
- 'uploader': 'Amazing Atheist',
- 'uploader_id': 'TheAmazingAtheist',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
- 'title': 'Burning Everyone\'s Koran',
- 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
- }
- },
- # Normal age-gate video (No vevo, embed allowed), available via embed page
+ # Age-gate videos. See https://github.com/hypervideo/hypervideo/pull/575#issuecomment-888837000
{
+ 'note': 'Age-gate video with embed allowed',
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
@@ -567,9 +1032,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
},
{
- # Age-gated video only available with authentication (unavailable
- # via embed page workaround)
- 'url': 'XgnwCQzjau8',
+ 'note': 'Age-gate video with embed allowed in public site',
+ 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
+ 'info_dict': {
+ 'id': 'HsUATh_Nc2U',
+ 'ext': 'mp4',
+ 'title': 'Godzilla 2 (Official Video)',
+ 'description': 'md5:bf77e03fcae5529475e500129b05668a',
+ 'upload_date': '20200408',
+ 'uploader_id': 'FlyingKitty900',
+ 'uploader': 'FlyingKitty',
+ 'age_limit': 18,
+ },
+ },
+ {
+ 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
+ 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
+ 'info_dict': {
+ 'id': 'Tq92D6wQ1mg',
+ 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+ 'ext': 'mp4',
+ 'upload_date': '20191227',
+ 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'uploader': 'Projekt Melody',
+ 'description': 'md5:17eccca93a786d51bc67646756894066',
+ 'age_limit': 18,
+ },
+ },
+ {
+ 'note': 'Non-age-gated non-embeddable video',
+ 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
+ 'info_dict': {
+ 'id': 'MeJVWBSsPAY',
+ 'ext': 'mp4',
+ 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
+ 'uploader': 'Herr Lurik',
+ 'uploader_id': 'st3in234',
+ 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
+ 'upload_date': '20130730',
+ },
+ },
+ {
+ 'note': 'Non-bypassable age-gated video',
+ 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
'only_matching': True,
},
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
@@ -604,7 +1109,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
- 'uploader': 'Olympic',
+ 'uploader': 'Olympics',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
},
'params': {
@@ -740,6 +1245,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'skip_download': True,
},
+ 'skip': 'Not multifeed anymore',
},
{
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
@@ -769,16 +1275,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
- 'alt_title': 'Dark Walk - Position Music',
+ 'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'duration': 133,
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
'uploader': 'IronSoulElf',
- 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
- 'track': 'Dark Walk - Position Music',
- 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
+ 'track': 'Dark Walk',
+ 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
},
'params': {
@@ -1089,6 +1595,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'only_matching': True,
},
{
+ # controversial video, requires bpctr/contentCheckOk
+ 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
+ 'info_dict': {
+ 'id': 'SZJvDhaSDnc',
+ 'ext': 'mp4',
+ 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
+ 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
+ 'uploader': 'CBS This Morning',
+ 'uploader_id': 'CBSThisMorning',
+ 'upload_date': '20140716',
+ 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
+ }
+ },
+ {
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
'url': 'cBvYw8_A0vQ',
'info_dict': {
@@ -1104,119 +1624,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ # Has multiple audio streams
+ 'url': 'WaOKSUlf4TM',
+ 'only_matching': True
+ }, {
+ # Requires Premium: has format 141 when requested using YTM url
+ 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
+ 'only_matching': True
+ }, {
+ # multiple subtitles with same lang_code
+ 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
+ 'only_matching': True,
+ }, {
+ # Force use android client fallback
+ 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
+ 'info_dict': {
+ 'id': 'YOelRv7fMxY',
+ 'title': 'DIGGING A SECRET TUNNEL Part 1',
+ 'ext': '3gp',
+ 'upload_date': '20210624',
+ 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
+ 'uploader': 'colinfurze',
+ 'uploader_id': 'colinfurze',
+ 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
+ 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
+ },
+ 'params': {
+ 'format': '17', # 3gp format available on android
+ 'extractor_args': {'youtube': {'player_client': ['android']}},
+ },
+ },
+ {
+ # Skip download of additional client configs (remix client config in this case)
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'only_matching': True,
+ 'params': {
+ 'extractor_args': {'youtube': {'player_skip': ['configs']}},
+ },
+ }, {
+ # shorts
+ 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
+ 'only_matching': True,
},
]
- _formats = {
- '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
- '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
- '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
- '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
- '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
- '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
-
-
- # 3D videos
- '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
-
- # Apple HTTP Live Streaming
- '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
-
- # DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
-
- # Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
- '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
-
- # Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
-
- # Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
-
- # Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
-
- # RTMP (unnamed)
- '_rtmp': {'protocol': 'rtmp'},
-
- # av01 video only formats sometimes served with "unknown" codecs
- '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- }
@classmethod
def suitable(cls, url):
- # Hack for lazy extractors until more generic solution is implemented
- # (see #28780)
- from .youtube import parse_qs
+ from ..utils import parse_qs
+
qs = parse_qs(url)
if qs.get('list', [None])[0]:
return False
@@ -1227,6 +1683,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._code_cache = {}
self._player_cache = {}
+ def _extract_player_url(self, *ytcfgs, webpage=None):
+ player_url = traverse_obj(
+ ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
+ get_all=False, expected_type=compat_str)
+ if not player_url:
+ return
+ if player_url.startswith('//'):
+ player_url = 'https:' + player_url
+ elif not re.match(r'https?://', player_url):
+ player_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com', player_url)
+ return player_url
+
+ def _download_player_url(self, video_id, fatal=False):
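+ # The iframe API JS embeds the current player version hash, e.g. a path like
+ # player\/<8 hex chars>\/ (escaped in the JS source), from which base.js is located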
+ res = self._download_webpage(
+ 'https://www.youtube.com/iframe_api',
+ note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
+ if res:
+ player_version = self._search_regex(
+ r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
+ if player_version:
+ return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
+
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
@@ -1241,6 +1720,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id')
+ def _load_player(self, video_id, player_url, fatal=True) -> bool:
+ player_id = self._extract_player_info(player_url)
+ if player_id not in self._code_cache:
+ code = self._download_webpage(
+ player_url, video_id, fatal=fatal,
+ note='Downloading player ' + player_id,
+ errnote='Download of %s failed' % player_url)
+ if code:
+ self._code_cache[player_id] = code
+ return player_id in self._code_cache
+
def _extract_signature_function(self, video_id, player_url, example_sig):
player_id = self._extract_player_info(player_url)
@@ -1253,20 +1743,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
- if player_id not in self._code_cache:
- self._code_cache[player_id] = self._download_webpage(
- player_url, video_id,
- note='Downloading player ' + player_id,
- errnote='Download of %s failed' % player_url)
- code = self._code_cache[player_id]
- res = self._parse_sig_js(code)
+ if self._load_player(video_id, player_url):
+ code = self._code_cache[player_id]
+ res = self._parse_sig_js(code)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = res(test_string)
+ cache_spec = [ord(c) for c in cache_res]
- self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
- return res
+ self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ return res
def _print_sig_code(self, func, example_sig):
def gen_sig_code(idxs):
@@ -1311,10 +1797,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
funcname = self._search_regex(
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
- r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
- r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
- r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
+ r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
# Obsolete patterns
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
@@ -1337,11 +1823,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if player_url is None:
raise ExtractorError('Cannot decrypt signature without player_url')
- if player_url.startswith('//'):
- player_url = 'https:' + player_url
- elif not re.match(r'https?://', player_url):
- player_url = compat_urlparse.urljoin(
- 'https://www.youtube.com', player_url)
try:
player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
@@ -1350,7 +1831,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
- if self._downloader.params.get('youtube_print_sig_code'):
+ if self.get_param('youtube_print_sig_code'):
self._print_sig_code(func, s)
return func(s)
except Exception as e:
@@ -1358,11 +1839,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
- def _mark_watched(self, video_id, player_response):
- playback_url = url_or_none(try_get(
- player_response,
- lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
+ def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
+ """
+ Extract signatureTimestamp (sts)
+ Required to tell the API which sig/player version is in use.
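+ sts is a 5-digit integer, e.g. 18888 (value illustrative)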
+ """
+ sts = None
+ if isinstance(ytcfg, dict):
+ sts = int_or_none(ytcfg.get('STS'))
+
+ if not sts:
+ # Attempt to extract from player
+ if player_url is None:
+ error_msg = 'Cannot extract signature timestamp without player_url.'
+ if fatal:
+ raise ExtractorError(error_msg)
+ self.report_warning(error_msg)
+ return
+ if self._load_player(video_id, player_url, fatal=fatal):
+ player_id = self._extract_player_info(player_url)
+ code = self._code_cache[player_id]
+ sts = int_or_none(self._search_regex(
+ r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
+ 'JS player signature timestamp', group='sts', fatal=fatal))
+ return sts
+
+ def _mark_watched(self, video_id, player_responses):
+ playback_url = traverse_obj(
+ player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
+ expected_type=url_or_none, get_all=False)
if not playback_url:
+ self.report_warning('Unable to mark watched')
return
parsed_playback_url = compat_urlparse.urlparse(playback_url)
qs = compat_urlparse.parse_qs(parsed_playback_url.query)
@@ -1425,47 +1932,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
- video_id = mobj.group(2)
- return video_id
-
- def _extract_chapters_from_json(self, data, video_id, duration):
- chapters_list = try_get(
+ return mobj.group('id')
+
+ def _extract_chapters_from_json(self, data, duration):
+ chapter_list = traverse_obj(
+ data, (
+ 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
+ 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
+ ), expected_type=list)
+
+ return self._extract_chapters(
+ chapter_list,
+ chapter_time=lambda chapter: float_or_none(
+ traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
+ chapter_title=lambda chapter: traverse_obj(
+ chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
+ duration=duration)
+
+ def _extract_chapters_from_engagement_panel(self, data, duration):
+ content_list = traverse_obj(
data,
- lambda x: x['playerOverlays']
- ['playerOverlayRenderer']
- ['decoratedPlayerBarRenderer']
- ['decoratedPlayerBarRenderer']
- ['playerBar']
- ['chapteredPlayerBarRenderer']
- ['chapters'],
- list)
- if not chapters_list:
- return
-
- def chapter_time(chapter):
- return float_or_none(
- try_get(
- chapter,
- lambda x: x['chapterRenderer']['timeRangeStartMillis'],
- int),
- scale=1000)
+ ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
+ expected_type=list, default=[])
+ chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
+ chapter_title = lambda chapter: self._get_text(chapter, 'title')
+
+ return next((
+ filter(None, (
+ self._extract_chapters(
+ traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
+ chapter_time, chapter_title, duration)
+ for contents in content_list
+ ))), [])
+
+ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
chapters = []
- for next_num, chapter in enumerate(chapters_list, start=1):
+ last_chapter = {'start_time': 0}
+ for idx, chapter in enumerate(chapter_list or []):
+ title = chapter_title(chapter)
start_time = chapter_time(chapter)
if start_time is None:
continue
- end_time = (chapter_time(chapters_list[next_num])
- if next_num < len(chapters_list) else duration)
- if end_time is None:
- continue
- title = try_get(
- chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
- compat_str)
- chapters.append({
- 'start_time': start_time,
- 'end_time': end_time,
- 'title': title,
- })
+ last_chapter['end_time'] = start_time
+ if start_time < last_chapter['start_time']:
+ if idx == 1:
+ chapters.pop()
+ self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
+ else:
+ self.report_warning(f'Invalid start time for chapter "{title}"')
+ continue
+ last_chapter = {'start_time': start_time, 'title': title}
+ chapters.append(last_chapter)
+ last_chapter['end_time'] = duration
return chapters
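
Note: the end-time chaining in _extract_chapters is easy to miss: each chapter's start time closes the previous chapter, and the video duration closes the last one. A simplified standalone sketch (it drops the idx == 1 pop-back special case; inputs are made up):

    def build_chapters(raw, duration):
        # raw: list of (start_time, title) pairs, as produced by the
        # chapter_time/chapter_title callables above (simplified)
        chapters, last = [], {'start_time': 0}
        for start, title in raw:
            if start is None:
                continue
            last['end_time'] = start
            if start < last['start_time']:
                continue  # invalid ordering; the extractor warns and skips
            last = {'start_time': start, 'title': title}
            chapters.append(last)
        last['end_time'] = duration
        return chapters

    print(build_chapters([(0.0, 'Intro'), (63.0, 'Main part'), (300.0, 'Outro')], 412))
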
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
@@ -1473,132 +1991,436 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
regex), webpage, name, default='{}'), video_id, fatal=False)
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- video_id = self._match_id(url)
- base_url = self.http_scheme() + '//www.youtube.com/'
- webpage_url = base_url + 'watch?v=' + video_id
- webpage = self._download_webpage(
- webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+ @staticmethod
+ def parse_time_text(time_text):
+ """
+ Parse the comment time text
+ time_text is in the format 'X units ago (edited)'
+ """
+ time_text_split = time_text.split(' ')
+ if len(time_text_split) >= 3:
+ try:
+ return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
+ except ValueError:
+ return None
+
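
Note: datetime_from_str is a hypervideo utility; as an illustration of what 'now-5months' resolves to, here is a rough standard-library stand-in (month and year lengths are approximated, illustration only):

    import datetime
    import re

    _UNIT_SECONDS = {'second': 1, 'minute': 60, 'hour': 3600, 'day': 86400,
                     'week': 604800, 'month': 2592000, 'year': 31536000}

    def rough_time_ago(time_text):
        # Handles e.g. '5 months ago (edited)'; not the real datetime_from_str
        m = re.match(r'(\d+)\s+(second|minute|hour|day|week|month|year)s?\s+ago', time_text)
        if not m:
            return None
        return datetime.datetime.utcnow() - datetime.timedelta(
            seconds=int(m.group(1)) * _UNIT_SECONDS[m.group(2)])

    print(rough_time_ago('5 months ago (edited)'))
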
+ def _extract_comment(self, comment_renderer, parent=None):
+ comment_id = comment_renderer.get('commentId')
+ if not comment_id:
+ return
- player_response = None
- if webpage:
- player_response = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
- video_id, 'initial player response')
- if not player_response:
- player_response = self._call_api(
- 'player', {'videoId': video_id}, video_id)
-
- playability_status = player_response.get('playabilityStatus') or {}
- if playability_status.get('reason') == 'Sign in to confirm your age':
- pr = self._parse_json(try_get(compat_parse_qs(
- self._download_webpage(
- base_url + 'get_video_info', video_id,
- 'Refetching age-gated info webpage',
- 'unable to download video info webpage', query={
- 'video_id': video_id,
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'html5': 1,
- }, fatal=False)),
- lambda x: x['player_response'][0],
- compat_str) or '{}', video_id)
- if pr:
- player_response = pr
+ text = self._get_text(comment_renderer, 'contentText')
+
+ # note: timestamp is an estimate calculated from the current time and time_text
+ time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
+ time_text_dt = self.parse_time_text(time_text)
+ # Initialize to None so an unparseable time_text cannot raise NameError below
+ timestamp = None
+ if isinstance(time_text_dt, datetime.datetime):
+ timestamp = calendar.timegm(time_text_dt.timetuple())
+ author = self._get_text(comment_renderer, 'authorText')
+ author_id = try_get(comment_renderer,
+ lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
+
+ votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
+ lambda x: x['likeCount']), compat_str)) or 0
+ author_thumbnail = try_get(comment_renderer,
+ lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
+
+ author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
+ is_favorited = 'creatorHeart' in (try_get(
+ comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
+ return {
+ 'id': comment_id,
+ 'text': text,
+ 'timestamp': timestamp,
+ 'time_text': time_text,
+ 'like_count': votes,
+ 'is_favorited': is_favorited,
+ 'author': author,
+ 'author_id': author_id,
+ 'author_thumbnail': author_thumbnail,
+ 'author_is_uploader': author_is_uploader,
+ 'parent': parent or 'root'
+ }
- trailer_video_id = try_get(
- playability_status,
- lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
- compat_str)
- if trailer_video_id:
- return self.url_result(
- trailer_video_id, self.ie_key(), trailer_video_id)
+ def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
- def get_text(x):
- if not x:
- return
- text = x.get('simpleText')
- if text and isinstance(text, compat_str):
- return text
- runs = x.get('runs')
- if not isinstance(runs, list):
- return
- return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
-
- search_meta = (
- lambda x: self._html_search_meta(x, webpage, default=None)) \
- if webpage else lambda x: None
-
- video_details = player_response.get('videoDetails') or {}
- microformat = try_get(
- player_response,
- lambda x: x['microformat']['playerMicroformatRenderer'],
- dict) or {}
- video_title = video_details.get('title') \
- or get_text(microformat.get('title')) \
- or search_meta(['og:title', 'twitter:title', 'title'])
- video_description = video_details.get('shortDescription')
+ def extract_header(contents):
+ _continuation = None
+ for content in contents:
+ comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
+ expected_comment_count = parse_count(self._get_text(
+ comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
+
+ if expected_comment_count:
+ comment_counts[1] = expected_comment_count
+ self.to_screen('Downloading ~%d comments' % expected_comment_count)
+ sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
+ comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
+
+ sort_menu_item = try_get(
+ comments_header_renderer,
+ lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
+ sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
+
+ _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
+ if not _continuation:
+ continue
- if not smuggled_data.get('force_singlefeed', False):
- if not self._downloader.params.get('noplaylist'):
- multifeed_metadata_list = try_get(
- player_response,
- lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
- compat_str)
- if multifeed_metadata_list:
- entries = []
- feed_ids = []
- for feed in multifeed_metadata_list.split(','):
- # Unquote should take place before split on comma (,) since textual
- # fields may contain comma as well (see
- # https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(
- compat_urllib_parse_unquote_plus(feed))
+ sort_text = sort_menu_item.get('title')
+ if isinstance(sort_text, compat_str):
+ sort_text = sort_text.lower()
+ else:
+ sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
+ self.to_screen('Sorting comments by %s' % sort_text)
+ break
+ return _continuation
- def feed_entry(name):
- return try_get(
- feed_data, lambda x: x[name][0], compat_str)
+ def extract_thread(contents):
+ if not parent:
+ comment_counts[2] = 0
+ for content in contents:
+ comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
+ comment_renderer = try_get(
+ comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
+ content, (lambda x: x['commentRenderer'], dict))
- feed_id = feed_entry('id')
- if not feed_id:
- continue
- feed_title = feed_entry('title')
- title = video_title
- if feed_title:
- title += ' (%s)' % feed_title
- entries.append({
- '_type': 'url_transparent',
- 'ie_key': 'Youtube',
- 'url': smuggle_url(
- base_url + 'watch?v=' + feed_data['id'][0],
- {'force_singlefeed': True}),
- 'title': title,
- })
- feed_ids.append(feed_id)
- self.to_screen(
- 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
- % (', '.join(feed_ids), video_id))
- return self.playlist_result(
- entries, video_id, video_title, video_description)
+ if not comment_renderer:
+ continue
+ comment = self._extract_comment(comment_renderer, parent)
+ if not comment:
+ continue
+ comment_counts[0] += 1
+ yield comment
+ # Attempt to get the replies
+ comment_replies_renderer = try_get(
+ comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
+
+ if comment_replies_renderer:
+ comment_counts[2] += 1
+ comment_entries_iter = self._comment_entries(
+ comment_replies_renderer, ytcfg, video_id,
+ parent=comment.get('id'), comment_counts=comment_counts)
+
+ for reply_comment in comment_entries_iter:
+ yield reply_comment
+
+ # YouTube comments have a max depth of 2
+ max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
+ if max_depth == 1 and parent:
+ return
+ if not comment_counts:
+ # comments so far, estimated total comments, current comment thread index
+ comment_counts = [0, 0, 0]
+
+ continuation = self._extract_continuation(root_continuation_data)
+ if continuation and len(continuation['continuation']) < 27:
+ self.write_debug('Detected old API continuation token. Generating new API compatible token.')
+ continuation_token = self._generate_comment_continuation(video_id)
+ continuation = self._build_api_continuation_query(continuation_token, None)
+
+ message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
+ if message and not parent:
+ self.report_warning(message, video_id=video_id)
+
+ visitor_data = None
+ is_first_continuation = parent is None
+
+ for page_num in itertools.count(0):
+ if not continuation:
+ break
+ headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
+ comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
+ if page_num == 0:
+ if is_first_continuation:
+ note_prefix = 'Downloading comment section API JSON'
+ else:
+ note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
+ comment_counts[2], comment_prog_str)
else:
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
+ ' ' if parent else '', ' replies' if parent else '',
+ page_num, comment_prog_str)
+
+ response = self._extract_response(
+ item_id=None, query=continuation,
+ ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
+ check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
+ if not response:
+ break
+ visitor_data = try_get(
+ response,
+ lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
+ compat_str) or visitor_data
+
+ continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
+
+ continuation = None
+ if isinstance(continuation_contents, list):
+ for continuation_section in continuation_contents:
+ if not isinstance(continuation_section, dict):
+ continue
+ continuation_items = try_get(
+ continuation_section,
+ (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
+ lambda x: x['appendContinuationItemsAction']['continuationItems']),
+ list) or []
+ if is_first_continuation:
+ continuation = extract_header(continuation_items)
+ is_first_continuation = False
+ if continuation:
+ break
+ continue
+ count = 0
+ # start=1 so that count == 0 below reliably means no comments were yielded
+ for count, entry in enumerate(extract_thread(continuation_items), start=1):
+ yield entry
+ continuation = self._extract_continuation({'contents': continuation_items})
+ if continuation:
+ # Sometimes YouTube provides a continuation without any comments
+ # In most cases we end up just downloading these with very few comments left to come.
+ if count == 0:
+ if not parent:
+ self.report_warning('No comments received - assuming end of comments')
+ continuation = None
+ break
+
+ # Deprecated response structure
+ elif isinstance(continuation_contents, dict):
+ known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
+ for key, continuation_renderer in continuation_contents.items():
+ if key not in known_continuation_renderers:
+ continue
+ if not isinstance(continuation_renderer, dict):
+ continue
+ if is_first_continuation:
+ header_continuation_items = [continuation_renderer.get('header') or {}]
+ continuation = extract_header(header_continuation_items)
+ is_first_continuation = False
+ if continuation:
+ break
+
+ # Sometimes YouTube provides a continuation without any comments
+ # In most cases we end up just downloading these with very few comments left to come.
+ count = 0
+ # start=1 so that count == 0 below reliably means no comments were yielded
+ for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), start=1):
+ yield entry
+ continuation = self._extract_continuation(continuation_renderer)
+ if count == 0:
+ if not parent:
+ self.report_warning('No comments received - assuming end of comments')
+ continuation = None
+ break
+
+ @staticmethod
+ def _generate_comment_continuation(video_id):
+ """
+ Generates initial comment section continuation token from given video id
+ """
+ b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
+ parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
+ new_continuation_intlist = list(itertools.chain.from_iterable(
+ [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
+ return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
+
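
Note: the token construction above is pure base64 plumbing: each opaque constant is decoded to raw bytes, the re-encoded video id is spliced in twice, and the concatenation is encoded again. An equivalent standard-library sketch:

    import base64

    def generate_comment_continuation(video_id):
        # Same construction as _generate_comment_continuation above,
        # without the bytes_to_intlist/intlist_to_bytes helpers
        b64_vid_id = base64.b64encode(video_id.encode('utf-8'))
        parts = (b'Eg0SCw==', b64_vid_id, b'GAYyJyIRIgs=', b64_vid_id,
                 b'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
        raw = b''.join(base64.b64decode(part) for part in parts)
        return base64.b64encode(raw).decode('utf-8')

    print(generate_comment_continuation('dQw4w9WgXcQ'))
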
+ def _get_comments(self, ytcfg, video_id, contents, webpage):
+ """Entry for comment extraction"""
+ def _real_comment_extract(contents):
+ renderer = next((
+ item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
+ if item.get('sectionIdentifier') == 'comment-item-section'), None)
+ yield from self._comment_entries(renderer, ytcfg, video_id)
+
+ max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
+ # Force English regardless of account setting to prevent parsing issues
+ # See: https://github.com/hypervideo/hypervideo/issues/532
+ ytcfg = copy.deepcopy(ytcfg)
+ traverse_obj(
+ ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
+ return itertools.islice(_real_comment_extract(contents), 0, max_comments)
+
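
Note: the hl override above mutates a deep copy so the caller's ytcfg is untouched. Roughly, with the key layout assumed from the code above:

    import copy

    ytcfg = {'INNERTUBE_CONTEXT': {'client': {'hl': 'de', 'clientName': 'WEB'}}}
    patched = copy.deepcopy(ytcfg)
    patched.setdefault('INNERTUBE_CONTEXT', {}).setdefault('client', {})['hl'] = 'en'
    print(patched['INNERTUBE_CONTEXT']['client']['hl'],
          ytcfg['INNERTUBE_CONTEXT']['client']['hl'])  # en de
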
+ @staticmethod
+ def _get_checkok_params():
+ return {'contentCheckOk': True, 'racyCheckOk': True}
+
+ @classmethod
+ def _generate_player_context(cls, sts=None):
+ context = {
+ 'html5Preference': 'HTML5_PREF_WANTS',
+ }
+ if sts is not None:
+ context['signatureTimestamp'] = sts
+ return {
+ 'playbackContext': {
+ 'contentPlaybackContext': context
+ },
+ **cls._get_checkok_params()
+ }
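
Note: for sts=18888 the player query body built above comes out as follows (the video id is illustrative):

    import json

    context = {'html5Preference': 'HTML5_PREF_WANTS', 'signatureTimestamp': 18888}
    query = {
        'videoId': 'dQw4w9WgXcQ',
        'playbackContext': {'contentPlaybackContext': context},
        'contentCheckOk': True,
        'racyCheckOk': True,
    }
    print(json.dumps(query, indent=2))
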
- formats = []
- itags = []
- itag_qualities = {}
+ @staticmethod
+ def _is_agegated(player_response):
+ if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
+ return True
+
+ reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
+ AGE_GATE_REASONS = (
+ 'confirm your age', 'age-restricted', 'inappropriate', # reason
+ 'age_verification_required', 'age_check_required', # status
+ )
+ return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
+
+ @staticmethod
+ def _is_unplayable(player_response):
+ return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
+
+ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
+
+ session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
+ syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
+ sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
+ headers = self.generate_api_headers(
+ ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
+
+ yt_query = {'videoId': video_id}
+ yt_query.update(self._generate_player_context(sts))
+ return self._extract_response(
+ item_id=video_id, ep='player', query=yt_query,
+ ytcfg=player_ytcfg, headers=headers, fatal=True,
+ default_client=client,
+ note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
+ ) or None
+
+ def _get_requested_clients(self, url, smuggled_data):
+ requested_clients = []
+ allowed_clients = sorted(
+ [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
+ key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
+ for client in self._configuration_arg('player_client'):
+ if client in allowed_clients:
+ requested_clients.append(client)
+ elif client == 'all':
+ requested_clients.extend(allowed_clients)
+ else:
+ self.report_warning(f'Skipping unsupported client {client}')
+ if not requested_clients:
+ requested_clients = ['android', 'web']
+
+ if smuggled_data.get('is_music_url') or self.is_music_url(url):
+ requested_clients.extend(
+ f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
+
+ return orderedSet(requested_clients)
+
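
Note: a condensed sketch of the client-selection rule above. The 'all' handling is omitted and the INNERTUBE_CLIENTS stub with its priorities is made up:

    INNERTUBE_CLIENTS = {'web': {'priority': 2}, 'android': {'priority': 3},
                         'web_music': {'priority': 1}}

    def requested_clients(arg_list, is_music=False):
        allowed = sorted((c for c in INNERTUBE_CLIENTS if not c.startswith('_')),
                         key=lambda c: INNERTUBE_CLIENTS[c]['priority'], reverse=True)
        clients = [c for c in arg_list if c in allowed] or ['android', 'web']
        if is_music:
            clients += [f'{c}_music' for c in clients
                        if f'{c}_music' in INNERTUBE_CLIENTS]
        return list(dict.fromkeys(clients))  # ordered dedup, like orderedSet

    print(requested_clients([], is_music=True))  # ['android', 'web', 'web_music']
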
+ def _extract_player_ytcfg(self, client, video_id):
+ url = {
+ 'web_music': 'https://music.youtube.com',
+ 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+ }.get(client)
+ if not url:
+ return {}
+ webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
+ return self.extract_ytcfg(video_id, webpage) or {}
+
+ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
+ initial_pr = None
+ if webpage:
+ initial_pr = self._extract_yt_initial_variable(
+ webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
+ video_id, 'initial player response')
+
+ original_clients = clients
+ clients = clients[::-1]
+ prs = []
+
+ def append_client(client_name):
+ if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
+ clients.append(client_name)
+
+ # Android player_response does not have microFormats, which are needed for
+ # extraction of some data. So we return the initial_pr with formats
+ # stripped out even if not requested by the user
+ # See: https://github.com/hypervideo/hypervideo/issues/501
+ if initial_pr:
+ pr = dict(initial_pr)
+ pr['streamingData'] = None
+ prs.append(pr)
+
+ last_error = None
+ tried_iframe_fallback = False
player_url = None
- q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
- streaming_data = player_response.get('streamingData') or {}
- streaming_formats = streaming_data.get('formats') or []
- streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
+ while clients:
+ client = clients.pop()
+ player_ytcfg = master_ytcfg if client == 'web' else {}
+ if 'configs' not in self._configuration_arg('player_skip'):
+ player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
+
+ player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
+ require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
+ if 'js' in self._configuration_arg('player_skip'):
+ require_js_player = False
+ player_url = None
+
+ if not player_url and not tried_iframe_fallback and require_js_player:
+ player_url = self._download_player_url(video_id)
+ tried_iframe_fallback = True
+
+ try:
+ pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
+ client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
+ except ExtractorError as e:
+ if last_error:
+ self.report_warning(last_error)
+ last_error = e
+ continue
+
+ if pr:
+ prs.append(pr)
+
+ # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
+ if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
+ append_client(client.replace('_agegate', '_creator'))
+ elif self._is_agegated(pr):
+ append_client(f'{client}_agegate')
+
+ if last_error:
+ if not len(prs):
+ raise last_error
+ self.report_warning(last_error)
+ return prs, player_url
+
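
Note: the retry chain above reads more clearly in isolation: an age-gated response appends '<client>_agegate', and an unplayable agegate response from an authenticated session swaps in '<client>_creator'. A sketch:

    def next_client(client, agegated, unplayable, authenticated):
        # Mirrors the fallback logic above (simplified, no dedup against
        # already-requested clients)
        if client.endswith('_agegate') and unplayable and authenticated:
            return client.replace('_agegate', '_creator')
        if agegated:
            return client + '_agegate'
        return None

    print(next_client('android', agegated=True, unplayable=False,
                      authenticated=False))  # android_agegate
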
+ def _extract_formats(self, streaming_data, video_id, player_url, is_live):
+ itags, stream_ids = [], []
+ itag_qualities, res_qualities = {}, {}
+ q = qualities([
+ # Normally 'tiny' is the smallest video-only format, but
+ # audio-only formats with unknown quality may get tagged as tiny
+ 'tiny',
+ 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
+ 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
+ ])
+ streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
+
for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
continue
itag = str_or_none(fmt.get('itag'))
+ audio_track = fmt.get('audioTrack') or {}
+ stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
+ if stream_id in stream_ids:
+ continue
+
quality = fmt.get('quality')
- if itag and quality:
- itag_qualities[itag] = quality
+ height = int_or_none(fmt.get('height'))
+ if quality == 'tiny' or not quality:
+ quality = fmt.get('audioQuality', '').lower() or quality
+ # The 3gp format (17) in the android client has a quality of "small",
+ # but is actually worse than other formats
+ if itag == '17':
+ quality = 'tiny'
+ if quality:
+ if itag:
+ itag_qualities[itag] = quality
+ if height:
+ res_qualities[height] = quality
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragment that would subsequently requested with (`&sq=N`)
@@ -1613,12 +2435,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not (sc and fmt_url and encrypted_sig):
continue
if not player_url:
- if not webpage:
- continue
- player_url = self._search_regex(
- r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
- webpage, 'player URL', fatal=False)
- if not player_url:
continue
signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
@@ -1626,27 +2442,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if itag:
itags.append(itag)
+ stream_ids.append(stream_id)
+
tbr = float_or_none(
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': itag,
- 'format_note': fmt.get('qualityLabel') or quality,
+ 'format_note': ', '.join(filter(None, (
+ '%s%s' % (audio_track.get('displayName') or '',
+ ' (default)' if audio_track.get('audioIsDefault') else ''),
+ fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
'fps': int_or_none(fmt.get('fps')),
- 'height': int_or_none(fmt.get('height')),
+ 'height': height,
'quality': q(quality),
'tbr': tbr,
'url': fmt_url,
- 'width': fmt.get('width'),
+ 'width': int_or_none(fmt.get('width')),
+ 'language': audio_track.get('id', '').split('.')[0],
+ 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
}
- mimetype = fmt.get('mimeType')
- if mimetype:
- mobj = re.match(
- r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
- if mobj:
- dct['ext'] = mimetype2ext(mobj.group(1))
- dct.update(parse_codecs(mobj.group(2)))
+ mime_mobj = re.match(
+ r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
+ if mime_mobj:
+ dct['ext'] = mimetype2ext(mime_mobj.group(1))
+ dct.update(parse_codecs(mime_mobj.group(2)))
no_audio = dct.get('acodec') == 'none'
no_video = dct.get('vcodec') == 'none'
if no_audio:
@@ -1660,61 +2481,181 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
if dct.get('ext'):
dct['container'] = dct['ext'] + '_dash'
- formats.append(dct)
-
- hls_manifest_url = streaming_data.get('hlsManifestUrl')
- if hls_manifest_url:
- for f in self._extract_m3u8_formats(
- hls_manifest_url, video_id, 'mp4', fatal=False):
- itag = self._search_regex(
- r'/itag/(\d+)', f['url'], 'itag', default=None)
- if itag:
- f['format_id'] = itag
- formats.append(f)
+ yield dct
+
+ skip_manifests = self._configuration_arg('skip')
+ get_dash = (
+ (not is_live or self._configuration_arg('include_live_dash'))
+ and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
+ get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
+
+ def guess_quality(f):
+ for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
+ if val in qdict:
+ return q(qdict[val])
+ return -1
+
+ for sd in streaming_data:
+ hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
+ if hls_manifest_url:
+ for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
+ itag = self._search_regex(
+ r'/itag/(\d+)', f['url'], 'itag', default=None)
+ if itag in itags:
+ itag += '-hls'
+ if itag in itags:
+ continue
+ if itag:
+ f['format_id'] = itag
+ itags.append(itag)
+ f['quality'] = guess_quality(f)
+ yield f
- if self._downloader.params.get('youtube_include_dash_manifest', True):
- dash_manifest_url = streaming_data.get('dashManifestUrl')
+ dash_manifest_url = get_dash and sd.get('dashManifestUrl')
if dash_manifest_url:
- for f in self._extract_mpd_formats(
- dash_manifest_url, video_id, fatal=False):
+ for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
itag = f['format_id']
if itag in itags:
- continue
- if itag in itag_qualities:
- f['quality'] = q(itag_qualities[itag])
+ itag += '-dash'
+ if itag in itags:
+ continue
+ if itag:
+ f['format_id'] = itag
+ itags.append(itag)
+ f['quality'] = guess_quality(f)
filesize = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url')
or f['url'], 'file size', default=None))
if filesize:
f['filesize'] = filesize
- formats.append(f)
+ yield f
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ video_id = self._match_id(url)
+
+ base_url = self.http_scheme() + '//www.youtube.com/'
+ webpage_url = base_url + 'watch?v=' + video_id
+ webpage = None
+ if 'webpage' not in self._configuration_arg('player_skip'):
+ webpage = self._download_webpage(
+ webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+
+ master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
+
+ player_responses, player_url = self._extract_player_responses(
+ self._get_requested_clients(url, smuggled_data),
+ video_id, webpage, master_ytcfg)
+
+ get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
+
+ playability_statuses = traverse_obj(
+ player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
+
+ trailer_video_id = get_first(
+ playability_statuses,
+ ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
+ expected_type=str)
+ if trailer_video_id:
+ return self.url_result(
+ trailer_video_id, self.ie_key(), trailer_video_id)
+
+ search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
+ if webpage else (lambda x: None))
+
+ video_details = traverse_obj(
+ player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
+ microformats = traverse_obj(
+ player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
+ expected_type=dict, default=[])
+ video_title = (
+ get_first(video_details, 'title')
+ or self._get_text(microformats, (..., 'title'))
+ or search_meta(['og:title', 'twitter:title', 'title']))
+ video_description = get_first(video_details, 'shortDescription')
+
+ if not smuggled_data.get('force_singlefeed', False):
+ if not self.get_param('noplaylist'):
+ multifeed_metadata_list = get_first(
+ player_responses,
+ ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
+ expected_type=str)
+ if multifeed_metadata_list:
+ entries = []
+ feed_ids = []
+ for feed in multifeed_metadata_list.split(','):
+ # Unquote should take place before split on comma (,) since textual
+ # fields may contain comma as well (see
+ # https://github.com/ytdl-org/youtube-dl/issues/8536)
+ feed_data = compat_parse_qs(
+ compat_urllib_parse_unquote_plus(feed))
+
+ def feed_entry(name):
+ return try_get(
+ feed_data, lambda x: x[name][0], compat_str)
+
+ feed_id = feed_entry('id')
+ if not feed_id:
+ continue
+ feed_title = feed_entry('title')
+ title = video_title
+ if feed_title:
+ title += ' (%s)' % feed_title
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ '%swatch?v=%s' % (base_url, feed_data['id'][0]),
+ {'force_singlefeed': True}),
+ 'title': title,
+ })
+ feed_ids.append(feed_id)
+ self.to_screen(
+ 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+ % (', '.join(feed_ids), video_id))
+ return self.playlist_result(
+ entries, video_id, video_title, video_description)
+ else:
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
+ is_live = get_first(video_details, 'isLive')
+ if is_live is None:
+ is_live = get_first(live_broadcast_details, 'isLiveNow')
+
+ streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
+ formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
if not formats:
- if streaming_data.get('licenseInfos'):
- raise ExtractorError(
- 'This video is DRM protected.', expected=True)
- pemr = try_get(
- playability_status,
- lambda x: x['errorScreen']['playerErrorMessageRenderer'],
- dict) or {}
- reason = get_text(pemr.get('reason')) or playability_status.get('reason')
- subreason = pemr.get('subreason')
+ if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
+ self.report_drm(video_id)
+ pemr = get_first(
+ playability_statuses,
+ ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
+ reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
+ subreason = clean_html(self._get_text(pemr, 'subreason') or '')
if subreason:
- subreason = clean_html(get_text(subreason))
if subreason == 'The uploader has not made this video available in your country.':
- countries = microformat.get('availableCountries')
+ countries = get_first(microformats, 'availableCountries')
if not countries:
regions_allowed = search_meta('regionsAllowed')
countries = regions_allowed.split(',') if regions_allowed else None
- self.raise_geo_restricted(
- subreason, countries)
- reason += '\n' + subreason
+ self.raise_geo_restricted(subreason, countries, metadata_available=True)
+ reason += f'. {subreason}'
if reason:
- raise ExtractorError(reason, expected=True)
+ self.raise_no_formats(reason, expected=True)
- self._sort_formats(formats)
+ for f in formats:
+ if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
+ f['source_preference'] = -10
+ # TODO: this method is not reliable
+ f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
- keywords = video_details.get('keywords') or []
+ # Source is given priority since formats that throttle are given lower source_preference
+ # When the throttling issue is fully fixed, remove this
+ self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang'))
+
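
Note: the throttle heuristic above is just a URL check and, as the TODO says, not fully reliable:

    def maybe_throttled(url):
        # WEB-client formats without ratebypass are assumed throttled
        return '&c=WEB&' in url and '&ratebypass=yes&' not in url

    print(maybe_throttled('https://r1.example.com/videoplayback?a=1&c=WEB&b=2'))  # True
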
+ keywords = get_first(video_details, 'keywords', expected_type=list) or []
if not keywords and webpage:
keywords = [
unescapeHTML(m.group('content'))
@@ -1733,35 +2674,71 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
thumbnails = []
- for container in (video_details, microformat):
- for thumbnail in (try_get(
- container,
- lambda x: x['thumbnail']['thumbnails'], list) or []):
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'height': int_or_none(thumbnail.get('height')),
- 'url': thumbnail_url,
- 'width': int_or_none(thumbnail.get('width')),
- })
- if thumbnails:
- break
- else:
- thumbnail = search_meta(['og:image', 'twitter:image'])
- if thumbnail:
- thumbnails = [{'url': thumbnail}]
-
- category = microformat.get('category') or search_meta('genre')
- channel_id = video_details.get('channelId') \
- or microformat.get('externalChannelId') \
- or search_meta('channelId')
+ thumbnail_dicts = traverse_obj(
+ (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
+ expected_type=dict, default=[])
+ for thumbnail in thumbnail_dicts:
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ # Sometimes YouTube gives a wrong thumbnail URL. See:
+ # https://github.com/hypervideo/hypervideo/issues/233
+ # https://github.com/ytdl-org/youtube-dl/issues/28023
+ if 'maxresdefault' in thumbnail_url:
+ thumbnail_url = thumbnail_url.split('?')[0]
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'height': int_or_none(thumbnail.get('height')),
+ 'width': int_or_none(thumbnail.get('width')),
+ })
+ thumbnail_url = search_meta(['og:image', 'twitter:image'])
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ })
+ # The best-resolution thumbnails sometimes do not appear in the webpage
+ # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/hypervideo/hypervideo/issues/340
+ # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
+ thumbnail_names = [
+ 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
+ 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
+ 'mqdefault', 'mq1', 'mq2', 'mq3',
+ 'default', '1', '2', '3'
+ ]
+ n_thumbnail_names = len(thumbnail_names)
+
+ thumbnails.extend({
+ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
+ video_id=video_id, name=name, ext=ext,
+ webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
+ } for name in thumbnail_names for ext in ('webp', 'jpg'))
+ for thumb in thumbnails:
+ i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
+ thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
+ self._remove_duplicate_formats(thumbnails)
+
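
Note: the preference scoring above prefers webp over jpg at equal names, and earlier names in the list over later ones. A standalone sketch with a shortened name list:

    thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'hqdefault', 'default']
    video_id = 'dQw4w9WgXcQ'

    thumbs = [{'url': 'https://i.ytimg.com/vi%s/%s/%s.%s' % (
                  '_webp' if ext == 'webp' else '', video_id, name, ext)}
              for name in thumbnail_names for ext in ('webp', 'jpg')]
    for t in thumbs:
        i = next((i for i, n in enumerate(thumbnail_names)
                  if '/%s/%s' % (video_id, n) in t['url']), len(thumbnail_names))
        t['preference'] = (0 if '.webp' in t['url'] else -1) - 2 * i
    print(max(thumbs, key=lambda t: t['preference'])['url'])
    # https://i.ytimg.com/vi_webp/dQw4w9WgXcQ/maxresdefault.webp
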
+ category = get_first(microformats, 'category') or search_meta('genre')
+ channel_id = str_or_none(
+ get_first(video_details, 'channelId')
+ or get_first(microformats, 'externalChannelId')
+ or search_meta('channelId'))
duration = int_or_none(
- video_details.get('lengthSeconds')
- or microformat.get('lengthSeconds')) \
- or parse_duration(search_meta('duration'))
- is_live = video_details.get('isLive')
- owner_profile_url = microformat.get('ownerProfileUrl')
+ get_first(video_details, 'lengthSeconds')
+ or get_first(microformats, 'lengthSeconds')
+ or parse_duration(search_meta('duration'))) or None
+ owner_profile_url = get_first(microformats, 'ownerProfileUrl')
+
+ live_content = get_first(video_details, 'isLiveContent')
+ is_upcoming = get_first(video_details, 'isUpcoming')
+ if is_live is None:
+ if is_upcoming or live_content is False:
+ is_live = False
+ if is_upcoming is None and (live_content or is_live):
+ is_upcoming = False
+ live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
+ live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
+ if not duration and live_endtime and live_starttime:
+ duration = live_endtime - live_starttime
info = {
'id': video_id,
@@ -1770,35 +2747,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'thumbnails': thumbnails,
'description': video_description,
'upload_date': unified_strdate(
- microformat.get('uploadDate')
+ get_first(microformats, 'uploadDate')
or search_meta('uploadDate')),
- 'uploader': video_details['author'],
+ 'uploader': get_first(video_details, 'author'),
'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
'uploader_url': owner_profile_url,
'channel_id': channel_id,
- 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
+ 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
'duration': duration,
'view_count': int_or_none(
- video_details.get('viewCount')
- or microformat.get('viewCount')
+ get_first((video_details, microformats), (..., 'viewCount'))
or search_meta('interactionCount')),
- 'average_rating': float_or_none(video_details.get('averageRating')),
+ 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
'age_limit': 18 if (
- microformat.get('isFamilySafe') is False
+ get_first(microformats, 'isFamilySafe') is False
or search_meta('isFamilyFriendly') == 'false'
or search_meta('og:restrictions:age') == '18+') else 0,
'webpage_url': webpage_url,
'categories': [category] if category else None,
'tags': keywords,
+ 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'is_live': is_live,
+ 'was_live': (False if is_live or is_upcoming or live_content is False
+ else None if is_live is None or is_upcoming is None
+ else live_content),
+ 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
+ 'release_timestamp': live_starttime,
}
- pctr = try_get(
- player_response,
- lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
+ pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
if pctr:
- def process_language(container, base_url, lang_code, query):
- lang_subs = []
+ def get_lang_code(track):
+ return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
+ or track.get('languageCode'))
+
+ # Converted into dicts to remove duplicates
+ captions = {
+ get_lang_code(sub): sub
+ for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
+ translation_languages = {
+ lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
+ for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
+
+ def process_language(container, base_url, lang_code, sub_name, query):
+ lang_subs = container.setdefault(lang_code, [])
for fmt in self._SUBTITLE_FORMATS:
query.update({
'fmt': fmt,
@@ -1806,30 +2798,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
lang_subs.append({
'ext': fmt,
'url': update_url_query(base_url, query),
+ 'name': sub_name,
})
- container[lang_code] = lang_subs
- subtitles = {}
- for caption_track in (pctr.get('captionTracks') or []):
+ subtitles, automatic_captions = {}, {}
+ for lang_code, caption_track in captions.items():
base_url = caption_track.get('baseUrl')
if not base_url:
continue
+ lang_name = self._get_text(caption_track, 'name', max_runs=1)
if caption_track.get('kind') != 'asr':
- lang_code = caption_track.get('languageCode')
if not lang_code:
continue
process_language(
- subtitles, base_url, lang_code, {})
- continue
- automatic_captions = {}
- for translation_language in (pctr.get('translationLanguages') or []):
- translation_language_code = translation_language.get('languageCode')
- if not translation_language_code:
+ subtitles, base_url, lang_code, lang_name, {})
+ if not caption_track.get('isTranslatable'):
+ continue
+ for trans_code, trans_name in translation_languages.items():
+ if not trans_code:
continue
+ if caption_track.get('kind') != 'asr':
+ trans_code += f'-{lang_code}'
+ trans_name += format_field(lang_name, template=' from %s')
process_language(
- automatic_captions, base_url, translation_language_code,
- {'tlang': translation_language_code})
- info['automatic_captions'] = automatic_captions
+ automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
+ info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
parsed_url = compat_urllib_parse_urlparse(url)
@@ -1841,6 +2834,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if d_k not in info and k in s_ks:
info[d_k] = parse_duration(query[k][0])
+ # YouTube Music auto-generated description
if video_description:
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
if mobj:
@@ -1864,42 +2858,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
webpage, self._YT_INITIAL_DATA_RE, video_id,
'yt initial data')
if not initial_data:
- initial_data = self._call_api(
- 'next', {'videoId': video_id}, video_id, fatal=False)
+ query = {'videoId': video_id}
+ query.update(self._get_checkok_params())
+ initial_data = self._extract_response(
+ item_id=video_id, ep='next', fatal=False,
+ ytcfg=master_ytcfg, query=query,
+ headers=self.generate_api_headers(ytcfg=master_ytcfg),
+ note='Downloading initial data API JSON')
- if initial_data:
- chapters = self._extract_chapters_from_json(
- initial_data, video_id, duration)
- if not chapters:
- for engagment_pannel in (initial_data.get('engagementPanels') or []):
- contents = try_get(
- engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
- list)
- if not contents:
- continue
+ try:
+ # This will raise if there is no live chat
+ initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ info.setdefault('subtitles', {})['live_chat'] = [{
+ 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
+ 'video_id': video_id,
+ 'ext': 'json',
+ 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
+ }]
+ except (KeyError, IndexError, TypeError):
+ pass
- def chapter_time(mmlir):
- return parse_duration(
- get_text(mmlir.get('timeDescription')))
-
- chapters = []
- for next_num, content in enumerate(contents, start=1):
- mmlir = content.get('macroMarkersListItemRenderer') or {}
- start_time = chapter_time(mmlir)
- end_time = chapter_time(try_get(
- contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
- if next_num < len(contents) else duration
- if start_time is None or end_time is None:
- continue
- chapters.append({
- 'start_time': start_time,
- 'end_time': end_time,
- 'title': get_text(mmlir.get('title')),
- })
- if chapters:
- break
- if chapters:
- info['chapters'] = chapters
+ if initial_data:
+ info['chapters'] = (
+ self._extract_chapters_from_json(initial_data, duration)
+ or self._extract_chapters_from_engagement_panel(initial_data, duration)
+ or None)
contents = try_get(
initial_data,
@@ -1910,7 +2893,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if vpir:
stl = vpir.get('superTitleLink')
if stl:
- stl = get_text(stl)
+ stl = self._get_text(stl)
if try_get(
vpir,
lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
@@ -1950,10 +2933,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
})
vsir = content.get('videoSecondaryInfoRenderer')
if vsir:
- info['channel'] = get_text(try_get(
- vsir,
- lambda x: x['owner']['videoOwnerRenderer']['title'],
- dict))
+ info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
rows = try_get(
vsir,
lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
@@ -1968,8 +2948,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
mrr_title = mrr.get('title')
if not mrr_title:
continue
- mrr_title = get_text(mrr['title'])
- mrr_contents_text = get_text(mrr['contents'][0])
+ mrr_title = self._get_text(mrr, 'title')
+ mrr_contents_text = self._get_text(mrr, ('contents', 0))
if mrr_title == 'License':
info['license'] = mrr_contents_text
elif not multiple_songs:
@@ -1980,12 +2960,51 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif mrr_title == 'Song':
info['track'] = mrr_contents_text
+ fallbacks = {
+ 'channel': 'uploader',
+ 'channel_id': 'uploader_id',
+ 'channel_url': 'uploader_url',
+ }
+ for to, frm in fallbacks.items():
+ if not info.get(to):
+ info[to] = info.get(frm)
+
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
v = info.get(s_k)
if v:
info[d_k] = v
- self.mark_watched(video_id, player_response)
+ is_private = get_first(video_details, 'isPrivate', expected_type=bool)
+ is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
+ is_membersonly = None
+ is_premium = None
+ if initial_data and is_private is not None:
+ is_membersonly = False
+ is_premium = False
+ contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
+ badge_labels = set()
+ for content in contents:
+ if not isinstance(content, dict):
+ continue
+ badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
+ for badge_label in badge_labels:
+ if badge_label.lower() == 'members only':
+ is_membersonly = True
+ elif badge_label.lower() == 'premium':
+ is_premium = True
+ elif badge_label.lower() == 'unlisted':
+ is_unlisted = True
+
+ info['availability'] = self._availability(
+ is_private=is_private,
+ needs_premium=is_premium,
+ needs_subscription=is_membersonly,
+ needs_auth=info['age_limit'] >= 18,
+ is_unlisted=None if is_private is None else is_unlisted)
+
+ info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
+
+ self.mark_watched(video_id, player_responses)
return info
@@ -2000,127 +3019,161 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
invidio\.us
)/
(?:
- (?:channel|c|user|feed|hashtag)/|
- (?:playlist|watch)\?.*?\blist=|
- (?!(?:watch|embed|v|e)\b)
+ (?P<channel_type>channel|c|user|browse)/|
+ (?P<not_channel>
+ feed/|hashtag/|
+ (?:playlist|watch)\?.*?\blist=
+ )|
+ (?!(?:%s)\b) # Direct URLs
)
(?P<id>[^/?\#&]+)
- '''
+ ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
IE_NAME = 'youtube:tab'
_TESTS = [{
- # playlists, multipage
+ 'note': 'playlists, multipage',
'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Игорь Клейнер - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ 'uploader': 'Игорь Клейнер',
+ 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
},
}, {
- # playlists, multipage, different order
+ 'note': 'playlists, multipage, different order',
'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
'playlist_mincount': 94,
'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Игорь Клейнер - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+ 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
+ 'uploader': 'Игорь Клейнер',
},
}, {
- # playlists, series
+ 'note': 'playlists, series',
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
'playlist_mincount': 5,
'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Playlists',
'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'uploader': '3Blue1Brown',
},
}, {
- # playlists, singlepage
+ 'note': 'playlists, singlepage',
'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
'playlist_mincount': 4,
'info_dict': {
'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
'title': 'ThirstForScience - Playlists',
'description': 'md5:609399d937ea957b0f53cbffb747a14c',
+ 'uploader': 'ThirstForScience',
+ 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
}
}, {
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True,
}, {
- # basic, single video playlist
+ 'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': {
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'uploader': 'Sergey M.',
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
- 'title': 'hypervideo public playlist',
+ 'title': 'youtube-dl public playlist',
},
'playlist_count': 1,
}, {
- # empty playlist
+ 'note': 'empty playlist',
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'info_dict': {
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'uploader': 'Sergey M.',
'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
- 'title': 'hypervideo empty playlist',
+ 'title': 'youtube-dl empty playlist',
},
'playlist_count': 0,
}, {
- # Home tab
+ 'note': 'Home tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Home',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 2,
}, {
- # Videos tab
+ 'note': 'Videos tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 975,
}, {
- # Videos tab, sorted by popular
+ 'note': 'Videos tab, sorted by popular',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 199,
}, {
- # Playlists tab
+ 'note': 'Playlists tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Playlists',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 17,
}, {
- # Community tab
+ 'note': 'Community tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Community',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
},
'playlist_mincount': 18,
}, {
- # Channels tab
+ 'note': 'Channels tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ 'note': 'Search tab',
+ 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
+ 'playlist_mincount': 40,
+ 'info_dict': {
+ 'id': 'UCYO_jab_esuFRV4b17AJtAw',
+ 'title': '3Blue1Brown - Search - linear algebra',
+ 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+ 'uploader': '3Blue1Brown',
+ 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
},
- 'playlist_mincount': 138,
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True,
@@ -2138,6 +3191,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'uploader': 'Christiaan008',
'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
+ 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
},
'playlist_count': 96,
}, {
@@ -2151,7 +3205,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
},
'playlist_mincount': 1123,
}, {
- # even larger playlist, 8832 videos
+ 'note': 'even larger playlist, 8832 videos',
'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
'only_matching': True,
}, {
@@ -2165,20 +3219,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
},
'playlist_mincount': 21,
}, {
- # https://github.com/ytdl-org/youtube-dl/issues/21844
+ 'note': 'Playlist with "show unavailable videos" button',
+ 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
+ 'info_dict': {
+ 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
+ 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
+ 'uploader': 'Phim Siêu Nhân Nhật Bản',
+ 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
+ },
+ 'playlist_mincount': 200,
+ }, {
+ 'note': 'Playlist with unavailable videos in page 7',
+ 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
+ 'info_dict': {
+ 'title': 'Uploads from BlankTV',
+ 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
+ 'uploader': 'BlankTV',
+ 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
+ },
+ 'playlist_mincount': 1000,
+ }, {
+ 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'info_dict': {
'title': 'Data Analysis with Dr Mike Pound',
'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
'uploader': 'Computerphile',
+ 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
},
'playlist_mincount': 11,
}, {
'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'only_matching': True,
}, {
- # Playlist URL that does not actually serve a playlist
+ 'note': 'Playlist URL that does not actually serve a playlist',
'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
'info_dict': {
'id': 'FqZTN594JQw',
@@ -2210,14 +3285,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
- 'id': '9Auq9mYxFEE',
+ 'id': '3yImotZU3tw', # This will keep changing
'ext': 'mp4',
- 'title': 'Watch Sky News live',
+ 'title': compat_str,
'uploader': 'Sky News',
'uploader_id': 'skynews',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
- 'upload_date': '20191102',
- 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
+ 'upload_date': r're:\d{8}',
+ 'description': compat_str,
'categories': ['News & Politics'],
'tags': list,
'like_count': int,
@@ -2226,6 +3301,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'params': {
'skip_download': True,
},
+ 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
}, {
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
'info_dict': {
@@ -2254,30 +3330,30 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
'only_matching': True,
}, {
+ 'note': 'A channel that is not live. Should raise error',
+ 'url': 'https://www.youtube.com/user/numberphile/live',
+ 'only_matching': True,
+ }, {
'url': 'https://www.youtube.com/feed/trending',
'only_matching': True,
}, {
- # needs auth
'url': 'https://www.youtube.com/feed/library',
'only_matching': True,
}, {
- # needs auth
'url': 'https://www.youtube.com/feed/history',
'only_matching': True,
}, {
- # needs auth
'url': 'https://www.youtube.com/feed/subscriptions',
'only_matching': True,
}, {
- # needs auth
'url': 'https://www.youtube.com/feed/watch_later',
'only_matching': True,
}, {
- # no longer available?
+ 'note': 'Recommended - redirects to home page.',
'url': 'https://www.youtube.com/feed/recommended',
'only_matching': True,
}, {
- # inline playlist with not always working continuations
+ 'note': 'inline playlist with not always working continuations',
'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
'only_matching': True,
}, {
@@ -2305,6 +3381,116 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
'only_matching': True,
+ }, {
+ 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
+ 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'only_matching': True
+ }, {
+ 'note': '/browse/ should redirect to /channel/',
+ 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
+ 'only_matching': True
+ }, {
+ 'note': 'VLPL, should redirect to playlist?list=PL...',
+ 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'info_dict': {
+ 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
+ 'uploader': 'NoCopyrightSounds',
+ 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
+ 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
+ 'title': 'NCS Releases',
+ },
+ 'playlist_mincount': 166,
+ }, {
+ 'note': 'Topic, should redirect to playlist?list=UU...',
+ 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'info_dict': {
+ 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
+ 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'title': 'Uploads from Royalty Free Music - Topic',
+ 'uploader': 'Royalty Free Music - Topic',
+ },
+ 'expected_warnings': [
+ 'A channel/user page was given',
+ 'The URL does not have a videos tab',
+ ],
+ 'playlist_mincount': 101,
+ }, {
+ 'note': 'Topic without a UU playlist',
+ 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
+ 'info_dict': {
+ 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
+ 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
+ },
+ 'expected_warnings': [
+ 'A channel/user page was given',
+ 'The URL does not have a videos tab',
+ 'Falling back to channel URL',
+ ],
+ 'playlist_mincount': 9,
+ }, {
+ 'note': 'YouTube Music album',
+ 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
+ 'info_dict': {
+ 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
+ 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
+ },
+ 'playlist_count': 50,
+ }, {
+ 'note': 'unlisted single video playlist',
+ 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
+ 'info_dict': {
+ 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
+ 'uploader': 'colethedj',
+ 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
+ 'title': 'hypervideo unlisted playlist test',
+ 'availability': 'unlisted'
+ },
+ 'playlist_count': 1,
+ }, {
+ 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
+ 'url': 'https://www.youtube.com/feed/recommended',
+ 'info_dict': {
+ 'id': 'recommended',
+ 'title': 'recommended',
+ },
+ 'playlist_mincount': 50,
+ 'params': {
+ 'skip_download': True,
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ },
+ }, {
+ 'note': 'API Fallback: /videos tab, sorted by oldest first',
+ 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
+ 'info_dict': {
+ 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
+ 'title': 'Cody\'sLab - Videos',
+ 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
+ 'uploader': 'Cody\'sLab',
+ 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
+ },
+ 'playlist_mincount': 650,
+ 'params': {
+ 'skip_download': True,
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ },
+ }, {
+ 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
+ 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'info_dict': {
+ 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
+ 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
+ 'title': 'Uploads from Royalty Free Music - Topic',
+ 'uploader': 'Royalty Free Music - Topic',
+ },
+ 'expected_warnings': [
+ 'A channel/user page was given',
+ 'The URL does not have a videos tab',
+ ],
+ 'playlist_mincount': 101,
+ 'params': {
+ 'skip_download': True,
+ 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
+ },
}]
@classmethod
@@ -2326,25 +3512,28 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
channel_url, 'channel id')
@staticmethod
- def _extract_grid_item_renderer(item):
- assert isinstance(item, dict)
+ def _extract_basic_item_renderer(item):
+ # Modified from _extract_grid_item_renderer
+ known_basic_renderers = (
+ 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
+ )
for key, renderer in item.items():
- if not key.startswith('grid') or not key.endswith('Renderer'):
- continue
if not isinstance(renderer, dict):
continue
- return renderer
+ elif key in known_basic_renderers:
+ return renderer
+ elif key.startswith('grid') and key.endswith('Renderer'):
+ return renderer
def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']:
if not isinstance(item, dict):
continue
- renderer = self._extract_grid_item_renderer(item)
+ renderer = self._extract_basic_item_renderer(item)
if not isinstance(renderer, dict):
continue
- title = try_get(
- renderer, (lambda x: x['title']['runs'][0]['text'],
- lambda x: x['title']['simpleText']), compat_str)
+ title = self._get_text(renderer, 'title')
+
# playlist
playlist_id = renderer.get('playlistId')
if playlist_id:
@@ -2361,8 +3550,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# channel
channel_id = renderer.get('channelId')
if channel_id:
- title = try_get(
- renderer, lambda x: x['title']['simpleText'], compat_str)
yield self.url_result(
'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title)
@@ -2382,7 +3569,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
content = shelf_renderer.get('content')
if not isinstance(content, dict):
return
- renderer = content.get('gridRenderer')
+ renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
if renderer:
# TODO: add support for nested playlists so each shelf is processed
# as separate playlist
@@ -2405,8 +3592,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# will not work
if skip_channels and '/channels?' in shelf_url:
return
- title = try_get(
- shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+ title = self._get_text(shelf_renderer, 'title')
yield self.url_result(shelf_url, video_title=title)
# Shelf may not contain shelf URL, fallback to extraction from content
for entry in self._shelf_entries_from_content(shelf_renderer):
@@ -2424,6 +3610,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue
yield self._extract_video(renderer)
+ def _rich_entries(self, rich_grid_renderer):
+ renderer = try_get(
+ rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
+ video_id = renderer.get('videoId')
+ if not video_id:
+ return
+ yield self._extract_video(renderer)
+
def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId')
if video_id:
@@ -2436,12 +3630,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
return
# video attachment
video_renderer = try_get(
- post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
- video_id = None
- if video_renderer:
- entry = self._video_entry(video_renderer)
+ post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
+ video_id = video_renderer.get('videoId')
+ if video_id:
+ entry = self._extract_video(video_renderer)
if entry:
yield entry
+ # playlist attachment
+ playlist_id = try_get(
+ post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
+ if playlist_id:
+ yield self.url_result(
+ 'https://www.youtube.com/playlist?list=%s' % playlist_id,
+ ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
# inline video links
runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
for run in runs:
@@ -2456,7 +3657,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
ep_video_id = YoutubeIE._match_id(ep_url)
if video_id == ep_video_id:
continue
- yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
+ yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
def _post_thread_continuation_entries(self, post_thread_continuation):
contents = post_thread_continuation.get('contents')
@@ -2469,6 +3670,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
for entry in self._post_thread_entries(renderer):
yield entry
+ r''' # unused
def _rich_grid_entries(self, contents):
for content in contents:
video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
@@ -2476,316 +3678,264 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
entry = self._video_entry(video_renderer)
if entry:
yield entry
+ '''
+ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
- @staticmethod
- def _build_continuation_query(continuation, ctp=None):
- query = {
- 'ctoken': continuation,
- 'continuation': continuation,
- }
- if ctp:
- query['itct'] = ctp
- return query
-
- @staticmethod
- def _extract_next_continuation_data(renderer):
- next_continuation = try_get(
- renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
- if not next_continuation:
- return
- continuation = next_continuation.get('continuation')
- if not continuation:
- return
- ctp = next_continuation.get('clickTrackingParams')
- return YoutubeTabIE._build_continuation_query(continuation, ctp)
-
- @classmethod
- def _extract_continuation(cls, renderer):
- next_continuation = cls._extract_next_continuation_data(renderer)
- if next_continuation:
- return next_continuation
- contents = []
- for key in ('contents', 'items'):
- contents.extend(try_get(renderer, lambda x: x[key], list) or [])
- for content in contents:
- if not isinstance(content, dict):
- continue
- continuation_ep = try_get(
- content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
- dict)
- if not continuation_ep:
- continue
- continuation = try_get(
- continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
- if not continuation:
- continue
- ctp = continuation_ep.get('clickTrackingParams')
- return YoutubeTabIE._build_continuation_query(continuation, ctp)
-
- def _entries(self, tab, item_id, webpage):
- tab_content = try_get(tab, lambda x: x['content'], dict)
- if not tab_content:
- return
- slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
- if slr_renderer:
- is_channels_tab = tab.get('title') == 'Channels'
- continuation = None
- slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
- for slr_content in slr_contents:
- if not isinstance(slr_content, dict):
+ def extract_entries(parent_renderer): # this needs to be called again for continuations to work with feeds
+ contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
+ for content in contents:
+ if not isinstance(content, dict):
continue
- is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
+ is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
if not is_renderer:
+ renderer = content.get('richItemRenderer')
+ if renderer:
+ for entry in self._rich_entries(renderer):
+ yield entry
+ continuation_list[0] = self._extract_continuation(parent_renderer)
continue
isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
for isr_content in isr_contents:
if not isinstance(isr_content, dict):
continue
- renderer = isr_content.get('playlistVideoListRenderer')
- if renderer:
- for entry in self._playlist_entries(renderer):
- yield entry
- continuation = self._extract_continuation(renderer)
- continue
- renderer = isr_content.get('gridRenderer')
- if renderer:
- for entry in self._grid_entries(renderer):
- yield entry
- continuation = self._extract_continuation(renderer)
- continue
- renderer = isr_content.get('shelfRenderer')
- if renderer:
- for entry in self._shelf_entries(renderer, not is_channels_tab):
- yield entry
- continue
- renderer = isr_content.get('backstagePostThreadRenderer')
- if renderer:
- for entry in self._post_thread_entries(renderer):
- yield entry
- continuation = self._extract_continuation(renderer)
- continue
- renderer = isr_content.get('videoRenderer')
- if renderer:
- entry = self._video_entry(renderer)
- if entry:
- yield entry
- if not continuation:
- continuation = self._extract_continuation(is_renderer)
- if not continuation:
- continuation = self._extract_continuation(slr_renderer)
- else:
- rich_grid_renderer = tab_content.get('richGridRenderer')
- if not rich_grid_renderer:
- return
- for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
- yield entry
- continuation = self._extract_continuation(rich_grid_renderer)
+ known_renderers = {
+ 'playlistVideoListRenderer': self._playlist_entries,
+ 'gridRenderer': self._grid_entries,
+ 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
+ 'backstagePostThreadRenderer': self._post_thread_entries,
+ 'videoRenderer': lambda x: [self._video_entry(x)],
+ }
+ for key, renderer in isr_content.items():
+ if key not in known_renderers:
+ continue
+ for entry in known_renderers[key](renderer):
+ if entry:
+ yield entry
+ continuation_list[0] = self._extract_continuation(renderer)
+ break
- ytcfg = self._extract_ytcfg(item_id, webpage)
- client_version = try_get(
- ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'
+ if not continuation_list[0]:
+ continuation_list[0] = self._extract_continuation(is_renderer)
- headers = {
- 'x-youtube-client-name': '1',
- 'x-youtube-client-version': client_version,
- 'content-type': 'application/json',
- }
+ if not continuation_list[0]:
+ continuation_list[0] = self._extract_continuation(parent_renderer)
- context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': client_version,
- }
- }
- visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
-
- identity_token = self._extract_identity_token(ytcfg, webpage)
- if identity_token:
- headers['x-youtube-identity-token'] = identity_token
-
- data = {
- 'context': context,
- }
+ continuation_list = [None] # Python 2 does not support nonlocal
+ tab_content = try_get(tab, lambda x: x['content'], dict)
+ if not tab_content:
+ return
+ parent_renderer = (
+ try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
+ or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
+ for entry in extract_entries(parent_renderer):
+ yield entry
+ continuation = continuation_list[0]
for page_num in itertools.count(1):
if not continuation:
break
- if visitor_data:
- headers['x-goog-visitor-id'] = visitor_data
- data['continuation'] = continuation['continuation']
- data['clickTracking'] = {
- 'clickTrackingParams': continuation['itct']
- }
- count = 0
- retries = 3
- while count <= retries:
- try:
- # Downloading page may result in intermittent 5xx HTTP error
- # that is usually worked around with a retry
- response = self._download_json(
- 'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
- headers=headers, data=json.dumps(data).encode('utf8'))
- break
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
- count += 1
- if count <= retries:
- continue
- raise
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
+ response = self._extract_response(
+ item_id='%s page %s' % (item_id, page_num),
+ query=continuation, headers=headers, ytcfg=ytcfg,
+ check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
+
if not response:
break
-
- visitor_data = try_get(
- response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
-
+ # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
+ # See: https://github.com/ytdl-org/youtube-dl/issues/28702
+ visitor_data = self._extract_visitor_data(response) or visitor_data
+
+ known_continuation_renderers = {
+ 'playlistVideoListContinuation': self._playlist_entries,
+ 'gridContinuation': self._grid_entries,
+ 'itemSectionContinuation': self._post_thread_continuation_entries,
+ 'sectionListContinuation': extract_entries, # for feeds
+ }
continuation_contents = try_get(
- response, lambda x: x['continuationContents'], dict)
- if continuation_contents:
- continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
- if continuation_renderer:
- for entry in self._playlist_entries(continuation_renderer):
- yield entry
- continuation = self._extract_continuation(continuation_renderer)
- continue
- continuation_renderer = continuation_contents.get('gridContinuation')
- if continuation_renderer:
- for entry in self._grid_entries(continuation_renderer):
- yield entry
- continuation = self._extract_continuation(continuation_renderer)
- continue
- continuation_renderer = continuation_contents.get('itemSectionContinuation')
- if continuation_renderer:
- for entry in self._post_thread_continuation_entries(continuation_renderer):
- yield entry
- continuation = self._extract_continuation(continuation_renderer)
+ response, lambda x: x['continuationContents'], dict) or {}
+ continuation_renderer = None
+ for key, value in continuation_contents.items():
+ if key not in known_continuation_renderers:
continue
+ continuation_renderer = value
+ continuation_list = [None]
+ for entry in known_continuation_renderers[key](continuation_renderer):
+ yield entry
+ continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
+ break
+ if continuation_renderer:
+ continue
+ known_renderers = {
+ 'gridPlaylistRenderer': (self._grid_entries, 'items'),
+ 'gridVideoRenderer': (self._grid_entries, 'items'),
+ 'gridChannelRenderer': (self._grid_entries, 'items'),
+ 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
+ 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
+ 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
+ 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
+ }
on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
continuation_items = try_get(
on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
- if continuation_items:
- continuation_item = continuation_items[0]
- if not isinstance(continuation_item, dict):
- continue
- renderer = self._extract_grid_item_renderer(continuation_item)
- if renderer:
- grid_renderer = {'items': continuation_items}
- for entry in self._grid_entries(grid_renderer):
- yield entry
- continuation = self._extract_continuation(grid_renderer)
- continue
- renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
- if renderer:
- video_list_renderer = {'contents': continuation_items}
- for entry in self._playlist_entries(video_list_renderer):
- yield entry
- continuation = self._extract_continuation(video_list_renderer)
- continue
- renderer = continuation_item.get('backstagePostThreadRenderer')
- if renderer:
- continuation_renderer = {'contents': continuation_items}
- for entry in self._post_thread_continuation_entries(continuation_renderer):
- yield entry
- continuation = self._extract_continuation(continuation_renderer)
- continue
- renderer = continuation_item.get('richItemRenderer')
- if renderer:
- for entry in self._rich_grid_entries(continuation_items):
- yield entry
- continuation = self._extract_continuation({'contents': continuation_items})
+ continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
+ video_items_renderer = None
+ for key, value in continuation_item.items():
+ if key not in known_renderers:
continue
-
+ video_items_renderer = {known_renderers[key][1]: continuation_items}
+ continuation_list = [None]
+ for entry in known_renderers[key][0](video_items_renderer):
+ yield entry
+ continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
+ break
+ if video_items_renderer:
+ continue
break
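The _entries generator above combines two patterns: a dispatch table that maps known renderer keys to entry generators, and a one-element continuation_list that stands in for nonlocal (absent in Python 2) so the nested extract_entries can hand the continuation token back to the pagination loop. A minimal sketch of the same shape, where fetch_page and the renderer payloads are hypothetical stand-ins for the real InnerTube responses:

    import itertools

    def fetch_page(token):
        # hypothetical two-page API response, keyed by continuation token
        pages = {
            None: {'gridContinuation': {'items': [1, 2], 'next': 'tok1'}},
            'tok1': {'gridContinuation': {'items': [3], 'next': None}},
        }
        return pages.get(token)

    def entries():
        continuation_list = [None]  # mutable cell: Python 2 has no nonlocal

        def grid_entries(renderer):
            for item in renderer['items']:
                yield item
            continuation_list[0] = renderer.get('next')

        known_renderers = {'gridContinuation': grid_entries}
        for page_num in itertools.count(1):
            response = fetch_page(continuation_list[0])
            if not response:
                break
            matched = False
            for key, renderer in response.items():
                if key not in known_renderers:
                    continue
                matched = True
                for entry in known_renderers[key](renderer):
                    yield entry
                break
            if not matched or not continuation_list[0]:
                break

    print(list(entries()))  # -> [1, 2, 3]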
@staticmethod
def _extract_selected_tab(tabs):
for tab in tabs:
- if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
- return tab['tabRenderer']
+ renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
+ if renderer.get('selected') is True:
+ return renderer
else:
raise ExtractorError('Unable to find selected tab')
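_extract_selected_tab relies on Python's for/else: the else branch runs only when the loop finishes without returning, i.e. when no tab is marked selected. A small illustration with made-up tab dicts:

    def first_selected(tabs):
        for tab in tabs:
            if tab.get('selected') is True:
                return tab
        else:
            raise LookupError('Unable to find selected tab')

    print(first_selected([{'id': 1}, {'id': 2, 'selected': True}]))
    # -> {'id': 2, 'selected': True}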
- @staticmethod
- def _extract_uploader(data):
+ @classmethod
+ def _extract_uploader(cls, data):
uploader = {}
- sidebar_renderer = try_get(
- data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
- if sidebar_renderer:
- for item in sidebar_renderer:
- if not isinstance(item, dict):
- continue
- renderer = item.get('playlistSidebarSecondaryInfoRenderer')
- if not isinstance(renderer, dict):
- continue
- owner = try_get(
- renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
- if owner:
- uploader['uploader'] = owner.get('text')
- uploader['uploader_id'] = try_get(
- owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
- uploader['uploader_url'] = urljoin(
- 'https://www.youtube.com/',
- try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
- return uploader
+ renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
+ owner = try_get(
+ renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
+ if owner:
+ uploader['uploader'] = owner.get('text')
+ uploader['uploader_id'] = try_get(
+ owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
+ uploader['uploader_url'] = urljoin(
+ 'https://www.youtube.com/',
+ try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+ return {k: v for k, v in uploader.items() if v is not None}
+
+ def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
+ playlist_id = title = description = channel_url = channel_name = channel_id = None
+ thumbnails_list = []
+ tags = []
- @staticmethod
- def _extract_alert(data):
- alerts = []
- for alert in try_get(data, lambda x: x['alerts'], list) or []:
- if not isinstance(alert, dict):
- continue
- alert_text = try_get(
- alert, lambda x: x['alertRenderer']['text'], dict)
- if not alert_text:
- continue
- text = try_get(
- alert_text,
- (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
- compat_str)
- if text:
- alerts.append(text)
- return '\n'.join(alerts)
-
- def _extract_from_tabs(self, item_id, webpage, data, tabs):
selected_tab = self._extract_selected_tab(tabs)
renderer = try_get(
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
- playlist_id = item_id
- title = description = None
if renderer:
- channel_title = renderer.get('title') or item_id
- tab_title = selected_tab.get('title')
- title = channel_title or item_id
- if tab_title:
- title += ' - %s' % tab_title
- description = renderer.get('description')
- playlist_id = renderer.get('externalId')
+ channel_name = renderer.get('title')
+ channel_url = renderer.get('channelUrl')
+ channel_id = renderer.get('externalId')
else:
renderer = try_get(
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
- if renderer:
- title = renderer.get('title')
- else:
- renderer = try_get(
- data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
- if renderer:
- title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
- playlist = self.playlist_result(
- self._entries(selected_tab, item_id, webpage),
- playlist_id=playlist_id, playlist_title=title,
- playlist_description=description)
- playlist.update(self._extract_uploader(data))
- return playlist
-
- def _extract_from_playlist(self, item_id, url, data, playlist):
+
+ if renderer:
+ title = renderer.get('title')
+ description = renderer.get('description', '')
+ playlist_id = channel_id
+ tags = renderer.get('keywords', '').split()
+ thumbnails_list = (
+ try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
+ or try_get(
+ self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
+ lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
+ list)
+ or [])
+
+ thumbnails = []
+ for t in thumbnails_list:
+ if not isinstance(t, dict):
+ continue
+ thumbnail_url = url_or_none(t.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(t.get('width')),
+ 'height': int_or_none(t.get('height')),
+ })
+ if playlist_id is None:
+ playlist_id = item_id
+ if title is None:
+ title = (
+ try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
+ or playlist_id)
+ title += format_field(selected_tab, 'title', ' - %s')
+ title += format_field(selected_tab, 'expandedText', ' - %s')
+ metadata = {
+ 'playlist_id': playlist_id,
+ 'playlist_title': title,
+ 'playlist_description': description,
+ 'uploader': channel_name,
+ 'uploader_id': channel_id,
+ 'uploader_url': channel_url,
+ 'thumbnails': thumbnails,
+ 'tags': tags,
+ }
+ availability = self._extract_availability(data)
+ if availability:
+ metadata['availability'] = availability
+ if not channel_id:
+ metadata.update(self._extract_uploader(data))
+ metadata.update({
+ 'channel': metadata['uploader'],
+ 'channel_id': metadata['uploader_id'],
+ 'channel_url': metadata['uploader_url']})
+ return self.playlist_result(
+ self._entries(
+ selected_tab, playlist_id, ytcfg,
+ self._extract_account_syncid(ytcfg, data),
+ self._extract_visitor_data(data, ytcfg)),
+ **metadata)
+
+ def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
+ first_id = last_id = response = None
+ for page_num in itertools.count(1):
+ videos = list(self._playlist_entries(playlist))
+ if not videos:
+ return
+ start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
+ if start >= len(videos):
+ return
+ for video in videos[start:]:
+ if video['id'] == first_id:
+ self.to_screen('First video %s found again; assuming end of Mix' % first_id)
+ return
+ yield video
+ first_id = first_id or videos[0]['id']
+ last_id = videos[-1]['id']
+ watch_endpoint = try_get(
+ playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
+ visitor_data=self._extract_visitor_data(response, data, ytcfg))
+ query = {
+ 'playlistId': playlist_id,
+ 'videoId': watch_endpoint.get('videoId') or last_id,
+ 'index': watch_endpoint.get('index') or len(videos),
+ 'params': watch_endpoint.get('params') or 'OAE%3D'
+ }
+ response = self._extract_response(
+ item_id='%s page %d' % (playlist_id, page_num),
+ query=query, ep='next', headers=headers, ytcfg=ytcfg,
+ check_get_keys='contents'
+ )
+ playlist = try_get(
+ response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
+
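Mix playlists are endless: each API call returns a window of videos that overlaps the previous one and eventually wraps around to the start. The generator above therefore resumes right after the last id it has already yielded and stops once the first id reappears. A toy model of that logic, with get_page as a hypothetical stand-in for the API:

    import itertools

    def get_page(page_num):
        # hypothetical overlapping windows returned by successive calls
        windows = {1: ['a', 'b', 'c'], 2: ['b', 'c', 'd'], 3: ['d', 'a', 'b']}
        return windows.get(page_num, [])

    def mix_entries():
        first_id = last_id = None
        for page_num in itertools.count(1):
            videos = get_page(page_num)
            if not videos:
                return
            # resume right after the last id already yielded
            start = next((i for i, v in enumerate(videos) if v == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video == first_id:
                    return  # wrapped around: end of Mix
                yield video
            first_id = first_id or videos[0]
            last_id = videos[-1]

    print(list(mix_entries()))  # -> ['a', 'b', 'c', 'd']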
+ def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
title = playlist.get('title') or try_get(
data, lambda x: x['titleText']['simpleText'], compat_str)
playlist_id = playlist.get('playlistId') or item_id
- # Inline playlist rendition continuation does not always work
- # at Youtube side, so delegating regular tab-based playlist URL
- # processing whenever possible.
+
+ # Delegating everything except mix playlists to regular tab-based playlist URL
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str))
@@ -2793,54 +3943,297 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
+
return self.playlist_result(
- self._playlist_entries(playlist), playlist_id=playlist_id,
- playlist_title=title)
+ self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
+ playlist_id=playlist_id, playlist_title=title)
- def _extract_identity_token(self, ytcfg, webpage):
- if ytcfg:
- token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
- if token:
- return token
- return self._search_regex(
- r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
- 'identity token', default=None)
+ def _extract_availability(self, data):
+ """
+ Gets the availability of a given playlist/tab.
+ Note: Unless YouTube tells us explicitly, we do not assume it is public
+ @param data: response
+ """
+ is_private = is_unlisted = None
+ renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
+ badge_labels = self._extract_badges(renderer)
+
+ # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
+ privacy_dropdown_entries = try_get(
+ renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
+ for renderer_dict in privacy_dropdown_entries:
+ is_selected = try_get(
+ renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
+ if not is_selected:
+ continue
+ label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
+ if label:
+ badge_labels.add(label.lower())
+ break
+
+ for badge_label in badge_labels:
+ if badge_label == 'unlisted':
+ is_unlisted = True
+ elif badge_label == 'private':
+ is_private = True
+ elif badge_label == 'public':
+ is_unlisted = is_private = False
+ return self._availability(is_private, False, False, False, is_unlisted)
+
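The badge handling above only trusts explicit signals: 'unlisted' and 'private' badges set their flags, a 'public' badge clears both, and anything unrecognized leaves the availability unknown rather than assumed public. A simplified sketch (the real _availability helper also takes premium and subscription flags, which are pinned to False in the call above):

    def availability_from_badges(badge_labels):
        is_private = is_unlisted = None
        for label in badge_labels:
            if label == 'unlisted':
                is_unlisted = True
            elif label == 'private':
                is_private = True
            elif label == 'public':
                is_unlisted = is_private = False
        if is_private:
            return 'private'
        if is_unlisted:
            return 'unlisted'
        if is_private is False:
            return 'public'
        return None  # YouTube did not say; do not assume public

    print(availability_from_badges({'unlisted'}))  # -> unlisted
    print(availability_from_badges(set()))         # -> None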
+ @staticmethod
+ def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
+ sidebar_renderer = try_get(
+ data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
+ for item in sidebar_renderer:
+ renderer = try_get(item, lambda x: x[info_renderer], expected_type)
+ if renderer:
+ return renderer
+
+ def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
+ """
+ Get playlist with unavailable videos if the 'show unavailable videos' button exists.
+ """
+ browse_id = params = None
+ renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
+ if not renderer:
+ return
+ menu_renderer = try_get(
+ renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
+ for menu_item in menu_renderer:
+ if not isinstance(menu_item, dict):
+ continue
+ nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
+ text = try_get(
+ nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
+ if not text or text.lower() != 'show unavailable videos':
+ continue
+ browse_endpoint = try_get(
+ nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
+ browse_id = browse_endpoint.get('browseId')
+ params = browse_endpoint.get('params')
+ break
+
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
+ visitor_data=self._extract_visitor_data(data, ytcfg))
+ query = {
+ 'params': params or 'wgYCCAA=',
+ 'browseId': browse_id or 'VL%s' % item_id
+ }
+ return self._extract_response(
+ item_id=item_id, headers=headers, query=query,
+ check_get_keys='contents', fatal=False, ytcfg=ytcfg,
+ note='Downloading API JSON with unavailable videos')
+
+ def _extract_webpage(self, url, item_id, fatal=True):
+ retries = self.get_param('extractor_retries', 3)
+ count = -1
+ webpage = data = last_error = None
+ while count < retries:
+ count += 1
+ # Sometimes youtube returns a webpage with incomplete ytInitialData
+ # See: https://github.com/hypervideo/hypervideo/issues/116
+ if last_error:
+ self.report_warning('%s. Retrying ...' % last_error)
+ try:
+ webpage = self._download_webpage(
+ url, item_id,
+ note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
+ data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
+ except ExtractorError as e:
+ if isinstance(e.cause, network_exceptions):
+ if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ last_error = error_to_compat_str(e.cause or e.msg)
+ if count < retries:
+ continue
+ if fatal:
+ raise
+ self.report_warning(error_to_compat_str(e))
+ break
+ else:
+ try:
+ self._extract_and_report_alerts(data)
+ except ExtractorError as e:
+ if fatal:
+ raise
+ self.report_warning(error_to_compat_str(e))
+ break
+
+ if dict_get(data, ('contents', 'currentVideoEndpoint')):
+ break
+
+ last_error = 'Incomplete yt initial data received'
+ if count >= retries:
+ if fatal:
+ raise ExtractorError(last_error)
+ self.report_warning(last_error)
+ break
+
+ return webpage, data
+
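The webpage loop above retries in two situations: transient network errors (other than 403/429) and a structurally incomplete ytInitialData payload. A generic sketch of that retry shape, with download and TransientError as hypothetical stand-ins:

    class TransientError(Exception):
        pass

    def fetch_with_retries(download, retries=3):
        last_error = None
        for count in range(retries + 1):
            if last_error:
                print('%s. Retrying ...' % last_error)
            try:
                data = download()
            except TransientError as e:
                last_error = str(e)
                if count < retries:
                    continue
                raise
            if data.get('contents'):  # payload looks complete
                return data
            last_error = 'Incomplete data received'
        raise RuntimeError(last_error)

    attempts = iter([{}, {'contents': [1]}])
    print(fetch_with_retries(lambda: next(attempts)))  # -> {'contents': [1]}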
+ def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
+ data = None
+ if 'webpage' not in self._configuration_arg('skip'):
+ webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
+ ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
+ if not data:
+ if not ytcfg and self.is_authenticated:
+ msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
+ if 'authcheck' not in self._configuration_arg('skip') and fatal:
+ raise ExtractorError(
+ msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
+ ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
+ expected=True)
+ self.report_warning(msg, only_once=True)
+ data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
+ return data, ytcfg
+
+ def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
+ headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
+ resolve_response = self._extract_response(
+ item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
+ ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
+ endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
+ for ep_key, ep in endpoints.items():
+ params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
+ if params:
+ return self._extract_response(
+ item_id=item_id, query=params, ep=ep, headers=headers,
+ ytcfg=ytcfg, fatal=fatal, default_client=default_client,
+ check_get_keys=('contents', 'currentVideoEndpoint'))
+ err_note = 'Failed to resolve url (does the playlist exist?)'
+ if fatal:
+ raise ExtractorError(err_note, expected=True)
+ self.report_warning(err_note, item_id)
+
+ @staticmethod
+ def _smuggle_data(entries, data):
+ for entry in entries:
+ if data:
+ entry['url'] = smuggle_url(entry['url'], data)
+ yield entry
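_smuggle_data piggy-backs extra context (here is_music_url) onto each entry URL so it survives the round trip through url_result into the next extractor, which recovers it with unsmuggle_url. A minimal re-implementation of the idea; the real helpers live in hypervideo_dl/utils.py and use a different fragment marker:

    import json
    from urllib.parse import quote, unquote

    def smuggle_url(url, data):
        # append extra context as a JSON blob in the URL fragment
        return url + '#__smuggle=' + quote(json.dumps(data))

    def unsmuggle_url(url, default=None):
        if '#__smuggle=' not in url:
            return url, default
        url, _, payload = url.partition('#__smuggle=')
        return url, json.loads(unquote(payload))

    u = smuggle_url('https://music.youtube.com/playlist?list=X', {'is_music_url': True})
    print(unsmuggle_url(u, {}))
    # -> ('https://music.youtube.com/playlist?list=X', {'is_music_url': True})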
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ if self.is_music_url(url):
+ smuggled_data['is_music_url'] = True
+ info_dict = self.__real_extract(url, smuggled_data)
+ if info_dict.get('entries'):
+ info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
+ return info_dict
+
+ _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
+
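_url_re uses a regex conditional group: (?(channel_type)(?P<tab>/\w+))? only attempts the tab pattern when the channel_type group participated in the match. A cut-down example of the construct (the pattern below is illustrative, not the real _VALID_URL):

    import re

    # (?(channel_type)...) matches its pattern only when the named group
    # matched earlier; otherwise it matches the empty string.
    pattern = re.compile(
        r'(?P<channel_type>channel/)?(?P<id>\w+)(?(channel_type)(?P<tab>/\w+)?)')

    m = pattern.match('channel/UC123/videos')
    print(m.group('id'), m.group('tab'))  # -> UC123 /videos
    m = pattern.match('UC123/videos')
    print(m.group('id'), m.group('tab'))  # -> UC123 None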
+ def __real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
url = compat_urlparse.urlunparse(
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ compat_opts = self.get_param('compat_opts', [])
+
+ def get_mobj(url):
+ mobj = self._url_re.match(url).groupdict()
+ mobj.update((k, '') for k, v in mobj.items() if v is None)
+ return mobj
+
+ mobj = get_mobj(url)
+ # Youtube returns incomplete data if tabname is not lower case
+ pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
+ if is_channel:
+ if smuggled_data.get('is_music_url'):
+ if item_id[:2] == 'VL':
+ # Youtube music VL channels have an equivalent playlist
+ item_id = item_id[2:]
+ pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
+ elif item_id[:2] == 'MP':
+ # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
+ mdata = self._extract_tab_endpoint(
+ 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
+ murl = traverse_obj(
+ mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
+ if not murl:
+ raise ExtractorError('Failed to resolve album to playlist.')
+ return self.url_result(murl, ie=YoutubeTabIE.ie_key())
+ elif mobj['channel_type'] == 'browse':
+ # Youtube music /browse/ should be changed to /channel/
+ pre = 'https://www.youtube.com/channel/%s' % item_id
+ if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
+ # Home URLs should redirect to /videos/
+ self.report_warning(
+ 'A channel/user page was given. All the channel\'s videos will be downloaded. '
+ 'To download only the videos in the home page, add a "/featured" to the URL')
+ tab = '/videos'
+
+ url = ''.join((pre, tab, post))
+ mobj = get_mobj(url)
+
# Handle both video/playlist URLs
qs = parse_qs(url)
video_id = qs.get('v', [None])[0]
playlist_id = qs.get('list', [None])[0]
+
+ if not video_id and mobj['not_channel'].startswith('watch'):
+ if not playlist_id:
+ # If there is neither a video nor a playlist id, YouTube redirects to the home page, which is undesirable
+ raise ExtractorError('Unable to recognize tab page')
+ # Common mistake: https://www.youtube.com/watch?list=playlist_id
+ self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
+ url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
+ mobj = get_mobj(url)
+
if video_id and playlist_id:
- if self._downloader.params.get('noplaylist'):
+ if self.get_param('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- webpage = self._download_webpage(url, item_id)
- data = self._extract_yt_initial_data(item_id, webpage)
+ return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
+ self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+ data, ytcfg = self._extract_data(url, item_id)
+
+ tabs = try_get(
+ data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
+ if tabs:
+ selected_tab = self._extract_selected_tab(tabs)
+ tab_name = selected_tab.get('title', '')
+ if 'no-youtube-channel-redirect' not in compat_opts:
+ if mobj['tab'] == '/live':
+ # Live tab should have redirected to the video
+ raise ExtractorError('The channel is not currently live', expected=True)
+ if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
+ if not mobj['not_channel'] and item_id[:2] == 'UC':
+ # Topic channels don't have /videos. Use the equivalent playlist instead
+ self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
+ pl_id = 'UU%s' % item_id[2:]
+ pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
+ try:
+ data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
+ except ExtractorError:
+ self.report_warning('The playlist returned an error. Falling back to channel URL')
+ else:
+ self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
+
+ self.write_debug('Final URL: %s' % url)
+
+ # YouTube sometimes provides a button to reload playlist with unavailable videos.
+ if 'no-youtube-unavailable-videos' not in compat_opts:
+ data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
+ self._extract_and_report_alerts(data, only_once=True)
tabs = try_get(
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
if tabs:
- return self._extract_from_tabs(item_id, webpage, data, tabs)
+ return self._extract_from_tabs(item_id, ytcfg, data, tabs)
+
playlist = try_get(
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
if playlist:
- return self._extract_from_playlist(item_id, url, data, playlist)
- # Fallback to video extraction if no playlist alike page is recognized.
- # First check for the current video then try the v attribute of URL query.
+ return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
+
video_id = try_get(
data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
compat_str) or video_id
if video_id:
- return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
- # Capture and output alerts
- alert = self._extract_alert(data)
- if alert:
- raise ExtractorError(alert, expected=True)
- # Failed to recognize
+ if mobj['tab'] != '/live': # live tab is expected to redirect to video
+ self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
+ return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
+
raise ExtractorError('Unable to recognize tab page')
@@ -2867,6 +4260,7 @@ class YoutubePlaylistIE(InfoExtractor):
'id': 'PLBB231211A4F62143',
'uploader': 'Wickydoo',
'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
+ 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
},
'playlist_mincount': 29,
}, {
@@ -2889,12 +4283,13 @@ class YoutubePlaylistIE(InfoExtractor):
}
}, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'playlist_mincount': 982,
+ 'playlist_mincount': 654,
'info_dict': {
'title': '2018 Chinese New Singles (11/6 updated)',
'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'uploader': 'LBK',
'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
+ 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
}
}, {
'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
@@ -2919,15 +4314,17 @@ class YoutubePlaylistIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- qs = parse_qs(url)
- if not qs:
- qs = {'list': playlist_id}
- return self.url_result(
- update_url_query('https://www.youtube.com/playlist', qs),
- ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
+ is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
+ url = update_url_query(
+ 'https://www.youtube.com/playlist',
+ parse_qs(url) or {'list': playlist_id})
+ if is_music_url:
+ url = smuggle_url(url, {'is_music_url': True})
+ return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeYtBeIE(InfoExtractor):
+ IE_DESC = 'youtu.be'
_VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_TESTS = [{
'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
@@ -2955,7 +4352,7 @@ class YoutubeYtBeIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
playlist_id = mobj.group('playlist_id')
return self.url_result(
@@ -2967,6 +4364,7 @@ class YoutubeYtBeIE(InfoExtractor):
class YoutubeYtUserIE(InfoExtractor):
+ IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
_VALID_URL = r'ytuser:(?P<id>.+)'
_TESTS = [{
'url': 'ytuser:phihag',
@@ -2982,8 +4380,8 @@ class YoutubeYtUserIE(InfoExtractor):
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
+ IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
+ _VALID_URL = r':ytfav(?:ou?rite)?s?'
_LOGIN_REQUIRED = True
_TESTS = [{
'url': ':ytfav',
@@ -2999,8 +4397,8 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
ie=YoutubeTabIE.ie_key())
-class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com searches'
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
+ IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
# there doesn't appear to be a real limit; for example, a search for
# 'python' returns more than 8,000,000 results
_MAX_RESULTS = float('inf')
@@ -3009,27 +4407,17 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
_SEARCH_PARAMS = None
_TESTS = []
- def _entries(self, query, n):
- data = {
- 'context': {
- 'client': {
- 'clientName': 'WEB',
- 'clientVersion': '2.20201021.03.00',
- }
- },
- 'query': query,
- }
+ def _search_results(self, query):
+ data = {'query': query}
if self._SEARCH_PARAMS:
data['params'] = self._SEARCH_PARAMS
- total = 0
+ continuation = {}
for page_num in itertools.count(1):
- search = self._download_json(
- 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
- video_id='query "%s"' % query,
- note='Downloading page %s' % page_num,
- errnote='Unable to download API page', fatal=False,
- data=json.dumps(data).encode('utf8'),
- headers={'content-type': 'application/json'})
+ data.update(continuation)
+ search = self._extract_response(
+ item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
+ check_get_keys=('contents', 'onResponseReceivedCommands')
+ )
if not search:
break
slr_contents = try_get(
@@ -3039,7 +4427,15 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
list)
if not slr_contents:
break
+
+ # Youtube sometimes adds promoted content to searches,
+ # changing the index location of videos and token.
+ # So we search through all entries till we find them.
+ continuation = None
for slr_content in slr_contents:
+ if not continuation:
+ continuation = self._extract_continuation({'contents': [slr_content]})
+
isr_contents = try_get(
slr_content,
lambda x: x['itemSectionRenderer']['contents'],
@@ -3055,52 +4451,46 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
video_id = video.get('videoId')
if not video_id:
continue
+
yield self._extract_video(video)
- total += 1
- if total == n:
- return
- token = try_get(
- slr_contents,
- lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
- compat_str)
- if not token:
- break
- data['continuation'] = token
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
- return self.playlist_result(self._entries(query, n), query)
+ if not continuation:
+ break
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
- IE_DESC = 'YouTube.com searches, newest videos first'
+ IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
_SEARCH_PARAMS = 'CAI%3D'
-r"""
class YoutubeSearchURLIE(YoutubeSearchIE):
IE_DESC = 'YouTube.com search URLs'
- IE_NAME = 'youtube:search_url'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+ IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
+ # _MAX_RESULTS = 100
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
'info_dict': {
- 'title': 'hypervideo test video',
+ 'id': 'youtube-dl test video',
+ 'title': 'youtube-dl test video',
}
}, {
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
'only_matching': True,
}]
+ @classmethod
+ def _make_valid_url(cls):
+ return cls._VALID_URL
+
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- query = compat_urllib_parse_unquote_plus(mobj.group('query'))
- webpage = self._download_webpage(url, query)
- return self.playlist_result(self._process_page(webpage), playlist_title=query)
-"""
+ qs = parse_qs(url)
+ query = (qs.get('search_query') or qs.get('q'))[0]
+ self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
+ return self._get_n_results(query, self._MAX_RESULTS)
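parse_qs maps every query key to a list of values, so the search terms above come from the first value of either search_query or q, and the sp filter parameter defaults to an empty string. The same extraction with the standard library (the in-tree parse_qs helper accepts a full URL; here the query string is split off manually):

    from urllib.parse import parse_qs

    def search_params(url):
        qs = parse_qs(url.split('?', 1)[-1])
        query = (qs.get('search_query') or qs.get('q'))[0]
        sp = qs.get('sp', ('',))[0]  # proto-encoded filters, if any
        return query, sp

    print(search_params('https://www.youtube.com/results?q=test&sp=EgQIBBgB'))
    # -> ('test', 'EgQIBBgB')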
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
@@ -3109,14 +4499,12 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE):
Subclasses must define the _FEED_NAME property.
"""
_LOGIN_REQUIRED = True
+ _TESTS = []
@property
def IE_NAME(self):
return 'youtube:%s' % self._FEED_NAME
- def _real_initialize(self):
- self._login()
-
def _real_extract(self, url):
return self.url_result(
'https://www.youtube.com/feed/%s' % self._FEED_NAME,
@@ -3139,20 +4527,24 @@ class YoutubeWatchLaterIE(InfoExtractor):
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
- _VALID_URL = r':ytrec(?:ommended)?'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
+ _LOGIN_REQUIRED = False
_TESTS = [{
'url': ':ytrec',
'only_matching': True,
}, {
'url': ':ytrecommended',
'only_matching': True,
+ }, {
+ 'url': 'https://youtube.com',
+ 'only_matching': True,
}]
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
- _VALID_URL = r':ytsubs(?:criptions)?'
+ IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
+ _VALID_URL = r':ytsub(?:scription)?s?'
_FEED_NAME = 'subscriptions'
_TESTS = [{
'url': ':ytsubs',
@@ -3164,8 +4556,8 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
- _VALID_URL = r':ythistory'
+ IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
+ _VALID_URL = r':ythis(?:tory)?'
_FEED_NAME = 'history'
_TESTS = [{
'url': ':ythistory',
@@ -3216,12 +4608,22 @@ class YoutubeTruncatedURLIE(InfoExtractor):
raise ExtractorError(
'Did you forget to quote the URL? Remember that & is a meta '
'character in most shells, so you want to put the URL in quotes, '
- 'like hypervideo '
+ 'like youtube-dl '
'"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
- ' or simply hypervideo BaW_jenozKc .',
+ ' or simply youtube-dl BaW_jenozKc .',
expected=True)
+class YoutubeClipIE(InfoExtractor):
+ IE_NAME = 'youtube:clip'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
+
+ def _real_extract(self, url):
+ self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
+ return self.url_result(url, 'Generic')
+
+
class YoutubeTruncatedIDIE(InfoExtractor):
IE_NAME = 'youtube:truncated_id'
IE_DESC = False # Do not list
diff --git a/hypervideo_dl/extractor/zapiks.py b/hypervideo_dl/extractor/zapiks.py
index f6496f5..161b011 100644
--- a/hypervideo_dl/extractor/zapiks.py
+++ b/hypervideo_dl/extractor/zapiks.py
@@ -46,7 +46,7 @@ class ZapiksIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
diff --git a/hypervideo_dl/extractor/zaq1.py b/hypervideo_dl/extractor/zaq1.py
new file mode 100644
index 0000000..889aff5
--- /dev/null
+++ b/hypervideo_dl/extractor/zaq1.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class Zaq1IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://zaq1.pl/video/xev0e',
+ 'md5': '24a5eb3f052e604ae597c4d0d19b351e',
+ 'info_dict': {
+ 'id': 'xev0e',
+ 'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
+ 'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147',
+ 'ext': 'mp4',
+ 'duration': 511,
+ 'timestamp': 1490896361,
+ 'uploader': 'Anonim',
+ 'upload_date': '20170330',
+ 'view_count': int,
+ }
+ }, {
+ # malformed JSON-LD
+ 'url': 'http://zaq1.pl/video/x81vn',
+ 'info_dict': {
+ 'id': 'x81vn',
+ 'title': 'SEKRETNE ŻYCIE WALTERA MITTY',
+ 'ext': 'mp4',
+ 'duration': 6234,
+ 'timestamp': 1493494860,
+ 'uploader': 'Anonim',
+ 'upload_date': '20170429',
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON'],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'data-video-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'video url', group='url')
+
+ info = self._search_json_ld(webpage, video_id, fatal=False)
+
+ def extract_data(field, name, fatal=False):
+ return self._search_regex(
+ r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
+ webpage, field, fatal=fatal, group='field')
+
+ if not info.get('title'):
+ info['title'] = extract_data('file-name', 'title', fatal=True)
+
+ if not info.get('duration'):
+ info['duration'] = int_or_none(extract_data('duration', 'duration'))
+
+ if not info.get('thumbnail'):
+ info['thumbnail'] = extract_data('photo-url', 'thumbnail')
+
+ if not info.get('timestamp'):
+ info['timestamp'] = unified_timestamp(self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp'))
+
+ if not info.get('interactionCount'):
+ info['view_count'] = int_or_none(self._html_search_meta(
+ 'interactionCount', webpage, 'view count'))
+
+ uploader = self._html_search_regex(
+ r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader',
+ fatal=False)
+
+ width = int_or_none(self._html_search_meta(
+ 'width', webpage, fatal=False))
+ height = int_or_none(self._html_search_meta(
+ 'height', webpage, fatal=False))
+
+ info.update({
+ 'id': video_id,
+ 'formats': [{
+ 'url': video_url,
+ 'width': width,
+ 'height': height,
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }],
+ 'uploader': uploader,
+ })
+
+ return info
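The extract_data helper above pulls values out of data-* attributes with a back-referencing pattern: (["\'])(?P<field>(?:(?!\1).)+)\1 accepts either quote style and refuses to cross the matching closing quote. A standalone version (the sample HTML is made up):

    import re

    def extract_data(webpage, field):
        m = re.search(
            r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % re.escape(field),
            webpage)
        return m.group('field') if m else None

    html = '<div data-duration="511" data-file-name=\'My Clip\'></div>'
    print(extract_data(html, 'duration'))   # -> 511
    print(extract_data(html, 'file-name'))  # -> My Clip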
diff --git a/hypervideo_dl/extractor/zattoo.py b/hypervideo_dl/extractor/zattoo.py
index 6bac302..a13d124 100644
--- a/hypervideo_dl/extractor/zattoo.py
+++ b/hypervideo_dl/extractor/zattoo.py
@@ -182,7 +182,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
else:
assert False
for this_format in this_formats:
- this_format['preference'] = preference
+ this_format['quality'] = preference
formats.extend(this_formats)
self._sort_formats(formats)
return formats
@@ -217,7 +217,7 @@ class QuicklineIE(QuicklineBaseIE):
}
def _real_extract(self, url):
- channel_name, video_id = re.match(self._VALID_URL, url).groups()
+ channel_name, video_id = self._match_valid_url(url).groups()
return self._extract_video(channel_name, video_id)
@@ -262,7 +262,7 @@ class ZattooIE(ZattooBaseIE):
}]
def _real_extract(self, url):
- channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
+ channel_name, video_id, record_id = self._match_valid_url(url).groups()
return self._extract_video(channel_name, video_id, record_id)
diff --git a/hypervideo_dl/extractor/zdf.py b/hypervideo_dl/extractor/zdf.py
index 4dd56f6..8c279c5 100644
--- a/hypervideo_dl/extractor/zdf.py
+++ b/hypervideo_dl/extractor/zdf.py
@@ -14,6 +14,7 @@ from ..utils import (
orderedSet,
parse_codecs,
qualities,
+ str_or_none,
try_get,
unified_timestamp,
update_url_query,
@@ -49,35 +50,35 @@ class ZDFBaseIE(InfoExtractor):
def _extract_format(self, video_id, formats, format_urls, meta):
format_url = url_or_none(meta.get('url'))
- if not format_url:
- return
- if format_url in format_urls:
+ if not format_url or format_url in format_urls:
return
format_urls.add(format_url)
- mime_type = meta.get('mimeType')
- ext = determine_ext(format_url)
+
+ mime_type, ext = meta.get('mimeType'), determine_ext(format_url)
if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ new_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id='hls',
- entry_protocol='m3u8_native', fatal=False))
+ entry_protocol='m3u8_native', fatal=False)
elif mime_type == 'application/f4m+xml' or ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+ new_formats = self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
else:
f = parse_codecs(meta.get('mimeCodec'))
- format_id = ['http']
- for p in (meta.get('type'), meta.get('quality')):
- if p and isinstance(p, compat_str):
- format_id.append(p)
+ if not f and meta.get('type'):
+ data = meta['type'].split('_')
+ if try_get(data, lambda x: x[2]) == ext:
+ f = {'vcodec': data[0], 'acodec': data[1]}
f.update({
'url': format_url,
- 'format_id': '-'.join(format_id),
- 'format_note': meta.get('quality'),
- 'language': meta.get('language'),
- 'quality': qualities(self._QUALITIES)(meta.get('quality')),
- 'preference': -10,
+ 'format_id': '-'.join(filter(str_or_none, ('http', meta.get('type'), meta.get('quality')))),
})
- formats.append(f)
+ new_formats = [f]
+ formats.extend(merge_dicts(f, {
+ 'format_note': ', '.join(filter(None, (meta.get('quality'), meta.get('class')))),
+ 'language': meta.get('language'),
+ 'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
+ 'quality': qualities(self._QUALITIES)(meta.get('quality')),
+ }) for f in new_formats)
def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
ptmd = self._call_api(
@@ -106,9 +107,10 @@ class ZDFBaseIE(InfoExtractor):
'type': f.get('type'),
'mimeType': f.get('mimeType'),
'quality': quality.get('quality'),
+ 'class': track.get('class'),
'language': track.get('language'),
})
- self._sort_formats(formats)
+ self._sort_formats(formats, ('hasaud', 'res', 'quality', 'language_preference'))
duration = float_or_none(try_get(
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
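
The reworked _extract_format now fans the track metadata out over every generated format and ranks audio tracks by language_preference, and _sort_formats is given an explicit field order. A small self-contained restatement of the ranking rule introduced above:

    def language_preference(track_class):
        # main audio outranks unclassified tracks, which outrank audio description
        return 10 if track_class == 'main' else -10 if track_class == 'ad' else -1

    assert language_preference('main') > language_preference(None) > language_preference('ad')
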
diff --git a/hypervideo_dl/extractor/zee5.py b/hypervideo_dl/extractor/zee5.py
new file mode 100644
index 0000000..5366041
--- /dev/null
+++ b/hypervideo_dl/extractor/zee5.py
@@ -0,0 +1,244 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class Zee5IE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ zee5:|
+ (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+ (?:
+ (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
+ |movies/[^#/?]+
+ )/(?P<display_id>[^#/?]+)/
+ )
+ (?P<id>[^#/?]+)/?(?:$|[?#])
+ '''
+ _TESTS = [{
+ 'url': 'https://www.zee5.com/movies/details/krishna-the-birth/0-0-63098',
+ 'info_dict': {
+ 'id': '0-0-63098',
+ 'ext': 'mp4',
+ 'display_id': 'krishna-the-birth',
+ 'title': 'Krishna - The Birth',
+ 'duration': 4368,
+ 'average_rating': 4,
+ 'description': compat_str,
+ 'alt_title': 'Krishna - The Birth',
+ 'uploader': 'Zee Entertainment Enterprises Ltd',
+ 'release_date': '20060101',
+ 'upload_date': '20060101',
+ 'timestamp': 1136073600,
+ 'thumbnail': 'https://akamaividz.zee5.com/resources/0-0-63098/list/270x152/0063098_list_80888170.jpg',
+ 'tags': list
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://zee5.com/tvshows/details/krishna-balram/0-6-1871/episode-1-the-test-of-bramha/0-1-233402',
+ 'info_dict': {
+ 'id': '0-1-233402',
+ 'ext': 'mp4',
+ 'display_id': 'episode-1-the-test-of-bramha',
+ 'title': 'Episode 1 - The Test Of Bramha',
+ 'duration': 1336,
+ 'average_rating': 4,
+ 'description': compat_str,
+ 'alt_title': 'Episode 1 - The Test Of Bramha',
+ 'uploader': 'Zee Entertainment Enterprises Ltd',
+ 'release_date': '20090101',
+ 'upload_date': '20090101',
+ 'timestamp': 1230768000,
+ 'thumbnail': 'https://akamaividz.zee5.com/resources/0-1-233402/list/270x152/01233402_list.jpg',
+ 'series': 'Krishna Balram',
+ 'season_number': 1,
+ 'episode_number': 1,
+ 'tags': list,
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/global/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
+ 'only_matching': True
+ }]
+ _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
+ _DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb'
+ _USER_TOKEN = None
+ _LOGIN_HINT = 'Use "--username <mobile_number>" to log in using OTP, or "--username token" and "--password <user_token>" to log in using a user token.'

+ _NETRC_MACHINE = 'zee5'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username:
+ if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
+ self.report_login()
+ otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username),
+ None, note='Sending OTP')
+ if otp_request_json['code'] == 0:
+ self.to_screen(otp_request_json['message'])
+ else:
+ raise ExtractorError(otp_request_json['message'], expected=True)
+ otp_code = self._get_tfa_info('OTP')
+ otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID),
+ None, note='Verifying OTP', fatal=False)
+ if not otp_verify_json:
+ raise ExtractorError('Unable to verify OTP.', expected=True)
+ self._USER_TOKEN = otp_verify_json.get('token')
+ if not self._USER_TOKEN:
+ raise ExtractorError(otp_verify_json.get('message', 'Unable to verify OTP.'), expected=True)
+ elif username.lower() == 'token' and len(password) > 1198:
+ self._USER_TOKEN = password
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ access_token_request = self._download_json(
+ 'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
+ video_id, note='Downloading access token')
+ data = {
+ 'x-access-token': access_token_request['token']
+ }
+ if self._USER_TOKEN:
+ data['Authorization'] = 'bearer %s' % self._USER_TOKEN
+ else:
+ data['X-Z5-Guest-Token'] = self._DEVICE_ID
+
+ json_data = self._download_json(
+ self._DETAIL_API_URL.format(video_id, self._DEVICE_ID),
+ video_id, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
+ asset_data = json_data['assetDetails']
+ show_data = json_data.get('showDetails', {})
+ if 'premium' in asset_data['business_type']:
+ raise ExtractorError('Premium content is DRM protected.', expected=True)
+ if not asset_data.get('hls_url'):
+ self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)
+ formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(asset_data['hls_url'], video_id, 'mp4', fatal=False)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for sub in asset_data.get('subtitle_url', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('language', 'en'), []).append({
+ 'url': self._proto_relative_url(sub_url),
+ })
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': asset_data['title'],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': int_or_none(asset_data.get('duration')),
+ 'average_rating': int_or_none(asset_data.get('rating')),
+ 'description': str_or_none(asset_data.get('description')),
+ 'alt_title': str_or_none(asset_data.get('original_title')),
+ 'uploader': str_or_none(asset_data.get('content_owner')),
+ 'age_limit': parse_age_limit(asset_data.get('age_rating')),
+ 'release_date': unified_strdate(asset_data.get('release_date')),
+ 'timestamp': unified_timestamp(asset_data.get('release_date')),
+ 'thumbnail': url_or_none(asset_data.get('image_url')),
+ 'series': str_or_none(asset_data.get('tvshow_name')),
+ 'season': try_get(show_data, lambda x: x['seasons']['title'], str),
+ 'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])),
+ 'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])),
+ 'tags': try_get(asset_data, lambda x: x['tags'], list)
+ }
+
+
+class Zee5SeriesIE(InfoExtractor):
+ IE_NAME = 'zee5:series'
+ _VALID_URL = r'''(?x)
+ (?:
+ zee5:series:|
+ (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+ (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
+ )
+ (?P<id>[^#/?]+)/?(?:$|[?#])
+ '''
+ _TESTS = [{
+ 'url': 'https://www.zee5.com/kids/kids-shows/krishna-balram/0-6-1871',
+ 'playlist_mincount': 43,
+ 'info_dict': {
+ 'id': '0-6-1871',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/tvshows/details/bhabi-ji-ghar-par-hai/0-6-199',
+ 'playlist_mincount': 1500,
+ 'info_dict': {
+ 'id': '0-6-199',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/tvshows/details/agent-raghav-crime-branch/0-6-965',
+ 'playlist_mincount': 24,
+ 'info_dict': {
+ 'id': '0-6-965',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/ta/tvshows/details/nagabhairavi/0-6-3201',
+ 'playlist_mincount': 3,
+ 'info_dict': {
+ 'id': '0-6-3201',
+ },
+ }, {
+ 'url': 'https://www.zee5.com/global/hi/tvshows/details/khwaabon-ki-zamin-par/0-6-270',
+ 'playlist_mincount': 150,
+ 'info_dict': {
+ 'id': '0-6-270',
+ },
+ }
+ ]
+
+ def _entries(self, show_id):
+ access_token_request = self._download_json(
+ 'https://useraction.zee5.com/token/platform_tokens.php?platform_name=web_app',
+ show_id, note='Downloading access token')
+ headers = {
+ 'X-Access-Token': access_token_request['token'],
+ 'Referer': 'https://www.zee5.com/',
+ }
+ show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id)
+
+ page_num = 0
+ show_json = self._download_json(show_url, video_id=show_id, headers=headers)
+ for season in show_json.get('seasons') or []:
+ season_id = try_get(season, lambda x: x['id'], compat_str)
+ next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id)
+ while next_url:
+ page_num += 1
+ episodes_json = self._download_json(
+ next_url, video_id=show_id, headers=headers,
+ note='Downloading JSON metadata page %d' % page_num)
+ for episode in try_get(episodes_json, lambda x: x['episode'], list) or []:
+ video_id = episode.get('id')
+ yield self.url_result(
+ 'zee5:%s' % video_id,
+ ie=Zee5IE.ie_key(), video_id=video_id)
+ next_url = url_or_none(episodes_json.get('next_episode_api'))
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
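
Zee5SeriesIE._entries follows the API's next_episode_api link until the service stops returning one. A stripped-down sketch of that pagination loop, with fetch_json standing in for self._download_json:

    def paginate_episode_ids(first_url, fetch_json):
        next_url = first_url
        while next_url:
            page = fetch_json(next_url)
            for episode in page.get('episode') or []:
                yield episode['id']
            # the API returns the URL of the next page, or nothing on the last one
            next_url = page.get('next_episode_api')
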
diff --git a/hypervideo_dl/extractor/zingmp3.py b/hypervideo_dl/extractor/zingmp3.py
index 207c04f..a3edc15 100644
--- a/hypervideo_dl/extractor/zingmp3.py
+++ b/hypervideo_dl/extractor/zingmp3.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
int_or_none,
)
@@ -48,8 +47,8 @@ class ZingMp3BaseIE(InfoExtractor):
return
msg = item['msg']
if msg == 'Sorry, this content is not available in your country.':
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
- raise ExtractorError(msg, expected=True)
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+ self.raise_no_formats(msg, expected=True)
self._sort_formats(formats)
subtitles = None
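
The zingmp3 change turns two hard failures into soft ones: with metadata_available=True, the geo-restriction is reported but extraction can still return an info dict without formats, and raise_no_formats behaves likewise when the user opts in. A rough sketch of the intent, not the actual library internals (names are stand-ins):

    from hypervideo_dl.utils import ExtractorError

    def soft_fail(ie, msg):
        # report instead of raising when the user asked for metadata-only results
        if ie.get_param('ignore_no_formats_error'):
            ie.report_warning(msg)
        else:
            raise ExtractorError(msg, expected=True)
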
diff --git a/hypervideo_dl/extractor/zoom.py b/hypervideo_dl/extractor/zoom.py
index db073d9..25a0902 100644
--- a/hypervideo_dl/extractor/zoom.py
+++ b/hypervideo_dl/extractor/zoom.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
from .common import InfoExtractor
from ..utils import (
@@ -10,6 +9,7 @@ from ..utils import (
js_to_json,
parse_filesize,
urlencode_postdata,
+ urljoin,
)
@@ -27,7 +27,7 @@ class ZoomIE(InfoExtractor):
}
def _real_extract(self, url):
- base_url, play_id = re.match(self._VALID_URL, url).groups()
+ base_url, play_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, play_id)
try:
@@ -35,7 +35,7 @@ class ZoomIE(InfoExtractor):
except ExtractorError:
form = None
if form:
- password = self._downloader.params.get('videopassword')
+ password = self.get_param('videopassword')
if not password:
raise ExtractorError(
'This video is protected by a passcode, use the --video-password option', expected=True)
@@ -55,10 +55,19 @@ class ZoomIE(InfoExtractor):
r'(?s)window\.__data__\s*=\s*({.+?});',
webpage, 'data'), play_id, js_to_json)
+ subtitles = {}
+ for _type in ('transcript', 'cc'):
+ if data.get('%sUrl' % _type):
+ subtitles[_type] = [{
+ 'url': urljoin(base_url, data['%sUrl' % _type]),
+ 'ext': 'vtt',
+ }]
+
return {
'id': play_id,
'title': data['topic'],
'url': data['viewMp4Url'],
+ 'subtitles': subtitles,
'width': int_or_none(data.get('viewResolvtionsWidth')),
'height': int_or_none(data.get('viewResolvtionsHeight')),
'http_headers': {
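
The Zoom hunk derives subtitles from the page data's transcriptUrl/ccUrl fields, resolving each against the recording's base URL. The same logic in isolation, with an illustrative payload:

    from urllib.parse import urljoin  # the extractor uses the equivalent hypervideo_dl.utils.urljoin

    base_url = 'https://zoom.us'
    data = {'transcriptUrl': '/rec/play/transcript.vtt', 'ccUrl': None}

    subtitles = {}
    for _type in ('transcript', 'cc'):
        if data.get('%sUrl' % _type):
            subtitles[_type] = [{'url': urljoin(base_url, data['%sUrl' % _type]), 'ext': 'vtt'}]
    # subtitles == {'transcript': [{'url': 'https://zoom.us/rec/play/transcript.vtt', 'ext': 'vtt'}]}
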
diff --git a/hypervideo_dl/extractor/zype.py b/hypervideo_dl/extractor/zype.py
index f20f953..7663cb3 100644
--- a/hypervideo_dl/extractor/zype.py
+++ b/hypervideo_dl/extractor/zype.py
@@ -56,6 +56,8 @@ class ZypeIE(InfoExtractor):
video = response['video']
title = video['title']
+ subtitles = {}
+
if isinstance(body, dict):
formats = []
for output in body.get('outputs', []):
@@ -64,7 +66,7 @@ class ZypeIE(InfoExtractor):
continue
name = output.get('name')
if name == 'm3u8':
- formats = self._extract_m3u8_formats(
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
output_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
else:
@@ -97,7 +99,7 @@ class ZypeIE(InfoExtractor):
if get_attr('integration') == 'verizon-media':
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
- formats = self._extract_m3u8_formats(
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
text_tracks = self._search_regex(
r'textTracks\s*:\s*(\[[^]]+\])',
@@ -107,7 +109,6 @@ class ZypeIE(InfoExtractor):
text_tracks, video_id, js_to_json, False)
self._sort_formats(formats)
- subtitles = {}
if text_tracks:
for text_track in text_tracks:
tt_url = dict_get(text_track, ('file', 'src'))
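
Switching zype.py to _extract_m3u8_formats_and_subtitles keeps subtitle renditions declared in the HLS master playlist, which are then merged with the player's textTracks. A minimal stand-in for the merge step (the real InfoExtractor._merge_subtitles is more general):

    def merge_subtitles(*dicts):
        # concatenate subtitle tracks per language code
        merged = {}
        for d in dicts:
            for lang, tracks in (d or {}).items():
                merged.setdefault(lang, []).extend(tracks)
        return merged
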
diff --git a/hypervideo_dl/minicurses.py b/hypervideo_dl/minicurses.py
new file mode 100644
index 0000000..a6e159a
--- /dev/null
+++ b/hypervideo_dl/minicurses.py
@@ -0,0 +1,109 @@
+import functools
+from threading import Lock
+from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES, write_string
+
+
+class MultilinePrinterBase:
+ def __init__(self, stream=None, lines=1):
+ self.stream = stream
+ self.maximum = lines - 1
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ self.end()
+
+ def print_at_line(self, text, pos):
+ pass
+
+ def end(self):
+ pass
+
+ def _add_line_number(self, text, line):
+ if self.maximum:
+ return f'{line + 1}: {text}'
+ return text
+
+ def write(self, *text):
+ write_string(''.join(text), self.stream)
+
+
+class QuietMultilinePrinter(MultilinePrinterBase):
+ pass
+
+
+class MultilineLogger(MultilinePrinterBase):
+ def write(self, *text):
+ self.stream.debug(''.join(text))
+
+ def print_at_line(self, text, pos):
+ # stream is the logger object, not an actual stream
+ self.write(self._add_line_number(text, pos))
+
+
+class BreaklineStatusPrinter(MultilinePrinterBase):
+ def print_at_line(self, text, pos):
+ self.write(self._add_line_number(text, pos), '\n')
+
+
+class MultilinePrinter(MultilinePrinterBase):
+ def __init__(self, stream=None, lines=1, preserve_output=True):
+ super().__init__(stream, lines)
+ self.preserve_output = preserve_output
+ self._lastline = self._lastlength = 0
+ self._movelock = Lock()
+ self._HAVE_FULLCAP = supports_terminal_sequences(self.stream)
+
+ def lock(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ with self._movelock:
+ return func(self, *args, **kwargs)
+ return wrapper
+
+ def _move_cursor(self, dest):
+ current = min(self._lastline, self.maximum)
+ yield '\r'
+ distance = dest - current
+ if distance < 0:
+ yield TERMINAL_SEQUENCES['UP'] * -distance
+ elif distance > 0:
+ yield TERMINAL_SEQUENCES['DOWN'] * distance
+ self._lastline = dest
+
+ @lock
+ def print_at_line(self, text, pos):
+ if self._HAVE_FULLCAP:
+ self.write(*self._move_cursor(pos), TERMINAL_SEQUENCES['ERASE_LINE'], text)
+ return
+
+ text = self._add_line_number(text, pos)
+ textlen = len(text)
+ if self._lastline == pos:
+ # move the cursor back to the start of the line when rewriting the same line
+ prefix = '\r'
+ if self._lastlength > textlen:
+ text += ' ' * (self._lastlength - textlen)
+ self._lastlength = textlen
+ else:
+ # otherwise, break the line
+ prefix = '\n'
+ self._lastlength = textlen
+ self.write(prefix, text)
+ self._lastline = pos
+
+ @lock
+ def end(self):
+ # move the cursor to the end of the last line and write a line break,
+ # so that subsequent to_screen calls start on a fresh line
+ text = self._move_cursor(self.maximum) if self._HAVE_FULLCAP else []
+ if self.preserve_output:
+ self.write(*text, '\n')
+ return
+
+ if self._HAVE_FULLCAP:
+ self.write(
+ *text, TERMINAL_SEQUENCES['ERASE_LINE'],
+ f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
+ else:
+ self.write(*text, ' ' * self._lastlength)
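
The new minicurses module drives multi-line progress output. A hedged usage sketch, assuming a terminal on stderr (the printer falls back to carriage returns and padding when terminal sequences are unsupported):

    import sys
    import time

    from hypervideo_dl.minicurses import MultilinePrinter

    # two progress lines; preserve_output=False erases them when the block exits
    with MultilinePrinter(sys.stderr, lines=2, preserve_output=False) as printer:
        for pct in range(0, 101, 25):
            printer.print_at_line('video: %3d%%' % pct, 0)
            printer.print_at_line('audio: %3d%%' % (100 - pct), 1)
            time.sleep(0.1)
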
diff --git a/hypervideo_dl/options.py b/hypervideo_dl/options.py
index 6ec5912..578fb86 100644
--- a/hypervideo_dl/options.py
+++ b/hypervideo_dl/options.py
@@ -5,7 +5,6 @@ import optparse
import re
import sys
-from .downloader.external import list_external_downloaders
from .compat import (
compat_expanduser,
compat_get_terminal_size,
@@ -14,11 +13,26 @@ from .compat import (
compat_shlex_split,
)
from .utils import (
+ expand_path,
+ get_executable_path,
+ OUTTMPL_TYPES,
preferredencoding,
+ remove_end,
write_string,
)
+from .cookies import SUPPORTED_BROWSERS
from .version import __version__
+from .downloader.external import list_external_downloaders
+from .postprocessor import (
+ FFmpegExtractAudioPP,
+ FFmpegSubtitlesConvertorPP,
+ FFmpegThumbnailsConvertorPP,
+ FFmpegVideoRemuxerPP,
+ SponsorBlockPP,
+)
+from .postprocessor.modify_chapters import DEFAULT_SPONSORBLOCK_CHAPTER_TITLE
+
def _hide_login_info(opts):
PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
@@ -54,42 +68,37 @@ def parseOpts(overrideArguments=None):
optionf.close()
return res
- def _readUserConf():
- xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
- if xdg_config_home:
- userConfFile = os.path.join(xdg_config_home, 'hypervideo', 'config')
- if not os.path.isfile(userConfFile):
- userConfFile = os.path.join(xdg_config_home, 'hypervideo.conf')
- else:
- userConfFile = os.path.join(compat_expanduser('~'), '.config', 'hypervideo', 'config')
- if not os.path.isfile(userConfFile):
- userConfFile = os.path.join(compat_expanduser('~'), '.config', 'hypervideo.conf')
- userConf = _readOptions(userConfFile, None)
+ def _readUserConf(package_name, default=[]):
+ # .config
+ xdg_config_home = compat_getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
+ userConfFile = os.path.join(xdg_config_home, package_name, 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(xdg_config_home, '%s.conf' % package_name)
+ userConf = _readOptions(userConfFile, default=None)
+ if userConf is not None:
+ return userConf, userConfFile
- if userConf is None:
- appdata_dir = compat_getenv('appdata')
- if appdata_dir:
- userConf = _readOptions(
- os.path.join(appdata_dir, 'hypervideo', 'config'),
- default=None)
- if userConf is None:
- userConf = _readOptions(
- os.path.join(appdata_dir, 'hypervideo', 'config.txt'),
- default=None)
+ # appdata
+ appdata_dir = compat_getenv('appdata')
+ if appdata_dir:
+ userConfFile = os.path.join(appdata_dir, package_name, 'config')
+ userConf = _readOptions(userConfFile, default=None)
+ if userConf is None:
+ userConfFile += '.txt'
+ userConf = _readOptions(userConfFile, default=None)
+ if userConf is not None:
+ return userConf, userConfFile
+ # home
+ userConfFile = os.path.join(compat_expanduser('~'), '%s.conf' % package_name)
+ userConf = _readOptions(userConfFile, default=None)
if userConf is None:
- userConf = _readOptions(
- os.path.join(compat_expanduser('~'), 'hypervideo.conf'),
- default=None)
- if userConf is None:
- userConf = _readOptions(
- os.path.join(compat_expanduser('~'), 'hypervideo.conf.txt'),
- default=None)
+ userConfFile += '.txt'
+ userConf = _readOptions(userConfFile, default=None)
+ if userConf is not None:
+ return userConf, userConfFile
- if userConf is None:
- userConf = []
-
- return userConf
+ return default, None
def _format_option_string(option):
''' ('-o', '--option') -> -o, --format METAVAR'''
@@ -108,13 +117,69 @@ def parseOpts(overrideArguments=None):
return ''.join(opts)
- def _comma_separated_values_options_callback(option, opt_str, value, parser):
- setattr(parser.values, option.dest, value.split(','))
+ def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip):
+ # append can be True, False or -1 (prepend)
+ current = getattr(parser.values, option.dest) if append else []
+ value = list(filter(None, [process(value)] if delim is None else map(process, value.split(delim))))
+ setattr(
+ parser.values, option.dest,
+ current + value if append is True else value + current)
+
+ def _set_from_options_callback(
+ option, opt_str, value, parser, delim=',', allowed_values=None, aliases={},
+ process=lambda x: x.lower().strip()):
+ current = getattr(parser.values, option.dest)
+ values = [process(value)] if delim is None else list(map(process, value.split(delim)[::-1]))
+ while values:
+ actual_val = val = values.pop()
+ if val == 'all':
+ current.update(allowed_values)
+ elif val == '-all':
+ current = set()
+ elif val in aliases:
+ values.extend(aliases[val])
+ else:
+ if val[0] == '-':
+ val = val[1:]
+ current.discard(val)
+ else:
+ current.update([val])
+ if allowed_values is not None and val not in allowed_values:
+ raise optparse.OptionValueError(f'wrong {option.metavar} for {opt_str}: {actual_val}')
+
+ setattr(parser.values, option.dest, current)
+
+ def _dict_from_options_callback(
+ option, opt_str, value, parser,
+ allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True):
+
+ out_dict = getattr(parser.values, option.dest)
+ if multiple_keys:
+ allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
+ mobj = re.match(r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter), value)
+ if mobj is not None:
+ keys = [k.strip() for k in mobj.group('keys').lower().split(',')]
+ val = mobj.group('val')
+ elif default_key is not None:
+ keys, val = [default_key], value
+ else:
+ raise optparse.OptionValueError(
+ 'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value))
+ try:
+ val = process(val) if process else val
+ except Exception as err:
+ raise optparse.OptionValueError(
+ 'wrong %s formatting; %s' % (opt_str, err))
+ for key in keys:
+ out_dict[key] = val
# No need to wrap help messages if we're on a wide console
columns = compat_get_terminal_size().columns
max_width = columns if columns else 80
- max_help_position = 80
+ # 47% is chosen because that is how README.md is currently formatted
+ # and moving help text even further to the right is undesirable.
+ # This can be reduced in the future to get a prettier output
+ max_help_position = int(0.47 * max_width)
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
fmt.format_option_strings = _format_option_string
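
The new _dict_from_options_callback underpins options such as --downloader and --downloader-args: each NAME:VALUE occurrence is folded into a dict on the parser values. The core parsing step restated outside optparse, for the single-key case (multiple_keys wraps the key pattern in a comma-separated alternation):

    import re

    def parse_key_value(value, allowed_keys=r'[\w-]+', delimiter=':', default_key=None):
        mobj = re.match(r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter), value)
        if mobj is not None:
            return [k.strip() for k in mobj.group('keys').lower().split(',')], mobj.group('val')
        if default_key is not None:
            return [default_key], value
        raise ValueError('wrong formatting: %r' % value)

    # e.g. '--downloader-args aria2c:-x8' ends up as {'aria2c': ['-x8']} after shlex splitting
    assert parse_key_value('aria2c:-x8') == (['aria2c'], '-x8')
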
@@ -139,24 +204,28 @@ def parseOpts(overrideArguments=None):
help='Print program version and exit')
general.add_option(
'-i', '--ignore-errors',
- action='store_true', dest='ignoreerrors', default=False,
- help='Continue on download errors, for example to skip unavailable videos in a playlist')
+ action='store_true', dest='ignoreerrors',
+ help='Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails')
+ general.add_option(
+ '--no-abort-on-error',
+ action='store_const', dest='ignoreerrors', const='only_download',
+ help='Continue with next video on download errors; e.g. to skip unavailable videos in a playlist (default)')
general.add_option(
- '--abort-on-error',
+ '--abort-on-error', '--no-ignore-errors',
action='store_false', dest='ignoreerrors',
- help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
+ help='Abort downloading of further videos if an error occurs (Alias: --no-ignore-errors)')
general.add_option(
'--dump-user-agent',
action='store_true', dest='dump_user_agent', default=False,
- help='Display the current browser identification')
+ help='Display the current user-agent and exit')
general.add_option(
'--list-extractors',
action='store_true', dest='list_extractors', default=False,
- help='List all supported extractors')
+ help='List all supported extractors and exit')
general.add_option(
'--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions', default=False,
- help='Output descriptions of all supported extractors')
+ help='Output descriptions of all supported extractors and exit')
general.add_option(
'--force-generic-extractor',
action='store_true', dest='force_generic_extractor', default=False,
@@ -164,45 +233,67 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for hypervideo "large apple". Use the value "auto" to let hypervideo guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+ help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for the search term "large apple". Use the value "auto" to let hypervideo guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching')
general.add_option(
- '--ignore-config',
- action='store_true',
- help='Do not read configuration files. '
- 'When given in the global configuration file /etc/hypervideo.conf: '
- 'Do not read the user configuration in ~/.config/hypervideo/config '
- '(%APPDATA%/hypervideo/config.txt on Windows)')
+ '--ignore-config', '--no-config',
+ action='store_true', dest='ignoreconfig',
+ help=(
+ 'Disable loading any configuration files except the one provided by --config-location. '
+ 'When given inside a configuration file, no further configuration files are loaded. '
+ 'Additionally, (for backward compatibility) if this option is found inside the '
+ 'system configuration file, the user configuration is not loaded'))
general.add_option(
'--config-location',
dest='config_location', metavar='PATH',
- help='Location of the configuration file; either the path to the config or its containing directory.')
+ help='Location of the main configuration file; either the path to the config or its containing directory')
general.add_option(
'--flat-playlist',
- action='store_const', dest='extract_flat', const='in_playlist',
- default=False,
- help='Do not extract the videos of a playlist, only list them.')
+ action='store_const', dest='extract_flat', const='in_playlist', default=False,
+ help='Do not extract the videos of a playlist, only list them')
+ general.add_option(
+ '--no-flat-playlist',
+ action='store_false', dest='extract_flat',
+ help='Extract the videos of a playlist')
general.add_option(
'--mark-watched',
action='store_true', dest='mark_watched', default=False,
- help='Mark videos watched (YouTube only)')
+ help='Mark videos watched (even with --simulate). Currently only supported for YouTube')
general.add_option(
'--no-mark-watched',
- action='store_false', dest='mark_watched', default=False,
- help='Do not mark videos watched (YouTube only)')
+ action='store_false', dest='mark_watched',
+ help='Do not mark videos watched (default)')
general.add_option(
- '--no-color', '--no-colors',
- action='store_true', dest='no_color',
- default=False,
+ '--no-colors',
+ action='store_true', dest='no_color', default=False,
help='Do not emit color codes in output')
+ general.add_option(
+ '--compat-options',
+ metavar='OPTS', dest='compat_opts', default=set(), type='str',
+ action='callback', callback=_set_from_options_callback,
+ callback_kwargs={
+ 'allowed_values': {
+ 'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
+ 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
+ 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json',
+ 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs',
+ }, 'aliases': {
+ 'youtube-dl': ['-multistreams', 'all'],
+ 'youtube-dlc': ['-no-youtube-channel-redirect', '-no-live-chat', 'all'],
+ }
+ }, help=(
+ 'Options that can help keep compatibility with youtube-dl or youtube-dlc '
+ 'configurations by reverting some of the changes made in hypervideo. '
+ 'See "Differences in default behavior" for details'))
network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(
'--proxy', dest='proxy',
default=None, metavar='URL',
- help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
- 'SOCKS proxy, specify a proper scheme. For example '
- 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
- 'for direct connection')
+ help=(
+ 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
+ 'SOCKS proxy, specify a proper scheme. For example '
+ 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
+ 'for direct connection'))
network.add_option(
'--socket-timeout',
dest='socket_timeout', type=float, default=None, metavar='SECONDS',
@@ -223,12 +314,13 @@ def parseOpts(overrideArguments=None):
help='Make all connections via IPv6',
)
- geo = optparse.OptionGroup(parser, 'Geo Restriction')
+ geo = optparse.OptionGroup(parser, 'Geo-restriction')
geo.add_option(
'--geo-verification-proxy',
dest='geo_verification_proxy', default=None, metavar='URL',
- help='Use this proxy to verify the IP address for some geo-restricted sites. '
- 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
+ help=(
+ 'Use this proxy to verify the IP address for some geo-restricted sites. '
+ 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading'))
geo.add_option(
'--cn-verification-proxy',
dest='cn_verification_proxy', default=None, metavar='URL',
@@ -262,15 +354,15 @@ def parseOpts(overrideArguments=None):
selection.add_option(
'--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+ help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13", which will download the videos at indices 1, 2, 3, 7, 10, 11, 12 and 13')
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
- help='Download only matching titles (regex or caseless sub-string)')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
'--reject-title',
dest='rejecttitle', metavar='REGEX',
- help='Skip download for matching titles (regex or caseless sub-string)')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
'--max-downloads',
dest='max_downloads', metavar='NUMBER', type=int, default=None,
@@ -286,52 +378,57 @@ def parseOpts(overrideArguments=None):
selection.add_option(
'--date',
metavar='DATE', dest='date', default=None,
- help='Download only videos uploaded in this date')
+ help=(
+ 'Download only videos uploaded in this date. '
+ 'The date can be "YYYYMMDD" or in the format '
+ '"(now|today)[+-][0-9](day|week|month|year)(s)?"'))
selection.add_option(
'--datebefore',
metavar='DATE', dest='datebefore', default=None,
- help='Download only videos uploaded on or before this date (i.e. inclusive)')
+ help=(
+ 'Download only videos uploaded on or before this date. '
+ 'The date formats accepted are the same as for --date'))
selection.add_option(
'--dateafter',
metavar='DATE', dest='dateafter', default=None,
- help='Download only videos uploaded on or after this date (i.e. inclusive)')
+ help=(
+ 'Download only videos uploaded on or after this date. '
+ 'The date formats accepted are the same as for --date'))
selection.add_option(
'--min-views',
metavar='COUNT', dest='min_views', default=None, type=int,
- help='Do not download any videos with less than COUNT views')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
'--max-views',
metavar='COUNT', dest='max_views', default=None, type=int,
- help='Do not download any videos with more than COUNT views')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
- 'Generic video filter. '
- 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
- 'match if the key is present, '
- '!key to check if the key is not present, '
- 'key > NUMBER (like "comment_count > 12", also works with '
- '>=, <, <=, !=, =) to compare against a number, '
- 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
- 'to match against a string literal '
- 'and & to require multiple matches. '
- 'Values which are not known are excluded unless you '
- 'put a question mark (?) after the operator. '
- 'For example, to only match videos that have been liked more than '
- '100 times and disliked less than 50 times (or the dislike '
- 'functionality is not available at the given service), but who '
- 'also have a description, use --match-filter '
- '"like_count > 100 & dislike_count <? 50 & description" .'
- ))
+ 'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
+ 'number or a string using the operators defined in "Filtering formats". '
+ 'You can also simply specify a field to match if the field is present '
+ 'and "!field" to check if the field is not present. In addition, '
+ 'Python style regular expression matching can be done using "~=", '
+ 'and multiple filters can be checked with "&". '
+ 'Use a "\\" to escape "&" or quotes if needed. Eg: --match-filter '
+ '"!is_live & like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
+ 'matches only videos that are not live, have a like count of more than 100 '
+ '(or the like field is not available), and also have a description '
+ 'that contains the phrase "cats & dogs" (ignoring case)'))
+ selection.add_option(
+ '--no-match-filter',
+ metavar='FILTER', dest='match_filter', action='store_const', const=None,
+ help='Do not use generic video filter (default)')
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
- help='Download only the video, if the URL refers to a video and a playlist.')
+ help='Download only the video, if the URL refers to a video and a playlist')
selection.add_option(
'--yes-playlist',
- action='store_false', dest='noplaylist', default=False,
- help='Download the playlist, if the URL refers to a video and a playlist.')
+ action='store_false', dest='noplaylist',
+ help='Download the playlist, if the URL refers to a video and a playlist')
selection.add_option(
'--age-limit',
metavar='YEARS', dest='age_limit', default=None, type=int,
@@ -339,11 +436,31 @@ def parseOpts(overrideArguments=None):
selection.add_option(
'--download-archive', metavar='FILE',
dest='download_archive',
- help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+ help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it')
+ selection.add_option(
+ '--break-on-existing',
+ action='store_true', dest='break_on_existing', default=False,
+ help='Stop the download process when encountering a file that is in the archive')
+ selection.add_option(
+ '--break-on-reject',
+ action='store_true', dest='break_on_reject', default=False,
+ help='Stop the download process when encountering a file that has been filtered out')
+ selection.add_option(
+ '--skip-playlist-after-errors', metavar='N',
+ dest='skip_playlist_after_errors', default=None, type=int,
+ help='Number of allowed failures until the rest of the playlist is skipped')
+ selection.add_option(
+ '--no-download-archive',
+ dest='download_archive', action="store_const", const=None,
+ help='Do not use archive file (default)')
selection.add_option(
'--include-ads',
dest='include_ads', action='store_true',
- help='Download advertisements as well (experimental)')
+ help=optparse.SUPPRESS_HELP)
+ selection.add_option(
+ '--no-include-ads',
+ dest='include_ads', action='store_false',
+ help=optparse.SUPPRESS_HELP)
authentication = optparse.OptionGroup(parser, 'Authentication Options')
authentication.add_option(
@@ -353,7 +470,7 @@ def parseOpts(overrideArguments=None):
authentication.add_option(
'-p', '--password',
dest='password', metavar='PASSWORD',
- help='Account password. If this option is left out, hypervideo will ask interactively.')
+ help='Account password. If this option is left out, hypervideo will ask interactively')
authentication.add_option(
'-2', '--twofactor',
dest='twofactor', metavar='TWOFACTOR',
@@ -363,24 +480,26 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='usenetrc', default=False,
help='Use .netrc authentication data')
authentication.add_option(
+ '--netrc-location',
+ dest='netrc_location', metavar='PATH',
+ help='Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc')
+ authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
help='Video password (vimeo, youku)')
-
- adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
- adobe_pass.add_option(
+ authentication.add_option(
'--ap-mso',
dest='ap_mso', metavar='MSO',
help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
- adobe_pass.add_option(
+ authentication.add_option(
'--ap-username',
dest='ap_username', metavar='USERNAME',
help='Multiple-system operator account login')
- adobe_pass.add_option(
+ authentication.add_option(
'--ap-password',
dest='ap_password', metavar='PASSWORD',
- help='Multiple-system operator account password. If this option is left out, hypervideo will ask interactively.')
- adobe_pass.add_option(
+ help='Multiple-system operator account password. If this option is left out, hypervideo will ask interactively')
+ authentication.add_option(
'--ap-list-mso',
action='store_true', dest='ap_list_mso', default=False,
help='List all supported multiple-system operators')
@@ -389,27 +508,74 @@ def parseOpts(overrideArguments=None):
video_format.add_option(
'-f', '--format',
action='store', dest='format', metavar='FORMAT', default=None,
- help='Video format code, see the "FORMAT SELECTION" for all the info')
+ help='Video format code, see "FORMAT SELECTION" for more details')
+ video_format.add_option(
+ '-S', '--format-sort', metavar='SORTORDER',
+ dest='format_sort', default=[], type='str', action='callback',
+ callback=_list_from_options_callback, callback_kwargs={'append': -1},
+ help='Sort the formats by the fields given, see "Sorting Formats" for more details')
+ video_format.add_option(
+ '--format-sort-force', '--S-force',
+ action='store_true', dest='format_sort_force', metavar='FORMAT', default=False,
+ help=(
+ 'Force user specified sort order to have precedence over all fields, '
+ 'see "Sorting Formats" for more details'))
+ video_format.add_option(
+ '--no-format-sort-force',
+ action='store_false', dest='format_sort_force', metavar='FORMAT', default=False,
+ help=(
+ 'Some fields have precedence over the user specified sort order (default), '
+ 'see "Sorting Formats" for more details'))
+ video_format.add_option(
+ '--video-multistreams',
+ action='store_true', dest='allow_multiple_video_streams', default=None,
+ help='Allow multiple video streams to be merged into a single file')
+ video_format.add_option(
+ '--no-video-multistreams',
+ action='store_false', dest='allow_multiple_video_streams',
+ help='Only one video stream is downloaded for each output file (default)')
+ video_format.add_option(
+ '--audio-multistreams',
+ action='store_true', dest='allow_multiple_audio_streams', default=None,
+ help='Allow multiple audio streams to be merged into a single file')
+ video_format.add_option(
+ '--no-audio-multistreams',
+ action='store_false', dest='allow_multiple_audio_streams',
+ help='Only one audio stream is downloaded for each output file (default)')
video_format.add_option(
'--all-formats',
action='store_const', dest='format', const='all',
- help='Download all available video formats')
+ help=optparse.SUPPRESS_HELP)
video_format.add_option(
'--prefer-free-formats',
action='store_true', dest='prefer_free_formats', default=False,
- help='Prefer free video formats unless a specific one is requested')
+ help=(
+ 'Prefer video formats with free containers over non-free ones of same quality. '
+ 'Use with "-S ext" to strictly prefer free containers irrespective of quality'))
+ video_format.add_option(
+ '--no-prefer-free-formats',
+ action='store_false', dest='prefer_free_formats', default=False,
+ help="Don't give any special preference to free containers (default)")
+ video_format.add_option(
+ '--check-formats',
+ action='store_true', dest='check_formats', default=None,
+ help='Check that the formats selected are actually downloadable')
+ video_format.add_option(
+ '--no-check-formats',
+ action='store_false', dest='check_formats',
+ help='Do not check that the formats selected are actually downloadable')
video_format.add_option(
'-F', '--list-formats',
action='store_true', dest='listformats',
- help='List all available formats of requested videos')
+ help='List available formats of each video. Simulate unless --no-simulate is used')
video_format.add_option(
- '--youtube-include-dash-manifest',
- action='store_true', dest='youtube_include_dash_manifest', default=True,
+ '--list-formats-as-table',
+ action='store_true', dest='listformats_table', default=True,
help=optparse.SUPPRESS_HELP)
video_format.add_option(
- '--youtube-skip-dash-manifest',
- action='store_false', dest='youtube_include_dash_manifest',
- help='Do not download the DASH manifests and related data on YouTube videos')
+ '--list-formats-old', '--no-list-formats-as-table',
+ action='store_false', dest='listformats_table',
+ help=optparse.SUPPRESS_HELP)
video_format.add_option(
'--merge-output-format',
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
@@ -417,72 +583,108 @@ def parseOpts(overrideArguments=None):
'If a merge is required (e.g. bestvideo+bestaudio), '
'output to given container format. One of mkv, mp4, ogg, webm, flv. '
'Ignored if no merge is required'))
+ video_format.add_option(
+ '--allow-unplayable-formats',
+ action='store_true', dest='allow_unplayable_formats', default=False,
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--no-allow-unplayable-formats',
+ action='store_false', dest='allow_unplayable_formats',
+ help=optparse.SUPPRESS_HELP)
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
subtitles.add_option(
- '--write-sub', '--write-srt',
+ '--write-subs', '--write-srt',
action='store_true', dest='writesubtitles', default=False,
help='Write subtitle file')
subtitles.add_option(
- '--write-auto-sub', '--write-automatic-sub',
+ '--no-write-subs', '--no-write-srt',
+ action='store_false', dest='writesubtitles',
+ help='Do not write subtitle file (default)')
+ subtitles.add_option(
+ '--write-auto-subs', '--write-automatic-subs',
action='store_true', dest='writeautomaticsub', default=False,
- help='Write automatically generated subtitle file (YouTube only)')
+ help='Write automatically generated subtitle file (Alias: --write-automatic-subs)')
+ subtitles.add_option(
+ '--no-write-auto-subs', '--no-write-automatic-subs',
+ action='store_false', dest='writeautomaticsub', default=False,
+ help='Do not write auto-generated subtitles (default) (Alias: --no-write-automatic-subs)')
subtitles.add_option(
'--all-subs',
action='store_true', dest='allsubtitles', default=False,
- help='Download all the available subtitles of the video')
+ help=optparse.SUPPRESS_HELP)
subtitles.add_option(
'--list-subs',
action='store_true', dest='listsubtitles', default=False,
- help='List all available subtitles for the video')
+ help='List available subtitles of each video. Simulate unless --no-simulate is used')
subtitles.add_option(
'--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
subtitles.add_option(
- '--sub-lang', '--sub-langs', '--srt-lang',
+ '--sub-langs', '--srt-langs',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
- default=[], callback=_comma_separated_values_options_callback,
- help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags')
+ default=[], callback=_list_from_options_callback,
+ help=(
+ 'Languages of the subtitles to download (can be regex) or "all" separated by commas. (Eg: --sub-langs en.*,ja) '
+ 'You can prefix the language code with a "-" to exempt it from the requested languages. (Eg: --sub-langs all,-live_chat) '
+ 'Use --list-subs for a list of available language tags'))
downloader = optparse.OptionGroup(parser, 'Download Options')
downloader.add_option(
+ '-N', '--concurrent-fragments',
+ dest='concurrent_fragment_downloads', metavar='N', default=1, type=int,
+ help='Number of fragments of a dash/hlsnative video that should be downloaded concurrently (default is %default)')
+ downloader.add_option(
'-r', '--limit-rate', '--rate-limit',
dest='ratelimit', metavar='RATE',
help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
downloader.add_option(
+ '--throttled-rate',
+ dest='throttledratelimit', metavar='RATE',
+ help='Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted (e.g. 100K)')
+ downloader.add_option(
'-R', '--retries',
dest='retries', metavar='RETRIES', default=10,
- help='Number of retries (default is %default), or "infinite".')
+ help='Number of retries (default is %default), or "infinite"')
downloader.add_option(
'--fragment-retries',
dest='fragment_retries', metavar='RETRIES', default=10,
help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
downloader.add_option(
- '--skip-unavailable-fragments',
+ '--skip-unavailable-fragments', '--no-abort-on-unavailable-fragment',
action='store_true', dest='skip_unavailable_fragments', default=True,
- help='Skip unavailable fragments (DASH, hlsnative and ISM)')
+ help='Skip unavailable fragments for DASH, hlsnative and ISM (default) (Alias: --no-abort-on-unavailable-fragment)')
downloader.add_option(
- '--abort-on-unavailable-fragment',
+ '--abort-on-unavailable-fragment', '--no-skip-unavailable-fragments',
action='store_false', dest='skip_unavailable_fragments',
- help='Abort downloading when some fragment is not available')
+ help='Abort downloading if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)')
downloader.add_option(
'--keep-fragments',
action='store_true', dest='keep_fragments', default=False,
- help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
+ help='Keep downloaded fragments on disk after downloading is finished')
+ downloader.add_option(
+ '--no-keep-fragments',
+ action='store_false', dest='keep_fragments',
+ help='Delete downloaded fragments after downloading is finished (default)')
downloader.add_option(
'--buffer-size',
dest='buffersize', metavar='SIZE', default='1024',
help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
downloader.add_option(
+ '--resize-buffer',
+ action='store_false', dest='noresizebuffer',
+ help='The buffer size is automatically resized from an initial value of --buffer-size (default)')
+ downloader.add_option(
'--no-resize-buffer',
action='store_true', dest='noresizebuffer', default=False,
- help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ help='Do not automatically adjust the buffer size')
downloader.add_option(
'--http-chunk-size',
dest='http_chunk_size', metavar='SIZE', default=None,
- help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
- 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
+ help=(
+ 'Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
+ 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)'))
downloader.add_option(
'--test',
action='store_true', dest='test', default=False,
@@ -492,6 +694,10 @@ def parseOpts(overrideArguments=None):
action='store_true',
help='Download playlist videos in reverse order')
downloader.add_option(
+ '--no-playlist-reverse',
+ action='store_false', dest='playlist_reverse',
+ help='Download playlist videos in default order (default)')
+ downloader.add_option(
'--playlist-random',
action='store_true',
help='Download playlist videos in random order')
@@ -502,25 +708,55 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'--hls-prefer-native',
dest='hls_prefer_native', action='store_true', default=None,
- help='Use the native HLS downloader instead of ffmpeg')
+ help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--hls-prefer-ffmpeg',
dest='hls_prefer_native', action='store_false', default=None,
- help='Use ffmpeg instead of the native HLS downloader')
+ help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--hls-use-mpegts',
- dest='hls_use_mpegts', action='store_true',
- help='Use the mpegts container for HLS videos, allowing to play the '
- 'video while downloading (some players may not be able to play it)')
+ dest='hls_use_mpegts', action='store_true', default=None,
+ help=(
+ 'Use the mpegts container for HLS videos; '
+ 'allowing some players to play the video while downloading, '
+ 'and reducing the chance of file corruption if download is interrupted. '
+ 'This is enabled by default for live streams'))
+ downloader.add_option(
+ '--no-hls-use-mpegts',
+ dest='hls_use_mpegts', action='store_false',
+ help=(
+ 'Do not use the mpegts container for HLS videos. '
+ 'This is default when not downloading live streams'))
downloader.add_option(
- '--external-downloader',
- dest='external_downloader', metavar='COMMAND',
- help='Use the specified external downloader. '
- 'Currently supports %s' % ','.join(list_external_downloaders()))
+ '--downloader', '--external-downloader',
+ dest='external_downloader', metavar='[PROTO:]NAME', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'http|ftp|m3u8|dash|rtsp|rtmp|mms',
+ 'default_key': 'default',
+ 'process': str.strip
+ }, help=(
+ 'Name or path of the external downloader to use (optionally) prefixed by '
+ 'the protocols (http, ftp, m3u8, dash, rtsp, rtmp, mms) to use it for. '
+ 'Currently supports native, %s (Recommended: aria2c). '
+ 'You can use this option multiple times to set different downloaders for different protocols. '
+ 'For example, --downloader aria2c --downloader "dash,m3u8:native" will use '
+ 'aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads '
+ '(Alias: --external-downloader)' % ', '.join(list_external_downloaders())))
downloader.add_option(
- '--external-downloader-args',
- dest='external_downloader_args', metavar='ARGS',
- help='Give these arguments to the external downloader')
+ '--downloader-args', '--external-downloader-args',
+ metavar='NAME:ARGS', dest='external_downloader_args', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': r'ffmpeg_[io]\d*|%s' % '|'.join(list_external_downloaders()),
+ 'default_key': 'default',
+ 'process': compat_shlex_split
+ }, help=(
+ 'Give these arguments to the external downloader. '
+ 'Specify the downloader name and the arguments separated by a colon ":". '
+ 'For ffmpeg, arguments can be passed to different positions using the same syntax as --postprocessor-args. '
+ 'You can use this option multiple times to give different arguments to different downloaders '
+ '(Alias: --external-downloader-args)'))
workarounds = optparse.OptionGroup(parser, 'Workarounds')
workarounds.add_option(
@@ -528,13 +764,13 @@ def parseOpts(overrideArguments=None):
dest='encoding', metavar='ENCODING',
help='Force the specified encoding (experimental)')
workarounds.add_option(
- '--no-check-certificate',
+ '--no-check-certificates',
action='store_true', dest='no_check_certificate', default=False,
help='Suppress HTTPS certificate validation')
workarounds.add_option(
- '--prefer-insecure',
- '--prefer-unsecure', action='store_true', dest='prefer_insecure',
- help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+ '--prefer-insecure', '--prefer-unsecure',
+ action='store_true', dest='prefer_insecure',
+ help='Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube)')
workarounds.add_option(
'--user-agent',
metavar='UA', dest='user_agent',
@@ -546,104 +782,155 @@ def parseOpts(overrideArguments=None):
)
workarounds.add_option(
'--add-header',
- metavar='FIELD:VALUE', dest='headers', action='append',
- help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+ metavar='FIELD:VALUE', dest='headers', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={'multiple_keys': False},
+ help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times',
)
workarounds.add_option(
'--bidi-workaround',
dest='bidi_workaround', action='store_true',
help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
workarounds.add_option(
+ '--sleep-requests', metavar='SECONDS',
+ dest='sleep_interval_requests', type=float,
+ help='Number of seconds to sleep between requests during data extraction')
+ workarounds.add_option(
'--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
dest='sleep_interval', type=float,
help=(
- 'Number of seconds to sleep before each download when used alone '
- 'or a lower bound of a range for randomized sleep before each download '
- '(minimum possible number of seconds to sleep) when used along with '
- '--max-sleep-interval.'))
+ 'Number of seconds to sleep before each download. '
+ 'This is the minimum time to sleep when used along with --max-sleep-interval '
+ '(Alias: --min-sleep-interval)'))
workarounds.add_option(
'--max-sleep-interval', metavar='SECONDS',
dest='max_sleep_interval', type=float,
- help=(
- 'Upper bound of a range for randomized sleep before each download '
- '(maximum possible number of seconds to sleep). Must only be used '
- 'along with --min-sleep-interval.'))
+ help='Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval')
+ workarounds.add_option(
+ '--sleep-subtitles', metavar='SECONDS',
+ dest='sleep_interval_subtitles', default=0, type=int,
+ help='Number of seconds to sleep before each subtitle download')
- verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+ verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
verbosity.add_option(
'-q', '--quiet',
action='store_true', dest='quiet', default=False,
- help='Activate quiet mode')
+ help='Activate quiet mode. If used with --verbose, print the log to stderr')
verbosity.add_option(
'--no-warnings',
dest='no_warnings', action='store_true', default=False,
help='Ignore warnings')
verbosity.add_option(
'-s', '--simulate',
- action='store_true', dest='simulate', default=False,
+ action='store_true', dest='simulate', default=None,
help='Do not download the video and do not write anything to disk')
verbosity.add_option(
- '--skip-download',
+ '--no-simulate',
+ action='store_false', dest='simulate',
+ help='Download the video even if printing/listing options are used')
+ verbosity.add_option(
+ '--ignore-no-formats-error',
+ action='store_true', dest='ignore_no_formats_error', default=False,
+ help=(
+ 'Ignore "No video formats" error. Usefull for extracting metadata '
+ 'even if the videos are not actually available for download (experimental)'))
+ verbosity.add_option(
+ '--no-ignore-no-formats-error',
+ action='store_false', dest='ignore_no_formats_error',
+ help='Throw error when no downloadable video formats are found (default)')
+ verbosity.add_option(
+ '--skip-download', '--no-download',
action='store_true', dest='skip_download', default=False,
- help='Do not download the video')
+ help='Do not download the video but write all related files (Alias: --no-download)')
+ verbosity.add_option(
+ '-O', '--print',
+ metavar='TEMPLATE', action='append', dest='forceprint',
+ help=(
+ 'Quiet, but print the given fields for each video. Simulate unless --no-simulate is used. '
+ 'Either a field name or the same syntax as the output template can be used'))
verbosity.add_option(
'-g', '--get-url',
action='store_true', dest='geturl', default=False,
- help='Simulate, quiet but print URL')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'-e', '--get-title',
action='store_true', dest='gettitle', default=False,
- help='Simulate, quiet but print title')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--get-id',
action='store_true', dest='getid', default=False,
- help='Simulate, quiet but print id')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--get-thumbnail',
action='store_true', dest='getthumbnail', default=False,
- help='Simulate, quiet but print thumbnail URL')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--get-description',
action='store_true', dest='getdescription', default=False,
- help='Simulate, quiet but print video description')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--get-duration',
action='store_true', dest='getduration', default=False,
- help='Simulate, quiet but print video length')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--get-filename',
action='store_true', dest='getfilename', default=False,
- help='Simulate, quiet but print output filename')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--get-format',
action='store_true', dest='getformat', default=False,
- help='Simulate, quiet but print output format')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'-j', '--dump-json',
action='store_true', dest='dumpjson', default=False,
- help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
+ help='Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. See "OUTPUT TEMPLATE" for a description of available keys')
verbosity.add_option(
'-J', '--dump-single-json',
action='store_true', dest='dump_single_json', default=False,
- help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+ help=(
+ 'Quiet, but print JSON information for each URL or infojson passed. Simulate unless --no-simulate is used. '
+ 'If the URL refers to a playlist, the whole playlist information is dumped in a single line'))
verbosity.add_option(
'--print-json',
action='store_true', dest='print_json', default=False,
- help='Be quiet and print the video information as JSON (video is still being downloaded).',
- )
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--force-write-archive', '--force-write-download-archive', '--force-download-archive',
+ action='store_true', dest='force_write_download_archive', default=False,
+ help=(
+ 'Force download archive entries to be written as long as no errors occur, '
+ 'even if -s or another simulation option is used (Alias: --force-download-archive)'))
verbosity.add_option(
'--newline',
action='store_true', dest='progress_with_newline', default=False,
help='Output progress bar as new lines')
verbosity.add_option(
'--no-progress',
- action='store_true', dest='noprogress', default=False,
+ action='store_true', dest='noprogress', default=None,
help='Do not print progress bar')
verbosity.add_option(
+ '--progress',
+ action='store_false', dest='noprogress',
+ help='Show progress bar, even if in quiet mode')
+ verbosity.add_option(
'--console-title',
action='store_true', dest='consoletitle', default=False,
help='Display progress in console titlebar')
verbosity.add_option(
+ '--progress-template',
+ metavar='[TYPES:]TEMPLATE', dest='progress_template', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': '(download|postprocess)(-title)?',
+ 'default_key': 'download'
+ }, help=(
+ 'Template for progress outputs, optionally prefixed with one of "download:" (default), '
+ '"download-title:" (the console title), "postprocess:", or "postprocess-title:". '
+ 'The video\'s fields are accessible under the "info" key and '
+ 'the progress attributes are accessible under the "progress" key. Eg: '
+ # TODO: Document the fields inside "progress"
+ '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"'))
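Note: the fields inside "progress" are still undocumented (see the TODO above). A toy sketch, assuming an "eta" field exists under "progress", of what the dotted-key template in the example resolves to; plain %-formatting cannot resolve the dots, so the dicts are flattened here:

    info = {'id': 'abc123'}
    progress = {'eta': 42}
    tmpl = '%(info.id)s-%(progress.eta)s'
    # hypervideo's template engine resolves dotted keys itself; for this
    # standalone demo, flatten the two dicts into literal dotted keys
    flat = {'info.id': info['id'], 'progress.eta': progress['eta']}
    print(tmpl % flat)  # abc123-42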
+ verbosity.add_option(
'-v', '--verbose',
action='store_true', dest='verbose', default=False,
help='Print various debugging information')
@@ -666,29 +953,47 @@ def parseOpts(overrideArguments=None):
verbosity.add_option(
'-C', '--call-home',
dest='call_home', action='store_true', default=False,
- help='Contact the hypervideo server for debugging')
+ # help='[Broken] Contact the hypervideo server for debugging')
+ help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--no-call-home',
- dest='call_home', action='store_false', default=False,
- help='Do NOT contact the hypervideo server for debugging')
+ dest='call_home', action='store_false',
+ # help='Do not contact the hypervideo server for debugging (default)')
+ help=optparse.SUPPRESS_HELP)
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
filesystem.add_option(
'-a', '--batch-file',
dest='batchfile', metavar='FILE',
help="File containing URLs to download ('-' for stdin), one URL per line. "
- "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
+ "Lines starting with '#', ';' or ']' are considered as comments and ignored")
filesystem.add_option(
- '--id', default=False,
- action='store_true', dest='useid', help='Use only video ID in file name')
+ '-P', '--paths',
+ metavar='[TYPES:]PATH', dest='paths', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'home|temp|%s' % '|'.join(OUTTMPL_TYPES.keys()),
+ 'default_key': 'home'
+ }, help=(
+ 'The paths where the files should be downloaded. '
+ 'Specify the type of file and the path separated by a colon ":". '
+ 'All the same types as --output are supported. '
+ 'Additionally, you can also provide "home" (default) and "temp" paths. '
+ 'All intermediary files are first downloaded to the temp path and '
+ 'then the final files are moved over to the home path after download is finished. '
+ 'This option is ignored if --output is an absolute path'))
filesystem.add_option(
'-o', '--output',
- dest='outtmpl', metavar='TEMPLATE',
- help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
+ metavar='[TYPES:]TEMPLATE', dest='outtmpl', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': '|'.join(OUTTMPL_TYPES.keys()),
+ 'default_key': 'default'
+ }, help='Output filename template; see "OUTPUT TEMPLATE" for details')
filesystem.add_option(
'--output-na-placeholder',
- dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
- help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
+ dest='outtmpl_na_placeholder', metavar='TEXT', default='NA',
+ help=('Placeholder value for unavailable meta fields in output filename template (default: "%default")'))
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER', type=int,
@@ -696,55 +1001,119 @@ def parseOpts(overrideArguments=None):
filesystem.add_option(
'--autonumber-start',
dest='autonumber_start', metavar='NUMBER', default=1, type=int,
- help='Specify the start value for %(autonumber)s (default is %default)')
+ help=optparse.SUPPRESS_HELP)
filesystem.add_option(
'--restrict-filenames',
action='store_true', dest='restrictfilenames', default=False,
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
filesystem.add_option(
- '-A', '--auto-number',
- action='store_true', dest='autonumber', default=False,
- help=optparse.SUPPRESS_HELP)
+ '--no-restrict-filenames',
+ action='store_false', dest='restrictfilenames',
+ help='Allow Unicode characters, "&" and spaces in filenames (default)')
filesystem.add_option(
- '-t', '--title',
- action='store_true', dest='usetitle', default=False,
- help=optparse.SUPPRESS_HELP)
+ '--windows-filenames',
+ action='store_true', dest='windowsfilenames', default=False,
+ help='Force filenames to be Windows-compatible')
filesystem.add_option(
- '-l', '--literal', default=False,
- action='store_true', dest='usetitle',
- help=optparse.SUPPRESS_HELP)
+ '--no-windows-filenames',
+ action='store_false', dest='windowsfilenames',
+ help='Make filenames Windows-compatible only if running on Windows (default)')
+ filesystem.add_option(
+ '--trim-filenames', '--trim-file-names', metavar='LENGTH',
+ dest='trim_file_name', default=0, type=int,
+ help='Limit the filename length (excluding extension) to the specified number of characters')
filesystem.add_option(
'-w', '--no-overwrites',
- action='store_true', dest='nooverwrites', default=False,
- help='Do not overwrite files')
+ action='store_false', dest='overwrites', default=None,
+ help='Do not overwrite any files')
+ filesystem.add_option(
+ '--force-overwrites', '--yes-overwrites',
+ action='store_true', dest='overwrites',
+ help='Overwrite all video and metadata files. This option includes --no-continue')
+ filesystem.add_option(
+ '--no-force-overwrites',
+ action='store_const', dest='overwrites', const=None,
+ help='Do not overwrite the video, but overwrite related files (default)')
filesystem.add_option(
'-c', '--continue',
action='store_true', dest='continue_dl', default=True,
- help='Force resume of partially downloaded files. By default, hypervideo will resume downloads if possible.')
+ help='Resume partially downloaded files/fragments (default)')
filesystem.add_option(
'--no-continue',
action='store_false', dest='continue_dl',
- help='Do not resume partially downloaded files (restart from beginning)')
+ help=(
+ 'Do not resume partially downloaded fragments. '
+ 'If the file is not fragmented, restart download of the entire file'))
+ filesystem.add_option(
+ '--part',
+ action='store_false', dest='nopart', default=False,
+ help='Use .part files instead of writing directly into output file (default)')
filesystem.add_option(
'--no-part',
- action='store_true', dest='nopart', default=False,
+ action='store_true', dest='nopart',
help='Do not use .part files - write directly into output file')
filesystem.add_option(
+ '--mtime',
+ action='store_true', dest='updatetime', default=True,
+ help='Use the Last-modified header to set the file modification time (default)')
+ filesystem.add_option(
'--no-mtime',
- action='store_false', dest='updatetime', default=True,
+ action='store_false', dest='updatetime',
help='Do not use the Last-modified header to set the file modification time')
filesystem.add_option(
'--write-description',
action='store_true', dest='writedescription', default=False,
help='Write video description to a .description file')
filesystem.add_option(
+ '--no-write-description',
+ action='store_false', dest='writedescription',
+ help='Do not write video description (default)')
+ filesystem.add_option(
'--write-info-json',
action='store_true', dest='writeinfojson', default=False,
- help='Write video metadata to a .info.json file')
+ help='Write video metadata to a .info.json file (this may contain personal information)')
+ filesystem.add_option(
+ '--no-write-info-json',
+ action='store_false', dest='writeinfojson',
+ help='Do not write video metadata (default)')
filesystem.add_option(
'--write-annotations',
action='store_true', dest='writeannotations', default=False,
- help='Write video annotations to a .annotations.xml file')
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--no-write-annotations',
+ action='store_false', dest='writeannotations',
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--write-playlist-metafiles',
+ action='store_true', dest='allow_playlist_files', default=None,
+ help=(
+ 'Write playlist metadata in addition to the video metadata '
+ 'when using --write-info-json, --write-description etc. (default)'))
+ filesystem.add_option(
+ '--no-write-playlist-metafiles',
+ action='store_false', dest='allow_playlist_files',
+ help='Do not write playlist metadata when using --write-info-json, --write-description etc.')
+ filesystem.add_option(
+ '--clean-infojson',
+ action='store_true', dest='clean_infojson', default=None,
+ help=(
+ 'Remove some private fields such as filenames from the infojson. '
+ 'Note that it could still contain some personal information (default)'))
+ filesystem.add_option(
+ '--no-clean-infojson',
+ action='store_false', dest='clean_infojson',
+ help='Write all fields to the infojson')
+ filesystem.add_option(
+ '--write-comments', '--get-comments',
+ action='store_true', dest='getcomments', default=False,
+ help=(
+ 'Retrieve video comments to be placed in the infojson. '
+ 'The comments are fetched even without this option if the extraction is known to be quick (Alias: --get-comments)'))
+ filesystem.add_option(
+ '--no-write-comments', '--no-get-comments',
+ action='store_false', dest='getcomments',
+ help='Do not retrieve video comments unless the extraction is known to be quick (Alias: --no-get-comments)')
filesystem.add_option(
'--load-info-json', '--load-info',
dest='load_info_filename', metavar='FILE',
@@ -754,10 +1123,28 @@ def parseOpts(overrideArguments=None):
dest='cookiefile', metavar='FILE',
help='File to read cookies from and dump cookie jar in')
filesystem.add_option(
+ '--no-cookies',
+ action='store_const', const=None, dest='cookiefile', metavar='FILE',
+ help='Do not read/dump cookies from/to file (default)')
+ filesystem.add_option(
+ '--cookies-from-browser',
+ dest='cookiesfrombrowser', metavar='BROWSER[:PROFILE]',
+ help=(
+ 'Load cookies from a user profile of the given web browser. '
+ 'Currently supported browsers are: {}. '
+ 'You can specify the user profile name or directory using '
+ '"BROWSER:PROFILE_NAME" or "BROWSER:PROFILE_PATH". '
+ 'If no profile is given, the most recently accessed one is used'.format(
+ ', '.join(sorted(SUPPORTED_BROWSERS)))))
+ filesystem.add_option(
+ '--no-cookies-from-browser',
+ action='store_const', const=None, dest='cookiesfrombrowser',
+ help='Do not load cookies from browser (default)')
+ filesystem.add_option(
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
- help='Location in the filesystem where hypervideo can store some downloaded information permanently. By default $XDG_CACHE_HOME/hypervideo or ~/.cache/hypervideo . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+ help='Location in the filesystem where hypervideo can store some downloaded information (such as client ids and signatures) permanently. By default $XDG_CACHE_HOME/hypervideo or ~/.cache/hypervideo')
filesystem.add_option(
- '--no-cache-dir', action='store_const', const=False, dest='cachedir',
+ '--no-cache-dir', action='store_false', dest='cachedir',
help='Disable filesystem caching')
filesystem.add_option(
'--rm-cache-dir',
@@ -770,94 +1157,380 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='writethumbnail', default=False,
help='Write thumbnail image to disk')
thumbnail.add_option(
+ '--no-write-thumbnail',
+ action='store_false', dest='writethumbnail',
+ help='Do not write thumbnail image to disk (default)')
+ thumbnail.add_option(
'--write-all-thumbnails',
action='store_true', dest='write_all_thumbnails', default=False,
help='Write all thumbnail image formats to disk')
thumbnail.add_option(
'--list-thumbnails',
action='store_true', dest='list_thumbnails', default=False,
- help='Simulate and list all available thumbnail formats')
+ help='List available thumbnails of each video. Simulate unless --no-simulate is used')
- postproc = optparse.OptionGroup(parser, 'Post-processing Options')
+ link = optparse.OptionGroup(parser, 'Internet Shortcut Options')
+ link.add_option(
+ '--write-link',
+ action='store_true', dest='writelink', default=False,
+ help='Write an internet shortcut file, depending on the current platform (.url, .webloc or .desktop). The URL may be cached by the OS')
+ link.add_option(
+ '--write-url-link',
+ action='store_true', dest='writeurllink', default=False,
+ help='Write a .url Windows internet shortcut. The OS caches the URL based on the file path')
+ link.add_option(
+ '--write-webloc-link',
+ action='store_true', dest='writewebloclink', default=False,
+ help='Write a .webloc macOS internet shortcut')
+ link.add_option(
+ '--write-desktop-link',
+ action='store_true', dest='writedesktoplink', default=False,
+ help='Write a .desktop Linux internet shortcut')
+
+ postproc = optparse.OptionGroup(parser, 'Post-Processing Options')
postproc.add_option(
'-x', '--extract-audio',
action='store_true', dest='extractaudio', default=False,
- help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
+ help='Convert video files to audio-only files (requires ffmpeg and ffprobe)')
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
- help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
+ help=(
+ 'Specify audio format to convert the audio to when -x is used. Currently supported formats are: '
+ 'best (default) or one of %s' % '|'.join(FFmpegExtractAudioPP.SUPPORTED_EXTS)))
postproc.add_option(
'--audio-quality', metavar='QUALITY',
dest='audioquality', default='5',
- help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ help='Specify ffmpeg audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ postproc.add_option(
+ '--remux-video',
+ metavar='FORMAT', dest='remuxvideo', default=None,
+ help=(
+ 'Remux the video into another container if necessary (currently supported: %s). '
+ 'If target container does not support the video/audio codec, remuxing will fail. '
+ 'You can specify multiple rules; Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 '
+ 'and anything else to mkv.' % '|'.join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)))
postproc.add_option(
'--recode-video',
metavar='FORMAT', dest='recodevideo', default=None,
- help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
+ help=(
+ 'Re-encode the video into another format if re-encoding is necessary. '
+ 'The syntax and supported formats are the same as --remux-video'))
postproc.add_option(
- '--postprocessor-args',
- dest='postprocessor_args', metavar='ARGS',
- help='Give these arguments to the postprocessor')
+ '--postprocessor-args', '--ppa',
+ metavar='NAME:ARGS', dest='postprocessor_args', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': r'\w+(?:\+\w+)?', 'default_key': 'default-compat',
+ 'process': compat_shlex_split,
+ 'multiple_keys': False
+ }, help=(
+ 'Give these arguments to the postprocessors. '
+ 'Specify the postprocessor/executable name and the arguments separated by a colon ":" '
+ 'to give the argument to the specified postprocessor/executable. Supported PP are: '
+ 'Merger, ModifyChapters, SplitChapters, ExtractAudio, VideoRemuxer, VideoConvertor, '
+ 'Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, '
+ 'FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. '
+ 'The supported executables are: AtomicParsley, FFmpeg and FFprobe. '
+ 'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable '
+ 'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, '
+ '"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument '
+ 'before the specified input/output file. Eg: --ppa "Merger+ffmpeg_i1:-v quiet". '
+ 'You can use this option multiple times to give different arguments to different '
+ 'postprocessors. (Alias: --ppa)'))
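Note: a standalone sketch of the NAME:ARGS split performed for --ppa; the real option routes through _dict_from_options_callback with process=compat_shlex_split, but the shape of the result is the same:

    import shlex

    def parse_ppa(value):
        # 'Merger+ffmpeg_i1:-v quiet' -> key plus shell-split argument list
        key, _, args = value.partition(':')
        return key.lower(), shlex.split(args)

    print(parse_ppa('Merger+ffmpeg_i1:-v quiet'))
    # ('merger+ffmpeg_i1', ['-v', 'quiet'])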
postproc.add_option(
'-k', '--keep-video',
action='store_true', dest='keepvideo', default=False,
- help='Keep the video file on disk after the post-processing; the video is erased by default')
+ help='Keep the intermediate video file on disk after post-processing')
+ postproc.add_option(
+ '--no-keep-video',
+ action='store_false', dest='keepvideo',
+ help='Delete the intermediate video file after post-processing (default)')
+ postproc.add_option(
+ '--post-overwrites',
+ action='store_false', dest='nopostoverwrites',
+ help='Overwrite post-processed files (default)')
postproc.add_option(
'--no-post-overwrites',
action='store_true', dest='nopostoverwrites', default=False,
- help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
+ help='Do not overwrite post-processed files')
postproc.add_option(
'--embed-subs',
action='store_true', dest='embedsubtitles', default=False,
help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
postproc.add_option(
+ '--no-embed-subs',
+ action='store_false', dest='embedsubtitles',
+ help='Do not embed subtitles (default)')
+ postproc.add_option(
'--embed-thumbnail',
action='store_true', dest='embedthumbnail', default=False,
- help='Embed thumbnail in the audio as cover art')
+ help='Embed thumbnail in the video as cover art')
+ postproc.add_option(
+ '--no-embed-thumbnail',
+ action='store_false', dest='embedthumbnail',
+ help='Do not embed thumbnail (default)')
postproc.add_option(
- '--add-metadata',
+ '--embed-metadata', '--add-metadata',
action='store_true', dest='addmetadata', default=False,
- help='Write metadata to the video file')
+ help='Embed metadata to the video file. Also adds chapters to file unless --no-add-chapters is used (Alias: --add-metadata)')
+ postproc.add_option(
+ '--no-embed-metadata', '--no-add-metadata',
+ action='store_false', dest='addmetadata',
+ help='Do not add metadata to file (default) (Alias: --no-add-metadata)')
+ postproc.add_option(
+ '--embed-chapters', '--add-chapters',
+ action='store_true', dest='addchapters', default=None,
+ help='Add chapter markers to the video file (Alias: --add-chapters)')
+ postproc.add_option(
+ '--no-embed-chapters', '--no-add-chapters',
+ action='store_false', dest='addchapters',
+ help='Do not add chapter markers (default) (Alias: --no-add-chapters)')
postproc.add_option(
'--metadata-from-title',
metavar='FORMAT', dest='metafromtitle',
- help='Parse additional metadata like song title / artist from the video title. '
- 'The format syntax is the same as --output. Regular expression with '
- 'named capture groups may also be used. '
- 'The parsed parameters replace existing values. '
- 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
- '"Coldplay - Paradise". '
- 'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"')
+ help=optparse.SUPPRESS_HELP)
+ postproc.add_option(
+ '--parse-metadata',
+ metavar='FROM:TO', dest='parse_metadata', action='append',
+ help=(
+ 'Parse additional metadata like title/artist from other fields; '
+ 'see "MODIFYING METADATA" for details'))
+ postproc.add_option(
+ '--replace-in-metadata',
+ dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3,
+ help='Replace text in a metadata field using the given regex. This option can be used multiple times')
postproc.add_option(
'--xattrs',
action='store_true', dest='xattrs', default=False,
help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
'--fixup',
- metavar='POLICY', dest='fixup', default='detect_or_warn',
- help='Automatically correct known faults of the file. '
- 'One of never (do nothing), warn (only emit a warning), '
- 'detect_or_warn (the default; fix file if we can, warn otherwise)')
+ metavar='POLICY', dest='fixup', default=None,
+ choices=('never', 'ignore', 'warn', 'detect_or_warn', 'force'),
+ help=(
+ 'Automatically correct known faults of the file. '
+ 'One of never (do nothing), warn (only emit a warning), '
+ 'detect_or_warn (the default; fix file if we can, warn otherwise), '
+ 'force (try fixing even if file already exists)'))
postproc.add_option(
- '--prefer-avconv',
+ '--prefer-avconv', '--no-prefer-ffmpeg',
action='store_false', dest='prefer_ffmpeg',
- help='Prefer avconv over ffmpeg for running the postprocessors')
+ help=optparse.SUPPRESS_HELP)
postproc.add_option(
- '--prefer-ffmpeg',
- action='store_true', dest='prefer_ffmpeg',
- help='Prefer ffmpeg over avconv for running the postprocessors (default)')
+ '--prefer-ffmpeg', '--no-prefer-avconv',
+ action='store_true', dest='prefer_ffmpeg', default=True,
+ help=optparse.SUPPRESS_HELP)
postproc.add_option(
'--ffmpeg-location', '--avconv-location', metavar='PATH',
dest='ffmpeg_location',
- help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
+ help='Location of the ffmpeg binary; either the path to the binary or its containing directory')
+ postproc.add_option(
+ '--exec', metavar='CMD',
+ action='append', dest='exec_cmd',
+ help=(
+ 'Execute a command on the file after downloading and post-processing. '
+ 'Same syntax as the output template can be used to pass any field as arguments to the command. '
+ 'An additional field "filepath" that contains the final path of the downloaded file is also available. '
+ 'If no fields are passed, %(filepath)q is appended to the end of the command. '
+ 'This option can be used multiple times'))
+ postproc.add_option(
+ '--no-exec',
+ action='store_const', dest='exec_cmd', const=[],
+ help='Remove any previously defined --exec')
+ postproc.add_option(
+ '--exec-before-download', metavar='CMD',
+ action='append', dest='exec_before_dl_cmd',
+ help=(
+ 'Execute a command before the actual download. '
+ 'The syntax is the same as --exec but "filepath" is not available. '
+ 'This option can be used multiple times'))
postproc.add_option(
- '--exec',
- metavar='CMD', dest='exec_cmd',
- help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+ '--no-exec-before-download',
+ action='store_const', dest='exec_before_dl_cmd', const=[],
+ help='Remove any previously defined --exec-before-download')
postproc.add_option(
- '--convert-subs', '--convert-subtitles',
+ '--convert-subs', '--convert-sub', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None,
- help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
+ help=(
+ 'Convert the subtitles to another format (currently supported: %s) '
+ '(Alias: --convert-subtitles)' % '|'.join(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)))
+ postproc.add_option(
+ '--convert-thumbnails',
+ metavar='FORMAT', dest='convertthumbnails', default=None,
+ help=(
+ 'Convert the thumbnails to another format '
+ '(currently supported: %s) ' % '|'.join(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)))
+ postproc.add_option(
+ '--split-chapters', '--split-tracks',
+ dest='split_chapters', action='store_true', default=False,
+ help=(
+ 'Split video into multiple files based on internal chapters. '
+ 'The "chapter:" prefix can be used with "--paths" and "--output" to '
+ 'set the output filename for the split files. See "OUTPUT TEMPLATE" for details'))
+ postproc.add_option(
+ '--no-split-chapters', '--no-split-tracks',
+ dest='split_chapters', action='store_false',
+ help='Do not split video based on chapters (default)')
+ postproc.add_option(
+ '--remove-chapters',
+ metavar='REGEX', dest='remove_chapters', action='append',
+ help=(
+ 'Remove chapters whose title matches the given regular expression. '
+ 'Time ranges prefixed by a "*" can also be used in place of chapters to remove the specified range. '
+ 'Eg: --remove-chapters "*10:15-15:00" --remove-chapters "intro". '
+ 'This option can be used multiple times'))
+ postproc.add_option(
+ '--no-remove-chapters', dest='remove_chapters', action='store_const', const=None,
+ help='Do not remove any chapters from the file (default)')
+ postproc.add_option(
+ '--force-keyframes-at-cuts',
+ action='store_true', dest='force_keyframes_at_cuts', default=False,
+ help=(
+ 'Force keyframes around the chapters before removing/splitting them. '
+ 'Requires a re-encode and thus is very slow, but the resulting video '
+ 'may have fewer artifacts around the cuts'))
+ postproc.add_option(
+ '--no-force-keyframes-at-cuts',
+ action='store_false', dest='force_keyframes_at_cuts',
+ help='Do not force keyframes around the chapters when cutting/splitting (default)')
+ _postprocessor_opts_parser = lambda key, val='': (
+ *(item.split('=', 1) for item in (val.split(';') if val else [])),
+ ('key', remove_end(key, 'PP')))
+ postproc.add_option(
+ '--use-postprocessor',
+ metavar='NAME[:ARGS]', dest='add_postprocessors', default=[], type='str',
+ action='callback', callback=_list_from_options_callback,
+ callback_kwargs={
+ 'delim': None,
+ 'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1)))
+ }, help=(
+ 'The (case-sensitive) name of plugin postprocessors to be enabled, '
+ 'and (optionally) arguments to be passed to it, separated by a colon ":". '
+ 'ARGS are a semicolon ";" delimited list of NAME=VALUE. '
+ 'The "when" argument determines when the postprocessor is invoked. '
+ 'It can be one of "pre_process" (after extraction), '
+ '"before_dl" (before video download), "post_process" (after video download; default) '
+ 'or "after_move" (after moving file to their final locations). '
+ 'This option can be used multiple times to add different postprocessors'))
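Note: how the _postprocessor_opts_parser lambda above turns NAME[:ARGS] into the dict handed to the plugin loader ('MyPluginPP' is a hypothetical plugin name; key[:-2] stands in for remove_end):

    _opts_parser = lambda key, val='': (
        *(item.split('=', 1) for item in (val.split(';') if val else [])),
        ('key', key[:-2] if key.endswith('PP') else key))

    name, _, args = 'MyPluginPP:when=before_dl;foo=1'.partition(':')
    print(dict(_opts_parser(name, args)))
    # {'when': 'before_dl', 'foo': '1', 'key': 'MyPlugin'}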
+
+ sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=(
+ 'Make chapter entries for, or remove various segments (sponsor, introductions, etc.) '
+ 'from downloaded YouTube videos using the SponsorBlock API (https://sponsor.ajay.app)'))
+ sponsorblock.add_option(
+ '--sponsorblock-mark', metavar='CATS',
+ dest='sponsorblock_mark', default=set(), action='callback', type='str',
+ callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()},
+ help=(
+ 'SponsorBlock categories to create chapters for, separated by commas. '
+ 'Available categories are all, %s. You can prefix the category with a "-" to exempt it. '
+ 'See https://wiki.sponsor.ajay.app/index.php/Segment_Categories for a description of the categories. '
+ 'Eg: --sponsorblock-mark all,-preview' % ', '.join(SponsorBlockPP.CATEGORIES.keys())))
+ sponsorblock.add_option(
+ '--sponsorblock-remove', metavar='CATS',
+ dest='sponsorblock_remove', default=set(), action='callback', type='str',
+ callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()},
+ help=(
+ 'SponsorBlock categories to be removed from the video file, separated by commas. '
+ 'If a category is present in both mark and remove, remove takes precedence. '
+ 'The syntax and available categories are the same as for --sponsorblock-mark'))
+ sponsorblock.add_option(
+ '--sponsorblock-chapter-title', metavar='TEMPLATE',
+ default=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, dest='sponsorblock_chapter_title',
+ help=(
+ 'The title template for SponsorBlock chapters created by --sponsorblock-mark. '
+ 'The same syntax as the output template is used, but the only available fields are '
+ 'start_time, end_time, category, categories, name, category_names. Defaults to "%default"'))
+ sponsorblock.add_option(
+ '--no-sponsorblock', default=False,
+ action='store_true', dest='no_sponsorblock',
+ help='Disable both --sponsorblock-mark and --sponsorblock-remove')
+ sponsorblock.add_option(
+ '--sponsorblock-api', metavar='URL',
+ default='https://sponsor.ajay.app', dest='sponsorblock_api',
+ help='SponsorBlock API location, defaults to %default')
+
+ sponsorblock.add_option(
+ '--sponskrub',
+ action='store_true', dest='sponskrub', default=False,
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--no-sponskrub',
+ action='store_false', dest='sponskrub',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-cut', default=False,
+ action='store_true', dest='sponskrub_cut',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--no-sponskrub-cut',
+ action='store_false', dest='sponskrub_cut',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-force', default=False,
+ action='store_true', dest='sponskrub_force',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--no-sponskrub-force',
+ action='store_true', dest='sponskrub_force',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-location', metavar='PATH',
+ dest='sponskrub_path', default='',
+ help=optparse.SUPPRESS_HELP)
+ sponsorblock.add_option(
+ '--sponskrub-args', dest='sponskrub_args', metavar='ARGS',
+ help=optparse.SUPPRESS_HELP)
+
+ extractor = optparse.OptionGroup(parser, 'Extractor Options')
+ extractor.add_option(
+ '--extractor-retries',
+ dest='extractor_retries', metavar='RETRIES', default=3,
+ help='Number of retries for known extractor errors (default is %default), or "infinite"')
+ extractor.add_option(
+ '--allow-dynamic-mpd', '--no-ignore-dynamic-mpd',
+ action='store_true', dest='dynamic_mpd', default=True,
+ help='Process dynamic DASH manifests (default) (Alias: --no-ignore-dynamic-mpd)')
+ extractor.add_option(
+ '--ignore-dynamic-mpd', '--no-allow-dynamic-mpd',
+ action='store_false', dest='dynamic_mpd',
+ help='Do not process dynamic DASH manifests (Alias: --no-allow-dynamic-mpd)')
+ extractor.add_option(
+ '--hls-split-discontinuity',
+ dest='hls_split_discontinuity', action='store_true', default=False,
+ help='Split HLS playlists to different formats at discontinuities such as ad breaks'
+ )
+ extractor.add_option(
+ '--no-hls-split-discontinuity',
+ dest='hls_split_discontinuity', action='store_false',
+ help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)')
+ _extractor_arg_parser = lambda key, vals='': (key.strip().lower().replace('-', '_'), [val.strip() for val in vals.split(',')])
+ extractor.add_option(
+ '--extractor-args',
+ metavar='KEY:ARGS', dest='extractor_args', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'multiple_keys': False,
+ 'process': lambda val: dict(
+ _extractor_arg_parser(*arg.split('=', 1)) for arg in val.split(';'))
+ }, help=(
+ 'Pass these arguments to the extractor. See "EXTRACTOR ARGUMENTS" for details. '
+ 'You can use this option multiple times to give arguments for different extractors'))
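Note: worked example of the _extractor_arg_parser lambda above ('player_client' and 'skip' are illustrative key names):

    _extractor_arg_parser = lambda key, vals='': (
        key.strip().lower().replace('-', '_'),
        [val.strip() for val in vals.split(',')])

    val = 'player_client=android,web;skip=dash'
    print(dict(_extractor_arg_parser(*arg.split('=', 1)) for arg in val.split(';')))
    # {'player_client': ['android', 'web'], 'skip': ['dash']}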
+ extractor.add_option(
+ '--youtube-include-dash-manifest', '--no-youtube-skip-dash-manifest',
+ action='store_true', dest='youtube_include_dash_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ extractor.add_option(
+ '--youtube-skip-dash-manifest', '--no-youtube-include-dash-manifest',
+ action='store_false', dest='youtube_include_dash_manifest',
+ help=optparse.SUPPRESS_HELP)
+ extractor.add_option(
+ '--youtube-include-hls-manifest', '--no-youtube-skip-hls-manifest',
+ action='store_true', dest='youtube_include_hls_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ extractor.add_option(
+ '--youtube-skip-hls-manifest', '--no-youtube-include-hls-manifest',
+ action='store_false', dest='youtube_include_hls_manifest',
+ help=optparse.SUPPRESS_HELP)
parser.add_option_group(general)
parser.add_option_group(network)
@@ -866,13 +1539,15 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(downloader)
parser.add_option_group(filesystem)
parser.add_option_group(thumbnail)
+ parser.add_option_group(link)
parser.add_option_group(verbosity)
parser.add_option_group(workarounds)
parser.add_option_group(video_format)
parser.add_option_group(subtitles)
parser.add_option_group(authentication)
- parser.add_option_group(adobe_pass)
parser.add_option_group(postproc)
+ parser.add_option_group(sponsorblock)
+ parser.add_option_group(extractor)
if overrideArguments is not None:
opts, args = parser.parse_args(overrideArguments)
@@ -884,33 +1559,62 @@ def parseOpts(overrideArguments=None):
return [a.decode(preferredencoding(), 'replace') for a in conf]
return conf
- command_line_conf = compat_conf(sys.argv[1:])
- opts, args = parser.parse_args(command_line_conf)
+ configs = {
+ 'command-line': compat_conf(sys.argv[1:]),
+ 'custom': [], 'home': [], 'portable': [], 'user': [], 'system': []}
+ paths = {'command-line': False}
- system_conf = user_conf = custom_conf = []
+ def read_options(name, path, user=False):
+ ''' loads config files and returns ignoreconfig '''
+ # Multiple package names can be given here
+ # Eg: ('hypervideo', 'youtube-dlc', 'youtube-dl') will look for
+ # the configuration file of any of these three packages
+ for package in ('hypervideo',):
+ if user:
+ config, current_path = _readUserConf(package, default=None)
+ else:
+ current_path = os.path.join(path, '%s.conf' % package)
+ config = _readOptions(current_path, default=None)
+ if config is not None:
+ configs[name], paths[name] = config, current_path
+ return parser.parse_args(config)[0].ignoreconfig
+ return False
- if '--config-location' in command_line_conf:
- location = compat_expanduser(opts.config_location)
- if os.path.isdir(location):
- location = os.path.join(location, 'hypervideo.conf')
- if not os.path.exists(location):
- parser.error('config-location %s does not exist.' % location)
- custom_conf = _readOptions(location)
- elif '--ignore-config' in command_line_conf:
- pass
- else:
- system_conf = _readOptions('/etc/hypervideo.conf')
- if '--ignore-config' not in system_conf:
- user_conf = _readUserConf()
+ def get_configs():
+ opts, _ = parser.parse_args(configs['command-line'])
+ if opts.config_location is not None:
+ location = compat_expanduser(opts.config_location)
+ if os.path.isdir(location):
+ location = os.path.join(location, 'hypervideo.conf')
+ if not os.path.exists(location):
+ parser.error('config-location %s does not exist.' % location)
+ config = _readOptions(location, default=None)
+ if config:
+ configs['custom'], paths['custom'] = config, location
+
+ if opts.ignoreconfig:
+ return
+ if parser.parse_args(configs['custom'])[0].ignoreconfig:
+ return
+ if read_options('portable', get_executable_path()):
+ return
+ opts, _ = parser.parse_args(configs['portable'] + configs['custom'] + configs['command-line'])
+ if read_options('home', expand_path(opts.paths.get('home', '')).strip()):
+ return
+ if read_options('system', '/etc'):
+ return
+ if read_options('user', None, user=True):
+ configs['system'], paths['system'] = [], None
- argv = system_conf + user_conf + custom_conf + command_line_conf
+ get_configs()
+ argv = configs['system'] + configs['user'] + configs['home'] + configs['portable'] + configs['custom'] + configs['command-line']
opts, args = parser.parse_args(argv)
if opts.verbose:
- for conf_label, conf in (
- ('System config', system_conf),
- ('User config', user_conf),
- ('Custom config', custom_conf),
- ('Command-line args', command_line_conf)):
- write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf))))
+ for label in ('Command-line', 'Custom', 'Portable', 'Home', 'User', 'System'):
+ key = label.lower()
+ if paths.get(key):
+ write_string(f'[debug] {label} config file: {paths[key]}\n')
+ if paths.get(key) is not None:
+ write_string(f'[debug] {label} config: {_hide_login_info(configs[key])!r}\n')
return parser, opts, args
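Note: the final argv concatenation relies on plain optparse semantics, where a later occurrence of an option overrides an earlier one; that is why command-line arguments are appended last. A minimal demonstration (option name chosen only for illustration):

    import optparse

    p = optparse.OptionParser()
    p.add_option('-r', '--limit-rate', dest='ratelimit')
    opts, _ = p.parse_args(['--limit-rate', '1M', '--limit-rate', '50K'])
    print(opts.ratelimit)  # 50K - the last occurrence wins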
diff --git a/hypervideo_dl/postprocessor/__init__.py b/hypervideo_dl/postprocessor/__init__.py
index 3ea5183..07c87b7 100644
--- a/hypervideo_dl/postprocessor/__init__.py
+++ b/hypervideo_dl/postprocessor/__init__.py
@@ -1,40 +1,43 @@
-from __future__ import unicode_literals
+# flake8: noqa: F401
+
+from ..utils import load_plugins
from .embedthumbnail import EmbedThumbnailPP
+from .exec import ExecPP, ExecAfterDownloadPP
from .ffmpeg import (
FFmpegPostProcessor,
FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP,
+ FFmpegFixupDurationPP,
FFmpegFixupStretchedPP,
+ FFmpegFixupTimestampPP,
FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
FFmpegMergerPP,
FFmpegMetadataPP,
- FFmpegVideoConvertorPP,
FFmpegSubtitlesConvertorPP,
+ FFmpegThumbnailsConvertorPP,
+ FFmpegSplitChaptersPP,
+ FFmpegVideoConvertorPP,
+ FFmpegVideoRemuxerPP,
)
+from .metadataparser import (
+ MetadataFromFieldPP,
+ MetadataFromTitlePP,
+ MetadataParserPP,
+)
+from .modify_chapters import ModifyChaptersPP
+from .movefilesafterdownload import MoveFilesAfterDownloadPP
+from .sponskrub import SponSkrubPP
+from .sponsorblock import SponsorBlockPP
from .xattrpp import XAttrMetadataPP
-from .execafterdownload import ExecAfterDownloadPP
-from .metadatafromtitle import MetadataFromTitlePP
+
+_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals())
def get_postprocessor(key):
return globals()[key + 'PP']
-__all__ = [
- 'EmbedThumbnailPP',
- 'ExecAfterDownloadPP',
- 'FFmpegEmbedSubtitlePP',
- 'FFmpegExtractAudioPP',
- 'FFmpegFixupM3u8PP',
- 'FFmpegFixupM4aPP',
- 'FFmpegFixupStretchedPP',
- 'FFmpegMergerPP',
- 'FFmpegMetadataPP',
- 'FFmpegPostProcessor',
- 'FFmpegSubtitlesConvertorPP',
- 'FFmpegVideoConvertorPP',
- 'MetadataFromTitlePP',
- 'XAttrMetadataPP',
-]
+__all__ = [name for name in globals().keys() if name.endswith('PP')]
+__all__.append('FFmpegPostProcessor')
diff --git a/hypervideo_dl/postprocessor/common.py b/hypervideo_dl/postprocessor/common.py
index 599dd1d..b491afb 100644
--- a/hypervideo_dl/postprocessor/common.py
+++ b/hypervideo_dl/postprocessor/common.py
@@ -1,15 +1,38 @@
from __future__ import unicode_literals
+import copy
+import functools
import os
+from ..compat import compat_str
from ..utils import (
- PostProcessingError,
- cli_configuration_args,
+ _configuration_args,
encodeFilename,
+ PostProcessingError,
)
-class PostProcessor(object):
+class PostProcessorMetaClass(type):
+ @staticmethod
+ def run_wrapper(func):
+ @functools.wraps(func)
+ def run(self, info, *args, **kwargs):
+ info_copy = copy.deepcopy(self._copy_infodict(info))
+ self._hook_progress({'status': 'started'}, info_copy)
+ ret = func(self, info, *args, **kwargs)
+ if ret is not None:
+ _, info = ret
+ self._hook_progress({'status': 'finished'}, info_copy)
+ return ret
+ return run
+
+ def __new__(cls, name, bases, attrs):
+ if 'run' in attrs:
+ attrs['run'] = cls.run_wrapper(attrs['run'])
+ return type.__new__(cls, name, bases, attrs)
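Note: a standalone sketch of what the metaclass buys: any subclass defining run() gets it wrapped transparently, so the started/finished progress hooks fire without each postprocessor having to call them itself:

    import functools

    class Meta(type):
        @staticmethod
        def run_wrapper(func):
            @functools.wraps(func)
            def run(self, info):
                print('status: started')   # stands in for _hook_progress
                ret = func(self, info)
                print('status: finished')
                return ret
            return run

        def __new__(cls, name, bases, attrs):
            # wrap run() only where a class actually defines one
            if 'run' in attrs:
                attrs['run'] = cls.run_wrapper(attrs['run'])
            return type.__new__(cls, name, bases, attrs)

    class Demo(metaclass=Meta):
        def run(self, info):
            return [], info

    Demo().run({})  # prints started/finished around the body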
+
+
+class PostProcessor(metaclass=PostProcessorMetaClass):
"""Post Processor class.
PostProcessor objects can be added to downloaders with their
@@ -32,11 +55,66 @@ class PostProcessor(object):
_downloader = None
def __init__(self, downloader=None):
- self._downloader = downloader
+ self._progress_hooks = []
+ self.add_progress_hook(self.report_progress)
+ self.set_downloader(downloader)
+ self.PP_NAME = self.pp_key()
+
+ @classmethod
+ def pp_key(cls):
+ name = cls.__name__[:-2]
+ return compat_str(name[6:]) if name[:6].lower() == 'ffmpeg' else name
+
+ def to_screen(self, text, prefix=True, *args, **kwargs):
+ tag = '[%s] ' % self.PP_NAME if prefix else ''
+ if self._downloader:
+ return self._downloader.to_screen('%s%s' % (tag, text), *args, **kwargs)
+
+ def report_warning(self, text, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.report_warning(text, *args, **kwargs)
+
+ def report_error(self, text, *args, **kwargs):
+ # Exists only for compatibility. Do not use
+ if self._downloader:
+ return self._downloader.report_error(text, *args, **kwargs)
+
+ def write_debug(self, text, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.write_debug(text, *args, **kwargs)
+
+ def get_param(self, name, default=None, *args, **kwargs):
+ if self._downloader:
+ return self._downloader.params.get(name, default, *args, **kwargs)
+ return default
def set_downloader(self, downloader):
"""Sets the downloader for this PP."""
self._downloader = downloader
+ for ph in getattr(downloader, '_postprocessor_hooks', []):
+ self.add_progress_hook(ph)
+
+ def _copy_infodict(self, info_dict):
+ return getattr(self._downloader, '_copy_infodict', dict)(info_dict)
+
+ @staticmethod
+ def _restrict_to(*, video=True, audio=True, images=True):
+ allowed = {'video': video, 'audio': audio, 'images': images}
+
+ def decorator(func):
+ @functools.wraps(func)
+ def wrapper(self, info):
+ format_type = (
+ 'video' if info.get('vcodec') != 'none'
+ else 'audio' if info.get('acodec') != 'none'
+ else 'images')
+ if allowed[format_type]:
+ return func(self, info)
+ else:
+ self.to_screen('Skipping %s' % format_type)
+ return [], info
+ return wrapper
+ return decorator
def run(self, information):
"""Run the PostProcessor.
@@ -59,10 +137,41 @@ class PostProcessor(object):
try:
os.utime(encodeFilename(path), (atime, mtime))
except Exception:
- self._downloader.report_warning(errnote)
-
- def _configuration_args(self, default=[]):
- return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
+ self.report_warning(errnote)
+
+ def _configuration_args(self, exe, *args, **kwargs):
+ return _configuration_args(
+ self.pp_key(), self.get_param('postprocessor_args'), exe, *args, **kwargs)
+
+ def _hook_progress(self, status, info_dict):
+ if not self._progress_hooks:
+ return
+ status.update({
+ 'info_dict': info_dict,
+ 'postprocessor': self.pp_key(),
+ })
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+ # See YoutubeDL.py (search for postprocessor_hooks) for a description of this interface
+ self._progress_hooks.append(ph)
+
+ def report_progress(self, s):
+ s['_default_template'] = '%(postprocessor)s %(status)s' % s
+
+ progress_dict = s.copy()
+ progress_dict.pop('info_dict')
+ progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
+
+ progress_template = self.get_param('progress_template', {})
+ tmpl = progress_template.get('postprocess')
+ if tmpl:
+ self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict))
+
+ self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
+ progress_template.get('postprocess-title') or 'hypervideo %(progress._default_template)s',
+ progress_dict))
class AudioConversionError(PostProcessingError):
diff --git a/hypervideo_dl/postprocessor/embedthumbnail.py b/hypervideo_dl/postprocessor/embedthumbnail.py
index 3990908..3139a63 100644
--- a/hypervideo_dl/postprocessor/embedthumbnail.py
+++ b/hypervideo_dl/postprocessor/embedthumbnail.py
@@ -1,20 +1,35 @@
# coding: utf-8
from __future__ import unicode_literals
-
+import base64
+import imghdr
import os
import subprocess
-
-from .ffmpeg import FFmpegPostProcessor
-
+import re
+
+try:
+ from mutagen.flac import Picture, FLAC
+ from mutagen.mp4 import MP4, MP4Cover
+ from mutagen.oggopus import OggOpus
+ from mutagen.oggvorbis import OggVorbis
+ has_mutagen = True
+except ImportError:
+ has_mutagen = False
+
+from .common import PostProcessor
+from .ffmpeg import (
+ FFmpegPostProcessor,
+ FFmpegThumbnailsConvertorPP,
+)
from ..utils import (
check_executable,
encodeArgument,
encodeFilename,
+ error_to_compat_str,
PostProcessingError,
prepend_extension,
- replace_extension,
- shell_quote
+ process_communicate_or_kill,
+ shell_quote,
)
@@ -23,108 +38,198 @@ class EmbedThumbnailPPError(PostProcessingError):
class EmbedThumbnailPP(FFmpegPostProcessor):
+
def __init__(self, downloader=None, already_have_thumbnail=False):
- super(EmbedThumbnailPP, self).__init__(downloader)
+ FFmpegPostProcessor.__init__(self, downloader)
self._already_have_thumbnail = already_have_thumbnail
+ def _get_thumbnail_resolution(self, filename, thumbnail_dict):
+ def guess():
+ width, height = thumbnail_dict.get('width'), thumbnail_dict.get('height')
+ if width and height:
+ return width, height
+
+ try:
+ size_regex = r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]'
+ size_result = self.run_ffmpeg(filename, None, ['-hide_banner'], expected_retcodes=(1,))
+ mobj = re.search(size_regex, size_result)
+ if mobj is None:
+ return guess()
+ except PostProcessingError as err:
+ self.report_warning('unable to find the thumbnail resolution; %s' % error_to_compat_str(err))
+ return guess()
+ return int(mobj.group('w')), int(mobj.group('h'))
+
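Note: the size_regex above scrapes WxH from ffmpeg's stream description; a sketch against an illustrative -hide_banner output line:

    import re

    size_regex = r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]'
    line = 'Stream #0:0: Video: png, rgba, 1280x720 [SAR 1:1 DAR 16:9]'
    mobj = re.search(size_regex, line)
    print(int(mobj.group('w')), int(mobj.group('h')))  # 1280 720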
+ def _report_run(self, exe, filename):
+ self.to_screen('%s: Adding thumbnail to "%s"' % (exe, filename))
+
+ @PostProcessor._restrict_to(images=False)
def run(self, info):
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
if not info.get('thumbnails'):
- self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
+ self.to_screen('There aren\'t any thumbnails to embed')
return [], info
- thumbnail_filename = info['thumbnails'][-1]['filename']
-
+ idx = next((-i for i, t in enumerate(info['thumbnails'][::-1], 1) if t.get('filepath')), None)
+ if idx is None:
+ self.to_screen('There are no thumbnails on disk')
+ return [], info
+ thumbnail_filename = info['thumbnails'][idx]['filepath']
if not os.path.exists(encodeFilename(thumbnail_filename)):
- self._downloader.report_warning(
- 'Skipping embedding the thumbnail because the file is missing.')
+ self.report_warning('Skipping embedding the thumbnail because the file is missing.')
return [], info
- def is_webp(path):
- with open(encodeFilename(path), 'rb') as f:
- b = f.read(12)
- return b[0:4] == b'RIFF' and b[8:] == b'WEBP'
-
# Correct extension for WebP file with wrong extension (see #25687, #25717)
- _, thumbnail_ext = os.path.splitext(thumbnail_filename)
- if thumbnail_ext:
- thumbnail_ext = thumbnail_ext[1:].lower()
- if thumbnail_ext != 'webp' and is_webp(thumbnail_filename):
- self._downloader.to_screen(
- '[ffmpeg] Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename)
- thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp')
- os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename))
- thumbnail_filename = thumbnail_webp_filename
- thumbnail_ext = 'webp'
-
- # Convert unsupported thumbnail formats to JPEG (see #25687, #25717)
- if thumbnail_ext not in ['jpg', 'png']:
- # NB: % is supposed to be escaped with %% but this does not work
- # for input files so working around with standard substitution
- escaped_thumbnail_filename = thumbnail_filename.replace('%', '#')
- os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename))
- escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg')
- self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename)
- self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg'])
- os.remove(encodeFilename(escaped_thumbnail_filename))
- thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg')
- # Rename back to unescaped for further processing
- os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename))
- thumbnail_filename = thumbnail_jpg_filename
-
- if info['ext'] == 'mp3':
- options = [
- '-c', 'copy', '-map', '0', '-map', '1',
- '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
+ convertor = FFmpegThumbnailsConvertorPP(self._downloader)
+ convertor.fixup_webp(info, idx)
- self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
+ original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath']
- self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
-
- if not self._already_have_thumbnail:
- os.remove(encodeFilename(thumbnail_filename))
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- elif info['ext'] in ['m4a', 'mp4']:
- atomicparsley = next((x
- for x in ['AtomicParsley', 'atomicparsley']
- if check_executable(x, ['-v'])), None)
+ # Convert unsupported thumbnail formats to PNG (see #25687, #25717)
+ # Original behavior was to convert to JPG, but since JPG is a lossy
+ # format, there will be some additional data loss.
+ # PNG, on the other hand, is lossless.
+ thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
+ if thumbnail_ext not in ('jpg', 'jpeg', 'png'):
+ thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
+ thumbnail_ext = 'png'
- if atomicparsley is None:
- raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
+ mtime = os.stat(encodeFilename(filename)).st_mtime
- cmd = [encodeFilename(atomicparsley, True),
- encodeFilename(filename, True),
- encodeArgument('--artwork'),
- encodeFilename(thumbnail_filename, True),
- encodeArgument('-o'),
- encodeFilename(temp_filename, True)]
+ success = True
+ if info['ext'] == 'mp3':
+ options = [
+ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-id3v2_version', '3',
+ '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (front)"']
- self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+ self._report_run('ffmpeg', filename)
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+ elif info['ext'] in ['mkv', 'mka']:
+ options = ['-c', 'copy', '-map', '0', '-dn']
+
+ mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')
+ old_stream, new_stream = self.get_stream_number(
+ filename, ('tags', 'mimetype'), mimetype)
+ if old_stream is not None:
+ options.extend(['-map', '-0:%d' % old_stream])
+ new_stream -= 1
+ options.extend([
+ '-attach', thumbnail_filename,
+ '-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
+ '-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
+
+ self._report_run('ffmpeg', filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ elif info['ext'] in ['m4a', 'mp4', 'mov']:
+ prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
+ # Method 1: Use mutagen
+ if not has_mutagen or prefer_atomicparsley:
+ success = False
+ else:
+ try:
+ self._report_run('mutagen', filename)
+ meta = MP4(filename)
+ # NOTE: the 'covr' atom is a non-standard MPEG-4 atom;
+ # Apple iTunes 'M4A' files include the 'moov.udta.meta.ilst' atom.
+ f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}[imghdr.what(thumbnail_filename)]
+ with open(thumbnail_filename, 'rb') as thumbfile:
+ thumb_data = thumbfile.read()
+ meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f)]
+ meta.save()
+ temp_filename = filename
+ except Exception as err:
+ self.report_warning('unable to embed using mutagen; %s' % error_to_compat_str(err))
+ success = False
+
+ # Method 2: Use ffmpeg+ffprobe
+ if not success and not prefer_atomicparsley:
+ success = True
+ try:
+ options = ['-c', 'copy', '-map', '0', '-dn', '-map', '1']
+
+ old_stream, new_stream = self.get_stream_number(
+ filename, ('disposition', 'attached_pic'), 1)
+ if old_stream is not None:
+ options.extend(['-map', '-0:%d' % old_stream])
+ new_stream -= 1
+ options.extend(['-disposition:%s' % new_stream, 'attached_pic'])
+
+ self._report_run('ffmpeg', filename)
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+ except PostProcessingError as err:
+ self.report_warning('unable to embed using ffprobe & ffmpeg; %s' % error_to_compat_str(err))
+ success = False
+
+ # Method 3: Use AtomicParsley
+ if not success:
+ success = True
+ atomicparsley = next((
+ x for x in ['AtomicParsley', 'atomicparsley']
+ if check_executable(x, ['-v'])), None)
+ if atomicparsley is None:
+ raise EmbedThumbnailPPError('AtomicParsley was not found. Please install')
+
+ cmd = [encodeFilename(atomicparsley, True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+ cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')]
+
+ self._report_run('atomicparsley', filename)
+ self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = process_communicate_or_kill(p)
+ if p.returncode != 0:
+ msg = stderr.decode('utf-8', 'replace').strip()
+ raise EmbedThumbnailPPError(msg)
+ # for formats that don't support thumbnails (like 3gp) AtomicParsley
+ # won't create the temporary file
+ if b'No changes' in stdout:
+ self.report_warning('The file format doesn\'t support embedding a thumbnail')
+ success = False
+
+ elif info['ext'] in ['ogg', 'opus', 'flac']:
+ if not has_mutagen:
+ raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python -m pip install mutagen`')
+
+ self._report_run('mutagen', filename)
+ f = {'opus': OggOpus, 'flac': FLAC, 'ogg': OggVorbis}[info['ext']](filename)
+
+ pic = Picture()
+ pic.mime = 'image/%s' % imghdr.what(thumbnail_filename)
+ with open(thumbnail_filename, 'rb') as thumbfile:
+ pic.data = thumbfile.read()
+ pic.type = 3 # front cover
+ res = self._get_thumbnail_resolution(thumbnail_filename, info['thumbnails'][idx])
+ if res is not None:
+ pic.width, pic.height = res
+
+ if info['ext'] == 'flac':
+ f.add_picture(pic)
+ else:
+ # https://wiki.xiph.org/VorbisComment#METADATA_BLOCK_PICTURE
+ f['METADATA_BLOCK_PICTURE'] = base64.b64encode(pic.write()).decode('ascii')
+ f.save()
+ temp_filename = filename
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate()
+ else:
+ raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/mov')
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- raise EmbedThumbnailPPError(msg)
+ if success and temp_filename != filename:
+ os.replace(temp_filename, filename)
- if not self._already_have_thumbnail:
- os.remove(encodeFilename(thumbnail_filename))
- # for formats that don't support thumbnails (like 3gp) AtomicParsley
- # won't create to the temporary file
- if b'No changes' in stdout:
- self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
- else:
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- else:
- raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
+ self.try_utime(filename, mtime, mtime)
- return [], info
+ files_to_delete = [thumbnail_filename]
+ if self._already_have_thumbnail:
+ if original_thumbnail == thumbnail_filename:
+ files_to_delete = []
+ else:
+ files_to_delete.append(original_thumbnail)
+ return files_to_delete, info
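The mutagen branch above reduces to a few library calls. A minimal standalone sketch, assuming mutagen is installed; the file names are hypothetical:

    from mutagen.mp4 import MP4, MP4Cover

    meta = MP4('audio.m4a')
    with open('cover.jpg', 'rb') as f:
        # imageformat must match the actual image data (FORMAT_PNG for png)
        meta.tags['covr'] = [MP4Cover(data=f.read(), imageformat=MP4Cover.FORMAT_JPEG)]
    meta.save()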
diff --git a/hypervideo_dl/postprocessor/exec.py b/hypervideo_dl/postprocessor/exec.py
new file mode 100644
index 0000000..7a3cb49
--- /dev/null
+++ b/hypervideo_dl/postprocessor/exec.py
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import subprocess
+
+from .common import PostProcessor
+from ..compat import compat_shlex_quote
+from ..utils import (
+ encodeArgument,
+ PostProcessingError,
+ variadic,
+)
+
+
+class ExecPP(PostProcessor):
+
+ def __init__(self, downloader, exec_cmd):
+ PostProcessor.__init__(self, downloader)
+ self.exec_cmd = variadic(exec_cmd)
+
+ def parse_cmd(self, cmd, info):
+ tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info)
+ if tmpl_dict: # if there are no replacements, tmpl_dict = {}
+ return self._downloader.escape_outtmpl(tmpl) % tmpl_dict
+
+ # If no replacements are found, replace {} for backward compatibility
+ if '{}' not in cmd:
+ cmd += ' {}'
+ return cmd.replace('{}', compat_shlex_quote(
+ info.get('filepath') or info['_filename']))
+
+ def run(self, info):
+ for tmpl in self.exec_cmd:
+ cmd = self.parse_cmd(tmpl, info)
+ self.to_screen('Executing command: %s' % cmd)
+ retCode = subprocess.call(encodeArgument(cmd), shell=True)
+ if retCode != 0:
+ raise PostProcessingError('Command returned error code %d' % retCode)
+ return [], info
+
+
+class ExecAfterDownloadPP(ExecPP): # for backward compatibility
+ pass
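parse_cmd above keeps the old ExecAfterDownload behaviour: when the template contains no %(...)s fields, the final filepath is shell-quoted into a '{}' placeholder, appended if absent. A standalone sketch of that fallback, with a hypothetical filename:

    import shlex

    cmd, filepath = 'echo finished', '/tmp/video title.mp4'
    if '{}' not in cmd:  # no placeholder given: append one, as parse_cmd does
        cmd += ' {}'
    print(cmd.replace('{}', shlex.quote(filepath)))
    # echo finished '/tmp/video title.mp4'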
diff --git a/hypervideo_dl/postprocessor/ffmpeg.py b/hypervideo_dl/postprocessor/ffmpeg.py
index 3078329..a6d6d78 100644
--- a/hypervideo_dl/postprocessor/ffmpeg.py
+++ b/hypervideo_dl/postprocessor/ffmpeg.py
@@ -1,26 +1,32 @@
from __future__ import unicode_literals
import io
+import itertools
import os
import subprocess
import time
import re
-
+import json
from .common import AudioConversionError, PostProcessor
+from ..compat import compat_str
from ..utils import (
+ dfxp2srt,
encodeArgument,
encodeFilename,
+ float_or_none,
get_exe_version,
is_outdated_version,
+ ISO639Utils,
+ orderedSet,
PostProcessingError,
prepend_extension,
- shell_quote,
- subtitles_filename,
- dfxp2srt,
- ISO639Utils,
+ process_communicate_or_kill,
replace_extension,
+ shell_quote,
+ traverse_obj,
+ variadic,
)
@@ -58,15 +64,14 @@ class FFmpegPostProcessor(PostProcessor):
def check_version(self):
if not self.available:
- raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
+ raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
required_version = '10-0' if self.basename == 'avconv' else '1.0'
if is_outdated_version(
self._versions[self.basename], required_version):
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
self.basename, self.basename, required_version)
- if self._downloader:
- self._downloader.report_warning(warning)
+ self.report_warning(warning)
@staticmethod
def get_versions(downloader=None):
@@ -96,30 +101,28 @@ class FFmpegPostProcessor(PostProcessor):
self._paths = None
self._versions = None
if self._downloader:
- prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
- location = self._downloader.params.get('ffmpeg_location')
+ prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
+ location = self.get_param('ffmpeg_location')
if location is not None:
if not os.path.exists(location):
- self._downloader.report_warning(
+ self.report_warning(
'ffmpeg-location %s does not exist! '
- 'Continuing without avconv/ffmpeg.' % (location))
+ 'Continuing without ffmpeg.' % (location))
self._versions = {}
return
- elif not os.path.isdir(location):
+ elif os.path.isdir(location):
+ dirname, basename = location, None
+ else:
basename = os.path.splitext(os.path.basename(location))[0]
- if basename not in programs:
- self._downloader.report_warning(
- 'Cannot identify executable %s, its basename should be one of %s. '
- 'Continuing without avconv/ffmpeg.' %
- (location, ', '.join(programs)))
- self._versions = {}
- return None
- location = os.path.dirname(os.path.abspath(location))
+ basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
+ dirname = os.path.dirname(os.path.abspath(location))
if basename in ('ffmpeg', 'ffprobe'):
prefer_ffmpeg = True
self._paths = dict(
- (p, os.path.join(location, p)) for p in programs)
+ (p, os.path.join(dirname, p)) for p in programs)
+ if basename:
+ self._paths[basename] = location
self._versions = dict(
(p, get_ffmpeg_version(self._paths[p])) for p in programs)
if self._versions is None:
@@ -163,7 +166,7 @@ class FFmpegPostProcessor(PostProcessor):
def get_audio_codec(self, path):
if not self.probe_available and not self.available:
- raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.')
+ raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
try:
if self.probe_available:
cmd = [
@@ -174,13 +177,11 @@ class FFmpegPostProcessor(PostProcessor):
encodeFilename(self.executable, True),
encodeArgument('-i')]
cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd)))
handle = subprocess.Popen(
cmd, stderr=subprocess.PIPE,
stdout=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout_data, stderr_data = handle.communicate()
+ stdout_data, stderr_data = process_communicate_or_kill(handle)
expected_ret = 0 if self.probe_available else 1
if handle.wait() != expected_ret:
return None
@@ -203,55 +204,174 @@ class FFmpegPostProcessor(PostProcessor):
return mobj.group(1)
return None
- def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
+ def get_metadata_object(self, path, opts=[]):
+ if self.probe_basename != 'ffprobe':
+ if self.probe_available:
+ self.report_warning('Only ffprobe is supported for metadata extraction')
+ raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
self.check_version()
- oldest_mtime = min(
- os.stat(encodeFilename(path)).st_mtime for path in input_paths)
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-hide_banner'),
+ encodeArgument('-show_format'),
+ encodeArgument('-show_streams'),
+ encodeArgument('-print_format'),
+ encodeArgument('json'),
+ ]
+
+ cmd += opts
+ cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
+ self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ return json.loads(stdout.decode('utf-8', 'replace'))
+
+ def get_stream_number(self, path, keys, value):
+ streams = self.get_metadata_object(path)['streams']
+ num = next(
+ (i for i, stream in enumerate(streams) if traverse_obj(stream, keys, casesense=False) == value),
+ None)
+ return num, len(streams)
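get_metadata_object and get_stream_number wrap a plain ffprobe JSON query. Stripped of the class plumbing, the equivalent lookup is roughly the following sketch; the path and tag are illustrative, and unlike the patch it matches tag keys case-sensitively:

    import json
    import subprocess

    def stream_number(path, tag, value):
        # Same ffprobe flags as get_metadata_object uses
        out = subprocess.check_output([
            'ffprobe', '-hide_banner', '-show_format', '-show_streams',
            '-print_format', 'json', path])
        streams = json.loads(out)['streams']
        num = next((i for i, s in enumerate(streams)
                    if s.get('tags', {}).get(tag) == value), None)
        return num, len(streams)

    print(stream_number('video.mkv', 'mimetype', 'image/jpeg'))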
- opts += self._configuration_args()
+ def _get_real_video_duration(self, info, fatal=True):
+ try:
+ if '_real_duration' not in info:
+ info['_real_duration'] = float_or_none(
+ traverse_obj(self.get_metadata_object(info['filepath']), ('format', 'duration')))
+ if not info['_real_duration']:
+ raise PostProcessingError('ffprobe returned empty duration')
+ except PostProcessingError as e:
+ if fatal:
+ raise PostProcessingError(f'Unable to determine video duration; {e}')
+ return info.setdefault('_real_duration', None)
+
+ def _duration_mismatch(self, d1, d2):
+ if not d1 or not d2:
+ return None
+ return abs(d1 - d2) > 1
+
+ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
+ return self.real_run_ffmpeg(
+ [(path, []) for path in input_paths],
+ [(out_path, opts)], **kwargs)
+
+ def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)):
+ self.check_version()
+
+ oldest_mtime = min(
+ os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)
- files_cmd = []
- for path in input_paths:
- files_cmd.extend([
- encodeArgument('-i'),
- encodeFilename(self._ffmpeg_filename_argument(path), True)
- ])
cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
# avconv does not have repeat option
if self.basename == 'ffmpeg':
cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
- cmd += (files_cmd
- + [encodeArgument(o) for o in opts]
- + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
+ def make_args(file, args, name, number):
+ keys = ['_%s%d' % (name, number), '_%s' % name]
+ if name == 'o' and number == 1:
+ keys.append('')
+ args += self._configuration_args(self.basename, keys)
+ if name == 'i':
+ args.append('-i')
+ return (
+ [encodeArgument(arg) for arg in args]
+ + [encodeFilename(self._ffmpeg_filename_argument(file), True)])
+
+ for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)):
+ cmd += itertools.chain.from_iterable(
+ make_args(path, list(opts), arg_type, i + 1)
+ for i, (path, opts) in enumerate(path_opts) if path)
+
+ self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout, stderr = p.communicate()
- if p.returncode != 0:
- stderr = stderr.decode('utf-8', 'replace')
- msg = stderr.strip().split('\n')[-1]
- raise FFmpegPostProcessorError(msg)
- self.try_utime(out_path, oldest_mtime, oldest_mtime)
+ stdout, stderr = process_communicate_or_kill(p)
+ if p.returncode not in variadic(expected_retcodes):
+ stderr = stderr.decode('utf-8', 'replace').strip()
+ self.write_debug(stderr)
+ raise FFmpegPostProcessorError(stderr.split('\n')[-1])
+ for out_path, _ in output_path_opts:
+ if out_path:
+ self.try_utime(out_path, oldest_mtime, oldest_mtime)
+ return stderr.decode('utf-8', 'replace')
+
+ def run_ffmpeg(self, path, out_path, opts, **kwargs):
+ return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)
- def run_ffmpeg(self, path, out_path, opts):
- self.run_ffmpeg_multiple_files([path], out_path, opts)
-
- def _ffmpeg_filename_argument(self, fn):
+ @staticmethod
+ def _ffmpeg_filename_argument(fn):
# Always use 'file:' because the filename may contain ':' (ffmpeg
# interprets that as a protocol) or can start with '-' (-- is broken in
# ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
# Also leave '-' intact in order not to break streaming to stdout.
+ if fn.startswith(('http://', 'https://')):
+ return fn
return 'file:' + fn if fn != '-' else fn
+ @staticmethod
+ def _quote_for_ffmpeg(string):
+ # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping
+ # A sequence of '' produces '\'''\'';
+ # final replace removes the empty '' between \' \'.
+ string = string.replace("'", r"'\''").replace("'''", "'")
+ # Handle potential ' at string boundaries.
+ string = string[1:] if string[0] == "'" else "'" + string
+ return string[:-1] if string[-1] == "'" else string + "'"
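_quote_for_ffmpeg applies ffmpeg's quoting rules, not shell quoting: a literal apostrophe closes the quoted run, is backslash-escaped, and a new run is opened. A worked example, not part of the patch:

    s = "it's"
    s = s.replace("'", r"'\''").replace("'''", "'")
    s = s[1:] if s[0] == "'" else "'" + s
    print(s[:-1] if s[-1] == "'" else s + "'")
    # 'it'\''s'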
+
+ def force_keyframes(self, filename, timestamps):
+ timestamps = orderedSet(timestamps)
+ if timestamps[0] == 0:
+ timestamps = timestamps[1:]
+ keyframe_file = prepend_extension(filename, 'keyframes.temp')
+ self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes')
+ self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join(
+ f'{t:.6f}' for t in timestamps)])
+ return keyframe_file
+
+ def concat_files(self, in_files, out_file, concat_opts=None):
+ """
+ Use concat demuxer to concatenate multiple files having identical streams.
+
+ Only inpoint, outpoint, and duration concat options are supported.
+ See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details
+ """
+ concat_file = f'{out_file}.concat'
+ self.write_debug(f'Writing concat spec to {concat_file}')
+ with open(concat_file, 'wt', encoding='utf-8') as f:
+ f.writelines(self._concat_spec(in_files, concat_opts))
+
+ out_flags = ['-c', 'copy']
+ if out_file.rpartition('.')[-1] in ('mp4', 'mov'):
+ # For some reason, '-c copy' is not enough to copy subtitles
+ out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart'])
+
+ try:
+ self.real_run_ffmpeg(
+ [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
+ [(out_file, out_flags)])
+ finally:
+ os.remove(concat_file)
+
+ @classmethod
+ def _concat_spec(cls, in_files, concat_opts=None):
+ if concat_opts is None:
+ concat_opts = [{}] * len(in_files)
+ yield 'ffconcat version 1.0\n'
+ for file, opts in zip(in_files, concat_opts):
+ yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n'
+ # Iterate explicitly to yield the following directives in order, ignoring the rest.
+ for directive in 'inpoint', 'outpoint', 'duration':
+ if directive in opts:
+ yield f'{directive} {opts[directive]}\n'
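For the single-cut case produced by remove_chapters further below, the generated spec repeats the same input with complementary in/out points. Removing 10-20s from a hypothetical video.mp4 yields:

    ffconcat version 1.0
    file 'file:video.mp4'
    outpoint 10.000000
    file 'file:video.mp4'
    inpoint 20.000000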
+
class FFmpegExtractAudioPP(FFmpegPostProcessor):
+ COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
+ SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav')
+
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
FFmpegPostProcessor.__init__(self, downloader)
- if preferredcodec is None:
- preferredcodec = 'best'
- self._preferredcodec = preferredcodec
+ self._preferredcodec = preferredcodec or 'best'
self._preferredquality = preferredquality
self._nopostoverwrites = nopostoverwrites
@@ -266,8 +386,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
except FFmpegPostProcessorError as err:
raise AudioConversionError(err.msg)
+ @PostProcessor._restrict_to(images=False)
def run(self, information):
path = information['filepath']
+ orig_ext = information['ext']
+
+ if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
+ self.to_screen('Skipping audio extraction since the file is already in a common audio format')
+ return [], information
filecodec = self.get_audio_codec(path)
if filecodec is None:
@@ -328,11 +454,11 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
if (new_path == path
or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
- self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
+ self.to_screen('Post-process file %s exists, skipping' % new_path)
return [], information
try:
- self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
+ self.to_screen('Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
except AudioConversionError as e:
raise PostProcessingError(
@@ -350,54 +476,102 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
+ FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
+ _ACTION = 'converting'
+
def __init__(self, downloader=None, preferedformat=None):
super(FFmpegVideoConvertorPP, self).__init__(downloader)
- self._preferedformat = preferedformat
+ self._preferedformats = preferedformat.lower().split('/')
- def run(self, information):
- path = information['filepath']
- if information['ext'] == self._preferedformat:
- self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
- return [], information
- options = []
- if self._preferedformat == 'avi':
- options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
- prefix, sep, ext = path.rpartition('.')
- outpath = prefix + sep + self._preferedformat
- self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
- self.run_ffmpeg(path, outpath, options)
- information['filepath'] = outpath
- information['format'] = self._preferedformat
- information['ext'] = self._preferedformat
- return [path], information
+ def _target_ext(self, source_ext):
+ for pair in self._preferedformats:
+ kv = pair.split('>')
+ if len(kv) == 1 or kv[0].strip() == source_ext:
+ return kv[-1].strip()
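The mapping string mirrors the remux/convert option syntax: 'webm>mp4/mkv' converts webm to mp4 and everything else to mkv, while an unmatched source is left untouched. A quick standalone check of the same logic:

    def target_ext(source_ext, spec):
        for pair in spec.lower().split('/'):
            kv = pair.split('>')
            if len(kv) == 1 or kv[0].strip() == source_ext:
                return kv[-1].strip()

    assert target_ext('webm', 'webm>mp4/mkv') == 'mp4'
    assert target_ext('avi', 'webm>mp4/mkv') == 'mkv'
    assert target_ext('mkv', 'webm>mp4') is None  # no mapping: file is skipped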
+
+ @staticmethod
+ def _options(target_ext):
+ if target_ext == 'avi':
+ return ['-c:v', 'libxvid', '-vtag', 'XVID']
+ return []
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ filename, source_ext = info['filepath'], info['ext'].lower()
+ target_ext = self._target_ext(source_ext)
+ _skip_msg = (
+ f'could not find a mapping for {source_ext}' if not target_ext
+ else f'already is in target format {source_ext}' if source_ext == target_ext
+ else None)
+ if _skip_msg:
+ self.to_screen(f'Not {self._ACTION} media file {filename!r}; {_skip_msg}')
+ return [], info
+
+ outpath = replace_extension(filename, target_ext, source_ext)
+ self.to_screen(f'{self._ACTION.title()} video from {source_ext} to {target_ext}; Destination: {outpath}')
+ self.run_ffmpeg(filename, outpath, self._options(target_ext))
+
+ info['filepath'] = outpath
+ info['format'] = info['ext'] = target_ext
+ return [filename], info
+
+
+class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
+ _ACTION = 'remuxing'
+
+ @staticmethod
+ def _options(target_ext):
+ options = ['-c', 'copy', '-map', '0', '-dn']
+ if target_ext in ['mp4', 'm4a', 'mov']:
+ options.extend(['-movflags', '+faststart'])
+ return options
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, already_have_subtitle=False):
+ super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
+ self._already_have_subtitle = already_have_subtitle
+
+ @PostProcessor._restrict_to(images=False)
def run(self, information):
if information['ext'] not in ('mp4', 'webm', 'mkv'):
- self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
+ self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files')
return [], information
subtitles = information.get('requested_subtitles')
if not subtitles:
- self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
+ self.to_screen('There aren\'t any subtitles to embed')
return [], information
filename = information['filepath']
+ if information.get('duration') and self._duration_mismatch(
+ self._get_real_video_duration(information, False), information['duration']):
+ self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch')
+ return [], information
ext = information['ext']
- sub_langs = []
- sub_filenames = []
+ sub_langs, sub_names, sub_filenames = [], [], []
webm_vtt_warn = False
+ mp4_ass_warn = False
for lang, sub_info in subtitles.items():
+ if not os.path.exists(sub_info.get('filepath', '')):
+ self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
+ continue
sub_ext = sub_info['ext']
- if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
+ if sub_ext == 'json':
+ self.report_warning('JSON subtitles cannot be embedded')
+ elif ext != 'webm' or sub_ext == 'vtt':
sub_langs.append(lang)
- sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
+ sub_names.append(sub_info.get('name'))
+ sub_filenames.append(sub_info['filepath'])
else:
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True
- self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
+ self.report_warning('Only WebVTT subtitles can be embedded in webm files')
+ if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
+ mp4_ass_warn = True
+ self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
if not sub_langs:
return [], information
@@ -405,8 +579,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
input_files = [filename] + sub_filenames
opts = [
- '-map', '0',
- '-c', 'copy',
+ '-c', 'copy', '-map', '0', '-dn',
# Don't copy the existing subtitles, we may be running the
# postprocessor a second time
'-map', '-0:s',
@@ -416,48 +589,100 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
]
if information['ext'] == 'mp4':
opts += ['-c:s', 'mov_text']
- for (i, lang) in enumerate(sub_langs):
+ for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
opts.extend(['-map', '%d:0' % (i + 1)])
lang_code = ISO639Utils.short2long(lang) or lang
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+ if name:
+ opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name,
+ '-metadata:s:s:%d' % i, 'title=%s' % name])
temp_filename = prepend_extension(filename, 'temp')
- self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+ self.to_screen('Embedding subtitles in "%s"' % filename)
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ os.replace(temp_filename, filename)
- return sub_filenames, information
+ files_to_delete = [] if self._already_have_subtitle else sub_filenames
+ return files_to_delete, information
class FFmpegMetadataPP(FFmpegPostProcessor):
+
+ def __init__(self, downloader, add_metadata=True, add_chapters=True):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._add_metadata = add_metadata
+ self._add_chapters = add_chapters
+
+ @staticmethod
+ def _options(target_ext):
+ yield from ('-map', '0', '-dn')
+ if target_ext == 'm4a':
+ yield from ('-vn', '-acodec', 'copy')
+ else:
+ yield from ('-c', 'copy')
+
+ @PostProcessor._restrict_to(images=False)
def run(self, info):
+ filename, metadata_filename = info['filepath'], None
+ options = []
+ if self._add_chapters and info.get('chapters'):
+ metadata_filename = replace_extension(filename, 'meta')
+ options.extend(self._get_chapter_opts(info['chapters'], metadata_filename))
+ if self._add_metadata:
+ options.extend(self._get_metadata_opts(info))
+
+ if not options:
+ self.to_screen('There isn\'t any metadata to add')
+ return [], info
+
+ temp_filename = prepend_extension(filename, 'temp')
+ self.to_screen('Adding metadata to "%s"' % filename)
+ self.run_ffmpeg_multiple_files(
+ (filename, metadata_filename), temp_filename,
+ itertools.chain(self._options(info['ext']), *options))
+ if metadata_filename:
+ os.remove(metadata_filename)
+ os.replace(temp_filename, filename)
+ return [], info
+
+ @staticmethod
+ def _get_chapter_opts(chapters, metadata_filename):
+ with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+ def ffmpeg_escape(text):
+ return re.sub(r'([\\=;#\n])', r'\\\1', text)
+
+ metadata_file_content = ';FFMETADATA1\n'
+ for chapter in chapters:
+ metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
+ metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
+ metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
+ chapter_title = chapter.get('title')
+ if chapter_title:
+ metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
+ f.write(metadata_file_content)
+ yield ('-map_metadata', '1')
+
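_get_chapter_opts writes ffmpeg's FFMETADATA format and maps it in as a second input. For two chapters the file looks like this (illustrative titles; '=', ';', '#', '\' and newlines in titles are escaped by ffmpeg_escape):

    ;FFMETADATA1
    [CHAPTER]
    TIMEBASE=1/1000
    START=0
    END=90000
    title=Intro
    [CHAPTER]
    TIMEBASE=1/1000
    START=90000
    END=300000
    title=Main topic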
+ def _get_metadata_opts(self, info):
metadata = {}
+ meta_prefix = 'meta_'
def add(meta_list, info_list=None):
- if not info_list:
- info_list = meta_list
- if not isinstance(meta_list, (list, tuple)):
- meta_list = (meta_list,)
- if not isinstance(info_list, (list, tuple)):
- info_list = (info_list,)
- for info_f in info_list:
- if info.get(info_f) is not None:
- for meta_f in meta_list:
- metadata[meta_f] = info[info_f]
- break
+ value = next((
+ str(info[key]) for key in [meta_prefix] + list(variadic(info_list or meta_list))
+ if info.get(key) is not None), None)
+ if value not in ('', None):
+ metadata.update({meta_f: value for meta_f in variadic(meta_list)})
# See [1-4] for some info on media metadata/metadata supported
# by ffmpeg.
# 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
# 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
# 3. https://kodi.wiki/view/Video_file_tagging
- # 4. http://atomicparsley.sourceforge.net/mpeg-4files.html
add('title', ('track', 'title'))
add('date', 'upload_date')
- add(('description', 'comment'), 'description')
- add('purl', 'webpage_url')
+ add(('description', 'synopsis'), 'description')
+ add(('purl', 'comment'), 'webpage_url')
add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
add('genre')
@@ -469,57 +694,50 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
add('episode_id', ('episode', 'episode_id'))
add('episode_sort', 'episode_number')
- if not metadata:
- self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
- return [], info
+ for key, value in info.items():
+ if value is not None and key != meta_prefix and key.startswith(meta_prefix):
+ metadata[key[len(meta_prefix):]] = value
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
- in_filenames = [filename]
- options = []
+ for name, value in metadata.items():
+ yield ('-metadata', f'{name}={value}')
- if info['ext'] == 'm4a':
- options.extend(['-vn', '-acodec', 'copy'])
- else:
- options.extend(['-c', 'copy'])
+ stream_idx = 0
+ for fmt in info.get('requested_formats') or []:
+ stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1
+ if fmt.get('language'):
+ lang = ISO639Utils.short2long(fmt['language']) or fmt['language']
+ for i in range(stream_count):
+ yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang)
+ stream_idx += stream_count
- for (name, value) in metadata.items():
- options.extend(['-metadata', '%s=%s' % (name, value)])
+ if ('no-attach-info-json' not in self.get_param('compat_opts', [])
+ and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')):
+ old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json')
+ if old_stream is not None:
+ yield ('-map', '-0:%d' % old_stream)
+ new_stream -= 1
- chapters = info.get('chapters', [])
- if chapters:
- metadata_filename = replace_extension(filename, 'meta')
- with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
- def ffmpeg_escape(text):
- return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
-
- metadata_file_content = ';FFMETADATA1\n'
- for chapter in chapters:
- metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
- metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
- metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
- chapter_title = chapter.get('title')
- if chapter_title:
- metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
- f.write(metadata_file_content)
- in_filenames.append(metadata_filename)
- options.extend(['-map_metadata', '1'])
-
- self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
- self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
- if chapters:
- os.remove(metadata_filename)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return [], info
+ yield ('-attach', info['__infojson_filename'],
+ '-metadata:s:%d' % new_stream, 'mimetype=application/json')
class FFmpegMergerPP(FFmpegPostProcessor):
+ @PostProcessor._restrict_to(images=False)
def run(self, info):
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
- args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
- self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
+ args = ['-c', 'copy']
+ audio_streams = 0
+ for (i, fmt) in enumerate(info['requested_formats']):
+ if fmt.get('acodec') != 'none':
+ args.extend(['-map', f'{i}:a:0'])
+ aac_fixup = fmt['protocol'].startswith('m3u8') and self.get_audio_codec(fmt['filepath']) == 'aac'
+ if aac_fixup:
+ args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc'])
+ audio_streams += 1
+ if fmt.get('vcodec') != 'none':
+ args.extend(['-map', '%u:v:0' % (i)])
+ self.to_screen('Merging formats into "%s"' % filename)
self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return info['__files_to_merge'], info
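For the common case of one video-only format plus one HLS audio-only format whose codec probes as aac, the loop above assembles a command equivalent to the following (hand-derived; file names illustrative):

    ffmpeg -y -loglevel repeat+info \
      -i file:video.f137.mp4 -i file:audio.f140.m4a \
      -c copy -map 0:v:0 -map 1:a:0 -bsf:a:0 aac_adtstoasc \
      file:video.temp.mp4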
@@ -536,98 +754,120 @@ class FFmpegMergerPP(FFmpegPostProcessor):
'hypervideo will download single file media. '
'Update %s to version %s or newer to fix this.') % (
self.basename, self.basename, required_version)
- if self._downloader:
- self._downloader.report_warning(warning)
+ self.report_warning(warning)
return False
return True
-class FFmpegFixupStretchedPP(FFmpegPostProcessor):
- def run(self, info):
- stretched_ratio = info.get('stretched_ratio')
- if stretched_ratio is None or stretched_ratio == 1:
- return [], info
-
- filename = info['filepath']
+class FFmpegFixupPostProcessor(FFmpegPostProcessor):
+ def _fixup(self, msg, filename, options):
temp_filename = prepend_extension(filename, 'temp')
- options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
- self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
+ self.to_screen(f'{msg} of "{filename}"')
self.run_ffmpeg(filename, temp_filename, options)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ os.replace(temp_filename, filename)
+
+class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
+ @PostProcessor._restrict_to(images=False, audio=False)
+ def run(self, info):
+ stretched_ratio = info.get('stretched_ratio')
+ if stretched_ratio not in (None, 1):
+ self._fixup('Fixing aspect ratio', info['filepath'], [
+ '-c', 'copy', '-map', '0', '-dn', '-aspect', '%f' % stretched_ratio])
return [], info
-class FFmpegFixupM4aPP(FFmpegPostProcessor):
+class FFmpegFixupM4aPP(FFmpegFixupPostProcessor):
+ @PostProcessor._restrict_to(images=False, video=False)
def run(self, info):
- if info.get('container') != 'm4a_dash':
- return [], info
+ if info.get('container') == 'm4a_dash':
+ self._fixup('Correcting container', info['filepath'], [
+ '-c', 'copy', '-map', '0', '-dn', '-f', 'mp4'])
+ return [], info
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
- options = ['-c', 'copy', '-f', 'mp4']
- self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
+class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ if self.get_audio_codec(info['filepath']) == 'aac':
+ self._fixup('Fixing malformed AAC bitstream', info['filepath'], [
+ '-c', 'copy', '-map', '0', '-dn', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'])
+ return [], info
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return [], info
+class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
+ def __init__(self, downloader=None, trim=0.001):
+ # "trim" should be used when the video contains unintended packets
+ super(FFmpegFixupTimestampPP, self).__init__(downloader)
+ assert isinstance(trim, (int, float))
+ self.trim = str(trim)
-class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+ @PostProcessor._restrict_to(images=False)
def run(self, info):
- filename = info['filepath']
- if self.get_audio_codec(filename) == 'aac':
- temp_filename = prepend_extension(filename, 'temp')
+ required_version = '4.4'
+ if is_outdated_version(self._versions[self.basename], required_version):
+ self.report_warning(
+ 'A re-encode is needed to fix timestamps in older versions of ffmpeg. '
+ f'Please install ffmpeg {required_version} or later to fixup without re-encoding')
+ opts = ['-vf', 'setpts=PTS-STARTPTS']
+ else:
+ opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS']
+ self._fixup('Fixing frame timestamp', info['filepath'], opts + ['-map', '0', '-dn', '-ss', self.trim])
+ return [], info
- options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
- self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+class FFmpegFixupDurationPP(FFmpegFixupPostProcessor):
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ self._fixup('Fixing video duration', info['filepath'], ['-c', 'copy', '-map', '0', '-dn'])
return [], info
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')
+
def __init__(self, downloader=None, format=None):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
self.format = format
def run(self, info):
subs = info.get('requested_subtitles')
- filename = info['filepath']
new_ext = self.format
new_format = new_ext
if new_format == 'vtt':
new_format = 'webvtt'
if subs is None:
- self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+ self.to_screen('There aren\'t any subtitles to convert')
return [], info
- self._downloader.to_screen('[ffmpeg] Converting subtitles')
+ self.to_screen('Converting subtitles')
sub_filenames = []
for lang, sub in subs.items():
+ if not os.path.exists(sub.get('filepath', '')):
+ self.report_warning(f'Skipping conversion of {lang} subtitle because the file is missing')
+ continue
ext = sub['ext']
if ext == new_ext:
- self._downloader.to_screen(
- '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
+ self.to_screen('Subtitle file for %s is already in the requested format' % new_ext)
+ continue
+ elif ext == 'json':
+ self.to_screen(
+ 'You have requested to convert json subtitles into another format, '
+ 'which is currently not possible')
continue
- old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
+ old_file = sub['filepath']
sub_filenames.append(old_file)
- new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
+ new_file = replace_extension(old_file, new_ext)
if ext in ('dfxp', 'ttml', 'tt'):
- self._downloader.report_warning(
+ self.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss')
dfxp_file = old_file
- srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
+ srt_file = replace_extension(old_file, 'srt')
with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
@@ -638,7 +878,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
subs[lang] = {
'ext': 'srt',
- 'data': srt_data
+ 'data': srt_data,
+ 'filepath': srt_file,
}
if new_ext == 'srt':
@@ -652,6 +893,125 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
subs[lang] = {
'ext': new_ext,
'data': f.read(),
+ 'filepath': new_file,
}
+ info['__files_to_move'][new_file] = replace_extension(
+ info['__files_to_move'][sub['filepath']], new_ext)
+
return sub_filenames, info
+
+
+class FFmpegSplitChaptersPP(FFmpegPostProcessor):
+ def __init__(self, downloader, force_keyframes=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._force_keyframes = force_keyframes
+
+ def _prepare_filename(self, number, chapter, info):
+ info = info.copy()
+ info.update({
+ 'section_number': number,
+ 'section_title': chapter.get('title'),
+ 'section_start': chapter.get('start_time'),
+ 'section_end': chapter.get('end_time'),
+ })
+ return self._downloader.prepare_filename(info, 'chapter')
+
+ def _ffmpeg_args_for_chapter(self, number, chapter, info):
+ destination = self._prepare_filename(number, chapter, info)
+ if not self._downloader._ensure_dir_exists(encodeFilename(destination)):
+ return
+
+ chapter['filepath'] = destination
+ self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
+ return (
+ destination,
+ ['-ss', compat_str(chapter['start_time']),
+ '-t', compat_str(chapter['end_time'] - chapter['start_time'])])
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ chapters = info.get('chapters') or []
+ if not chapters:
+ self.to_screen('Chapter information is unavailable')
+ return [], info
+
+ in_file = info['filepath']
+ if self._force_keyframes and len(chapters) > 1:
+ in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters))
+ self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters))
+ for idx, chapter in enumerate(chapters):
+ destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info)
+ self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])])
+ if in_file != info['filepath']:
+ os.remove(in_file)
+ return [], info
+
+
+class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
+ SUPPORTED_EXTS = ('jpg', 'png')
+
+ def __init__(self, downloader=None, format=None):
+ super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)
+ self.format = format
+
+ @staticmethod
+ def is_webp(path):
+ with open(encodeFilename(path), 'rb') as f:
+ b = f.read(12)
+ return b[0:4] == b'RIFF' and b[8:] == b'WEBP'
+
+ def fixup_webp(self, info, idx=-1):
+ thumbnail_filename = info['thumbnails'][idx]['filepath']
+ _, thumbnail_ext = os.path.splitext(thumbnail_filename)
+ if thumbnail_ext:
+ thumbnail_ext = thumbnail_ext[1:].lower()
+ if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
+ self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
+ webp_filename = replace_extension(thumbnail_filename, 'webp')
+ os.replace(thumbnail_filename, webp_filename)
+ info['thumbnails'][idx]['filepath'] = webp_filename
+ info['__files_to_move'][webp_filename] = replace_extension(
+ info['__files_to_move'].pop(thumbnail_filename), 'webp')
+
+ @staticmethod
+ def _options(target_ext):
+ if target_ext == 'jpg':
+ return ['-bsf:v', 'mjpeg2jpeg']
+ return []
+
+ def convert_thumbnail(self, thumbnail_filename, target_ext):
+ thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)
+
+ self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
+ self.real_run_ffmpeg(
+ [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
+ [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
+ return thumbnail_conv_filename
+
+ def run(self, info):
+ files_to_delete = []
+ has_thumbnail = False
+
+ for idx, thumbnail_dict in enumerate(info['thumbnails']):
+ if 'filepath' not in thumbnail_dict:
+ continue
+ has_thumbnail = True
+ self.fixup_webp(info, idx)
+ original_thumbnail = thumbnail_dict['filepath']
+ _, thumbnail_ext = os.path.splitext(original_thumbnail)
+ if thumbnail_ext:
+ thumbnail_ext = thumbnail_ext[1:].lower()
+ if thumbnail_ext == 'jpeg':
+ thumbnail_ext = 'jpg'
+ if thumbnail_ext == self.format:
+ self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
+ continue
+ thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
+ files_to_delete.append(original_thumbnail)
+ info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
+ info['__files_to_move'][original_thumbnail], self.format)
+
+ if not has_thumbnail:
+ self.to_screen('There aren\'t any thumbnails to convert')
+ return files_to_delete, info
diff --git a/hypervideo_dl/postprocessor/metadataparser.py b/hypervideo_dl/postprocessor/metadataparser.py
new file mode 100644
index 0000000..96aac9b
--- /dev/null
+++ b/hypervideo_dl/postprocessor/metadataparser.py
@@ -0,0 +1,116 @@
+import re
+
+from enum import Enum
+
+from .common import PostProcessor
+
+
+class MetadataParserPP(PostProcessor):
+ class Actions(Enum):
+ INTERPRET = 'interpretter'
+ REPLACE = 'replacer'
+
+ def __init__(self, downloader, actions):
+ PostProcessor.__init__(self, downloader)
+ self._actions = []
+ for f in actions:
+ action = f[0]
+ assert isinstance(action, self.Actions)
+ self._actions.append(getattr(self, action._value_)(*f[1:]))
+
+ @classmethod
+ def validate_action(cls, action, *data):
+ ''' Each action can be:
+ (Actions.INTERPRET, from, to) OR
+ (Actions.REPLACE, field, search, replace)
+ '''
+ if not isinstance(action, cls.Actions):
+ raise ValueError(f'{action!r} is not a valid action')
+ getattr(cls, action._value_)(cls, *data)
+
+ @staticmethod
+ def field_to_template(tmpl):
+ if re.match(r'[a-zA-Z_]+$', tmpl):
+ return f'%({tmpl})s'
+ return tmpl
+
+ @staticmethod
+ def format_to_regex(fmt):
+ r"""
+ Converts a string like
+ '%(title)s - %(artist)s'
+ to a regex like
+ '(?P<title>.+)\ \-\ (?P<artist>.+)'
+ """
+ if not re.search(r'%\(\w+\)s', fmt):
+ return fmt
+ lastpos = 0
+ regex = ''
+ # replace %(..)s with regex group and escape other string parts
+ for match in re.finditer(r'%\((\w+)\)s', fmt):
+ regex += re.escape(fmt[lastpos:match.start()])
+ regex += rf'(?P<{match.group(1)}>.+)'
+ lastpos = match.end()
+ if lastpos < len(fmt):
+ regex += re.escape(fmt[lastpos:])
+ return regex
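format_to_regex turns an output template into named capture groups; interpretter then copies each group back into the info dict. A standalone illustration:

    import re

    # format_to_regex('%(title)s - %(artist)s') gives:
    pattern = r'(?P<title>.+)\ \-\ (?P<artist>.+)'
    match = re.search(pattern, 'Never Gonna Give You Up - Rick Astley')
    print(match.group('title'))   # Never Gonna Give You Up
    print(match.group('artist'))  # Rick Astley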
+
+ def run(self, info):
+ for f in self._actions:
+ f(info)
+ return [], info
+
+ def interpretter(self, inp, out):
+ def f(info):
+ data_to_parse = self._downloader.evaluate_outtmpl(template, info)
+ self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
+ match = out_re.search(data_to_parse)
+ if match is None:
+ self.report_warning(f'Could not interpret {inp!r} as {out!r}')
+ return
+ for attribute, value in match.groupdict().items():
+ info[attribute] = value
+ self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA'))
+
+ template = self.field_to_template(inp)
+ out_re = re.compile(self.format_to_regex(out))
+ return f
+
+ def replacer(self, field, search, replace):
+ def f(info):
+ val = info.get(field)
+ if val is None:
+ self.report_warning(f'Video does not have a {field}')
+ return
+ elif not isinstance(val, str):
+ self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
+ return
+ self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
+ info[field], n = search_re.subn(replace, val)
+ if n:
+ self.to_screen(f'Changed {field} to: {info[field]}')
+ else:
+ self.to_screen(f'Did not find {search!r} in {field}')
+
+ search_re = re.compile(search)
+ return f
+
+
+class MetadataFromFieldPP(MetadataParserPP):
+ @classmethod
+ def to_action(cls, f):
+ match = re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
+ if match is None:
+ raise ValueError(f'it should be FROM:TO, not {f!r}')
+ return (
+ cls.Actions.INTERPRET,
+ match.group('in').replace('\\:', ':'),
+ match.group('out'))
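to_action splits on the first unescaped colon, so a literal ':' in the FROM part must be written as '\:'. A small check of the same regex, outside the class:

    import re

    def split_from_to(f):
        # FROM is the field to parse, TO is the template to extract
        match = re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        assert match, f'it should be FROM:TO, not {f!r}'
        return match.group('in').replace('\\:', ':'), match.group('out')

    print(split_from_to('title:%(artist)s - %(title)s'))
    # ('title', '%(artist)s - %(title)s')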
+
+ def __init__(self, downloader, formats):
+ MetadataParserPP.__init__(self, downloader, [self.to_action(f) for f in formats])
+
+
+class MetadataFromTitlePP(MetadataParserPP): # for backward compatibility
+ def __init__(self, downloader, titleformat):
+ MetadataParserPP.__init__(self, downloader, [(self.Actions.INTERPRET, 'title', titleformat)])
diff --git a/hypervideo_dl/postprocessor/modify_chapters.py b/hypervideo_dl/postprocessor/modify_chapters.py
new file mode 100644
index 0000000..a0818c4
--- /dev/null
+++ b/hypervideo_dl/postprocessor/modify_chapters.py
@@ -0,0 +1,336 @@
+import copy
+import heapq
+import os
+
+from .common import PostProcessor
+from .ffmpeg import (
+ FFmpegPostProcessor,
+ FFmpegSubtitlesConvertorPP
+)
+from .sponsorblock import SponsorBlockPP
+from ..utils import (
+ orderedSet,
+ PostProcessingError,
+ prepend_extension,
+)
+
+
+_TINY_CHAPTER_DURATION = 1
+DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
+
+
+class ModifyChaptersPP(FFmpegPostProcessor):
+ def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None,
+ *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._remove_chapters_patterns = set(remove_chapters_patterns or [])
+ self._remove_sponsor_segments = set(remove_sponsor_segments or [])
+ self._ranges_to_remove = set(remove_ranges or [])
+ self._sponsorblock_chapter_title = sponsorblock_chapter_title
+ self._force_keyframes = force_keyframes
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, info):
+ chapters, sponsor_chapters = self._mark_chapters_to_remove(
+ info.get('chapters') or [], info.get('sponsorblock_chapters') or [])
+ if not chapters and not sponsor_chapters:
+ return [], info
+
+ real_duration = self._get_real_video_duration(info)
+ if not chapters:
+ chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}]
+
+ info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
+ if not cuts:
+ return [], info
+
+ if self._duration_mismatch(real_duration, info.get('duration')):
+ if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']):
+ self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
+ return [], info
+ if not info.get('__real_download'):
+ raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. '
+ 'Different chapters may have already been removed')
+ else:
+ self.write_debug('Expected and actual durations mismatch')
+
+ concat_opts = self._make_concat_opts(cuts, real_duration)
+
+ def remove_chapters(file, is_sub):
+ return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub)
+
+ in_out_files = [remove_chapters(info['filepath'], False)]
+ in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info))
+
+ # Renaming should only happen after all files are processed
+ files_to_remove = []
+ for in_file, out_file in in_out_files:
+ uncut_file = prepend_extension(in_file, 'uncut')
+ os.replace(in_file, uncut_file)
+ os.replace(out_file, in_file)
+ files_to_remove.append(uncut_file)
+
+ info['_real_duration'] = info['chapters'][-1]['end_time']
+ return files_to_remove, info
+
+ def _mark_chapters_to_remove(self, chapters, sponsor_chapters):
+ if self._remove_chapters_patterns:
+ warn_no_chapter_to_remove = True
+ if not chapters:
+ self.to_screen('Chapter information is unavailable')
+ warn_no_chapter_to_remove = False
+ for c in chapters:
+ if any(regex.search(c['title']) for regex in self._remove_chapters_patterns):
+ c['remove'] = True
+ warn_no_chapter_to_remove = False
+ if warn_no_chapter_to_remove:
+ self.to_screen('There are no chapters matching the regex')
+
+ if self._remove_sponsor_segments:
+ warn_no_chapter_to_remove = True
+ if not sponsor_chapters:
+ self.to_screen('SponsorBlock information is unavailable')
+ warn_no_chapter_to_remove = False
+ for c in sponsor_chapters:
+ if c['category'] in self._remove_sponsor_segments:
+ c['remove'] = True
+ warn_no_chapter_to_remove = False
+ if warn_no_chapter_to_remove:
+ self.to_screen('There are no matching SponsorBlock chapters')
+
+ sponsor_chapters.extend({
+ 'start_time': start,
+ 'end_time': end,
+ 'category': 'manually_removed',
+ '_categories': [('manually_removed', start, end)],
+ 'remove': True,
+ } for start, end in self._ranges_to_remove)
+
+ return chapters, sponsor_chapters
+
+ def _get_supported_subs(self, info):
+ for sub in (info.get('requested_subtitles') or {}).values():
+ sub_file = sub.get('filepath')
+ # The file might have been removed by --embed-subs
+ if not sub_file or not os.path.exists(sub_file):
+ continue
+ ext = sub['ext']
+ if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
+ self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
+ continue
+ # TODO: create __real_download for subs?
+ yield sub_file
+
+ def _remove_marked_arrange_sponsors(self, chapters):
+ # Store cuts separately, since adjacent and overlapping cuts must be merged.
+ cuts = []
+
+ def append_cut(c):
+ assert 'remove' in c
+ last_to_cut = cuts[-1] if cuts else None
+ if last_to_cut and last_to_cut['end_time'] >= c['start_time']:
+ last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time'])
+ else:
+ cuts.append(c)
+ return len(cuts) - 1
+
+ def excess_duration(c):
+ # Cuts that are completely within the chapter reduce chapters' duration.
+ # Since cuts can overlap, excess duration may be less than the sum of cuts' durations.
+ # To avoid that, the chapter stores the index of the first cut within the chapter,
+ # instead of storing excess duration. append_cut ensures that subsequent cuts (if any)
+ # will be merged with previous ones (if necessary).
+ cut_idx, excess = c.pop('cut_idx', len(cuts)), 0
+ while cut_idx < len(cuts):
+ cut = cuts[cut_idx]
+ if cut['start_time'] >= c['end_time']:
+ break
+ if cut['end_time'] > c['start_time']:
+ excess += min(cut['end_time'], c['end_time'])
+ excess -= max(cut['start_time'], c['start_time'])
+ cut_idx += 1
+ return excess
+
+ new_chapters = []
+
+ def append_chapter(c):
+ assert 'remove' not in c
+ length = c['end_time'] - c['start_time'] - excess_duration(c)
+ # Chapter is completely covered by cuts or sponsors.
+ if length <= 0:
+ return
+ start = new_chapters[-1]['end_time'] if new_chapters else 0
+ c.update(start_time=start, end_time=start + length)
+ new_chapters.append(c)
+
+ # Turn into a priority queue, index is a tie breaker.
+ # Plain stack sorted by start_time is not enough: after splitting the chapter,
+ # the part returned to the stack is not guaranteed to have start_time
+ # less than or equal to that of the stack's head.
+ chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)]
+ heapq.heapify(chapters)
+
+ _, cur_i, cur_chapter = heapq.heappop(chapters)
+ while chapters:
+ _, i, c = heapq.heappop(chapters)
+ # Non-overlapping chapters or cuts can be appended directly. However,
+ # adjacent non-overlapping cuts must be merged, which is handled by append_cut.
+ if cur_chapter['end_time'] <= c['start_time']:
+ (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
+ cur_i, cur_chapter = i, c
+ continue
+
+ # Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor),
+ # (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor),
+ # (sponsor, normal), and (normal, sponsor). There is no (normal, normal):
+ # normal chapters are assumed not to overlap.
+ if 'remove' in cur_chapter:
+ # (cut, cut): adjust end_time.
+ if 'remove' in c:
+ cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time'])
+ # (cut, sponsor/normal): chop the beginning of the later chapter
+ # (if it's not completely hidden by the cut). Push to the priority queue
+ # to restore sorting by start_time: with beginning chopped, c may actually
+ # start later than the remaining chapters from the queue.
+ elif cur_chapter['end_time'] < c['end_time']:
+ c['start_time'] = cur_chapter['end_time']
+ c['_was_cut'] = True
+ heapq.heappush(chapters, (c['start_time'], i, c))
+ # (sponsor/normal, cut).
+ elif 'remove' in c:
+ cur_chapter['_was_cut'] = True
+ # Chop the end of the current chapter if the cut is not contained within it.
+ # Chopping the end doesn't break start_time sorting, no PQ push is necessary.
+ if cur_chapter['end_time'] <= c['end_time']:
+ cur_chapter['end_time'] = c['start_time']
+ append_chapter(cur_chapter)
+ cur_i, cur_chapter = i, c
+ continue
+ # Current chapter contains the cut within it. If the current chapter is
+ # a sponsor chapter, check whether the categories before and after the cut differ.
+ if '_categories' in cur_chapter:
+ after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[])
+ cur_cats = []
+ for cat_start_end in cur_chapter['_categories']:
+ if cat_start_end[1] < c['start_time']:
+ cur_cats.append(cat_start_end)
+ if cat_start_end[2] > c['end_time']:
+ after_c['_categories'].append(cat_start_end)
+ cur_chapter['_categories'] = cur_cats
+ if cur_chapter['_categories'] != after_c['_categories']:
+ # Categories before and after the cut differ: push the after part to PQ.
+ heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
+ cur_chapter['end_time'] = c['start_time']
+ append_chapter(cur_chapter)
+ cur_i, cur_chapter = i, c
+ continue
+ # Either sponsor categories before and after the cut are the same or
+ # we're dealing with a normal chapter. Just register an outstanding cut:
+ # subsequent append_chapter will reduce the duration.
+ cur_chapter.setdefault('cut_idx', append_cut(c))
+ # (sponsor, normal): if a normal chapter is not completely overlapped,
+ # chop the beginning of it and push it to PQ.
+ elif '_categories' in cur_chapter and '_categories' not in c:
+ if cur_chapter['end_time'] < c['end_time']:
+ c['start_time'] = cur_chapter['end_time']
+ c['_was_cut'] = True
+ heapq.heappush(chapters, (c['start_time'], i, c))
+ # (normal, sponsor) and (sponsor, sponsor)
+ else:
+ assert '_categories' in c
+ cur_chapter['_was_cut'] = True
+ c['_was_cut'] = True
+ # Push the part after the sponsor to PQ.
+ if cur_chapter['end_time'] > c['end_time']:
+ # deepcopy to make categories in after_c and cur_chapter/c refer to different lists.
+ after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time'])
+ heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c))
+ # Push the part after the overlap to PQ.
+ elif c['end_time'] > cur_chapter['end_time']:
+ after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time'])
+ heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur))
+ c['end_time'] = cur_chapter['end_time']
+ # (sponsor, sponsor): merge categories in the overlap.
+ if '_categories' in cur_chapter:
+ c['_categories'] = cur_chapter['_categories'] + c['_categories']
+ # Inherit the cuts that the current chapter has accumulated within it.
+ if 'cut_idx' in cur_chapter:
+ c['cut_idx'] = cur_chapter['cut_idx']
+ cur_chapter['end_time'] = c['start_time']
+ append_chapter(cur_chapter)
+ cur_i, cur_chapter = i, c
+ (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter)
+ return self._remove_tiny_rename_sponsors(new_chapters), cuts
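A hand-worked example of the arrangement (computed by hand, not program output): one normal chapter spanning the whole video and one sponsor segment marked for removal.

    chapters = [{'start_time': 0, 'end_time': 100, 'title': 'Video'}]
    cut = {'start_time': 10, 'end_time': 20, 'remove': True,
           'category': 'sponsor', '_categories': [('sponsor', 10, 20)]}
    # _remove_marked_arrange_sponsors(chapters + [cut]) should return:
    #   new_chapters -> [{'start_time': 0, 'end_time': 90, 'title': 'Video'}]
    #   cuts         -> [the 10-20 range]
    # The chapter keeps its title; its duration shrinks by the 10 cut seconds.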
+
+ def _remove_tiny_rename_sponsors(self, chapters):
+ new_chapters = []
+ for i, c in enumerate(chapters):
+ # Merge with the previous/next if the chapter is tiny.
+ # Only tiny chapters resulting from a cut can be skipped.
+ # Chapters that were already tiny in the original list will be preserved.
+ if (('_was_cut' in c or '_categories' in c)
+ and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION):
+ if not new_chapters:
+ # Prepend tiny chapter to the next one if possible.
+ if i < len(chapters) - 1:
+ chapters[i + 1]['start_time'] = c['start_time']
+ continue
+ else:
+ old_c = new_chapters[-1]
+ if i < len(chapters) - 1:
+ next_c = chapters[i + 1]
+ # Not a typo: key names in old_c and next_c are really different.
+ prev_is_sponsor = 'categories' in old_c
+ next_is_sponsor = '_categories' in next_c
+ # Preferentially prepend tiny normals to normals and sponsors to sponsors.
+ if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor)
+ or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)):
+ next_c['start_time'] = c['start_time']
+ continue
+ old_c['end_time'] = c['end_time']
+ continue
+
+ c.pop('_was_cut', None)
+ cats = c.pop('_categories', None)
+ if cats:
+ category = min(cats, key=lambda c: c[2] - c[1])[0]
+ cats = orderedSet(x[0] for x in cats)
+ c.update({
+ 'category': category,
+ 'categories': cats,
+ 'name': SponsorBlockPP.CATEGORIES[category],
+ 'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats]
+ })
+ c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c)
+ # Merge identically named sponsors.
+ if (new_chapters and 'categories' in new_chapters[-1]
+ and new_chapters[-1]['title'] == c['title']):
+ new_chapters[-1]['end_time'] = c['end_time']
+ continue
+ new_chapters.append(c)
+ return new_chapters
+
+ def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False):
+ in_file = filename
+ out_file = prepend_extension(in_file, 'temp')
+ if force_keyframes:
+ in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time'])))
+ self.to_screen(f'Removing chapters from {filename}')
+ self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
+ if in_file != filename:
+ os.remove(in_file)
+ return out_file
+
+ @staticmethod
+ def _make_concat_opts(chapters_to_remove, duration):
+ opts = [{}]
+ for s in chapters_to_remove:
+ # Do not create 0 duration chunk at the beginning.
+ if s['start_time'] == 0:
+ opts[-1]['inpoint'] = f'{s["end_time"]:.6f}'
+ continue
+ opts[-1]['outpoint'] = f'{s["start_time"]:.6f}'
+ # Do not create 0 duration chunk at the end.
+ if s['end_time'] != duration:
+ opts.append({'inpoint': f'{s["end_time"]:.6f}'})
+ return opts
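For illustration, a hedged sketch of the options produced for a single 10s-20s cut in a 60s file, and roughly how they render as a concat-demuxer script (file name hypothetical):

    # _make_concat_opts([{'start_time': 10.0, 'end_time': 20.0}], 60.0)
    # -> [{'outpoint': '10.000000'}, {'inpoint': '20.000000'}]
    # concat_files then emits one "file" directive per entry, roughly:
    #   file 'input.mkv'
    #   outpoint 10.000000
    #   file 'input.mkv'
    #   inpoint 20.000000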
diff --git a/hypervideo_dl/postprocessor/movefilesafterdownload.py b/hypervideo_dl/postprocessor/movefilesafterdownload.py
new file mode 100644
index 0000000..1064a8c
--- /dev/null
+++ b/hypervideo_dl/postprocessor/movefilesafterdownload.py
@@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+import os
+import shutil
+
+from .common import PostProcessor
+from ..utils import (
+ decodeFilename,
+ encodeFilename,
+ make_dir,
+ PostProcessingError,
+)
+
+
+class MoveFilesAfterDownloadPP(PostProcessor):
+
+ def __init__(self, downloader=None, downloaded=True):
+ PostProcessor.__init__(self, downloader)
+ self._downloaded = downloaded
+
+ @classmethod
+ def pp_key(cls):
+ return 'MoveFiles'
+
+ def run(self, info):
+ dl_path, dl_name = os.path.split(encodeFilename(info['filepath']))
+ finaldir = info.get('__finaldir', dl_path)
+ finalpath = os.path.join(finaldir, dl_name)
+ if self._downloaded:
+ info['__files_to_move'][info['filepath']] = decodeFilename(finalpath)
+
+ make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old))))
+ for oldfile, newfile in info['__files_to_move'].items():
+ if not newfile:
+ newfile = make_newfilename(oldfile)
+ if os.path.abspath(encodeFilename(oldfile)) == os.path.abspath(encodeFilename(newfile)):
+ continue
+ if not os.path.exists(encodeFilename(oldfile)):
+ self.report_warning('File "%s" cannot be found' % oldfile)
+ continue
+ if os.path.exists(encodeFilename(newfile)):
+ if self.get_param('overwrites', True):
+ self.report_warning('Replacing existing file "%s"' % newfile)
+ os.remove(encodeFilename(newfile))
+ else:
+ self.report_warning(
+ 'Cannot move file "%s" out of temporary directory since "%s" already exists. '
+ % (oldfile, newfile))
+ continue
+ make_dir(newfile, PostProcessingError)
+ self.to_screen('Moving file "%s" to "%s"' % (oldfile, newfile))
+ shutil.move(oldfile, newfile) # os.rename cannot move between volumes
+
+ info['filepath'] = finalpath
+ return [], info
diff --git a/hypervideo_dl/postprocessor/sponskrub.py b/hypervideo_dl/postprocessor/sponskrub.py
new file mode 100644
index 0000000..932555a
--- /dev/null
+++ b/hypervideo_dl/postprocessor/sponskrub.py
@@ -0,0 +1,96 @@
+from __future__ import unicode_literals
+import os
+import subprocess
+
+from .common import PostProcessor
+from ..compat import compat_shlex_split
+from ..utils import (
+ check_executable,
+ cli_option,
+ encodeArgument,
+ encodeFilename,
+ shell_quote,
+ str_or_none,
+ PostProcessingError,
+ prepend_extension,
+ process_communicate_or_kill,
+)
+
+
+# Deprecated in favor of the native implementation
+class SponSkrubPP(PostProcessor):
+ _temp_ext = 'spons'
+ _exe_name = 'sponskrub'
+
+ def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False):
+ PostProcessor.__init__(self, downloader)
+ self.force = force
+ self.cutout = cut
+ self.args = str_or_none(args) or '' # For backward compatibility
+ self.path = self.get_exe(path)
+
+ if not ignoreerror and self.path is None:
+ if path:
+ raise PostProcessingError('sponskrub not found in "%s"' % path)
+ else:
+ raise PostProcessingError('sponskrub not found. Please install or provide the path using --sponskrub-path')
+
+ def get_exe(self, path=''):
+ if not path or not check_executable(path, ['-h']):
+ path = os.path.join(path, self._exe_name)
+ if not check_executable(path, ['-h']):
+ return None
+ return path
+
+ @PostProcessor._restrict_to(images=False)
+ def run(self, information):
+ if self.path is None:
+ return [], information
+
+ filename = information['filepath']
+ if not os.path.exists(encodeFilename(filename)): # no download
+ return [], information
+
+ if information['extractor_key'].lower() != 'youtube':
+ self.to_screen('Skipping sponskrub since it is not a YouTube video')
+ return [], information
+ if self.cutout and not self.force and not information.get('__real_download', False):
+ self.report_warning(
+ 'Skipping sponskrub since the video was already downloaded. '
+ 'Use --sponskrub-force to run sponskrub anyway')
+ return [], information
+
+ self.to_screen('Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark'))
+ if self.cutout:
+ self.report_warning('Cutting out sponsor segments will cause the subtitles to go out of sync.')
+ if not information.get('__real_download', False):
+ self.report_warning('If sponskrub is run multiple times, unintended parts of the video could be cut out.')
+
+ temp_filename = prepend_extension(filename, self._temp_ext)
+ if os.path.exists(encodeFilename(temp_filename)):
+ os.remove(encodeFilename(temp_filename))
+
+ cmd = [self.path]
+ if not self.cutout:
+ cmd += ['-chapter']
+ cmd += cli_option(self._downloader.params, '-proxy', 'proxy')
+ cmd += compat_shlex_split(self.args) # For backward compatibility
+ cmd += self._configuration_args(self._exe_name, use_compat=False)
+ cmd += ['--', information['id'], filename, temp_filename]
+ cmd = [encodeArgument(i) for i in cmd]
+
+ self.write_debug('sponskrub command line: %s' % shell_quote(cmd))
+ pipe = None if self.get_param('verbose') else subprocess.PIPE
+ p = subprocess.Popen(cmd, stdout=pipe)
+ stdout = process_communicate_or_kill(p)[0]
+
+ if p.returncode == 0:
+ os.replace(temp_filename, filename)
+ self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
+ elif p.returncode == 3:
+ self.to_screen('No segments in the SponsorBlock database')
+ else:
+ msg = stdout.decode('utf-8', 'replace').strip() if stdout else ''
+ msg = msg.split('\n')[0 if msg.lower().startswith('unrecognised') else -1]
+ raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s' % p.returncode)
+ return [], information
diff --git a/hypervideo_dl/postprocessor/sponsorblock.py b/hypervideo_dl/postprocessor/sponsorblock.py
new file mode 100644
index 0000000..7265a9d
--- /dev/null
+++ b/hypervideo_dl/postprocessor/sponsorblock.py
@@ -0,0 +1,96 @@
+import json
+import re
+from hashlib import sha256
+
+from .ffmpeg import FFmpegPostProcessor
+from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
+from ..utils import PostProcessingError, network_exceptions, sanitized_Request
+
+
+class SponsorBlockPP(FFmpegPostProcessor):
+
+ EXTRACTORS = {
+ 'Youtube': 'YouTube',
+ }
+ CATEGORIES = {
+ 'sponsor': 'Sponsor',
+ 'intro': 'Intermission/Intro Animation',
+ 'outro': 'Endcards/Credits',
+ 'selfpromo': 'Unpaid/Self Promotion',
+ 'interaction': 'Interaction Reminder',
+ 'preview': 'Preview/Recap',
+ 'music_offtopic': 'Non-Music Section'
+ }
+
+ def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
+ FFmpegPostProcessor.__init__(self, downloader)
+ self._categories = tuple(categories or self.CATEGORIES.keys())
+ self._API_URL = api if re.match('^https?://', api) else 'https://' + api
+
+ def run(self, info):
+ extractor = info['extractor_key']
+ if extractor not in self.EXTRACTORS:
+ self.to_screen(f'SponsorBlock is not supported for {extractor}')
+ return [], info
+
+ info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration'])
+ return [], info
+
+ def _get_sponsor_chapters(self, info, duration):
+ segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])
+
+ def duration_filter(s):
+ start_end = s['segment']
+ # Ignore milliseconds difference at the start.
+ if start_end[0] <= 1:
+ start_end[0] = 0
+ # Ignore milliseconds difference at the end.
+ # Never allow the segment to exceed the video.
+ if duration and duration - start_end[1] <= 1:
+ start_end[1] = duration
+ # SponsorBlock duration may be absent or it may deviate from the real one.
+ return s['videoDuration'] == 0 or not duration or abs(duration - s['videoDuration']) <= 1
+
+ duration_match = [s for s in segments if duration_filter(s)]
+ if len(duration_match) != len(segments):
+ self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')
+
+ def to_chapter(s):
+ (start, end), cat = s['segment'], s['category']
+ return {
+ 'start_time': start,
+ 'end_time': end,
+ 'category': cat,
+ 'title': self.CATEGORIES[cat],
+ '_categories': [(cat, start, end)]
+ }
+
+ sponsor_chapters = [to_chapter(s) for s in duration_match]
+ if not sponsor_chapters:
+ self.to_screen('No segments were found in the SponsorBlock database')
+ else:
+ self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
+ return sponsor_chapters
+
+ def _get_sponsor_segments(self, video_id, service):
+ hash = sha256(video_id.encode('ascii')).hexdigest()
+ # SponsorBlock API recommends using first 4 hash characters.
+ url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + compat_urllib_parse_urlencode({
+ 'service': service,
+ 'categories': json.dumps(self._categories),
+ })
+ for d in self._get_json(url):
+ if d['videoID'] == video_id:
+ return d['segments']
+ return []
+
+ def _get_json(self, url):
+ self.write_debug(f'SponsorBlock query: {url}')
+ try:
+ rsp = self._downloader.urlopen(sanitized_Request(url))
+ except network_exceptions as e:
+ if isinstance(e, compat_HTTPError) and e.code == 404:
+ return []
+ raise PostProcessingError(f'Unable to communicate with SponsorBlock API - {e}')
+
+ return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
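As a rough standalone sketch of the same privacy-preserving lookup (stdlib only; endpoint and field names as used above): only the first 4 hex characters of the video id's sha256 are sent, and the full id is matched locally:

    import json
    from hashlib import sha256
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def sponsorblock_segments(video_id, categories=('sponsor',)):
        prefix = sha256(video_id.encode('ascii')).hexdigest()[:4]
        url = 'https://sponsor.ajay.app/api/skipSegments/%s?%s' % (prefix, urlencode({
            'service': 'YouTube', 'categories': json.dumps(list(categories))}))
        data = json.loads(urlopen(url).read().decode('utf-8'))
        # The response covers every video whose hash starts with the prefix;
        # filter to the one actually asked about.
        return next((d['segments'] for d in data if d['videoID'] == video_id), [])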
diff --git a/hypervideo_dl/postprocessor/xattrpp.py b/hypervideo_dl/postprocessor/xattrpp.py
index 814dabe..93acd6d 100644
--- a/hypervideo_dl/postprocessor/xattrpp.py
+++ b/hypervideo_dl/postprocessor/xattrpp.py
@@ -5,13 +5,13 @@ from ..compat import compat_os_name
from ..utils import (
hyphenate_date,
write_xattr,
+ PostProcessingError,
XAttrMetadataError,
XAttrUnavailableError,
)
class XAttrMetadataPP(PostProcessor):
-
#
# More info about extended attributes for media:
# http://freedesktop.org/wiki/CommonExtendedAttributes/
@@ -27,7 +27,7 @@ class XAttrMetadataPP(PostProcessor):
""" Set extended attributes on downloaded file (if xattr support is found). """
# Write the metadata to the file's xattrs
- self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
+ self.to_screen('Writing metadata to file\'s xattrs')
filename = info['filepath']
@@ -58,16 +58,15 @@ class XAttrMetadataPP(PostProcessor):
return [], info
except XAttrUnavailableError as e:
- self._downloader.report_error(str(e))
- return [], info
+ raise PostProcessingError(str(e))
except XAttrMetadataError as e:
if e.reason == 'NO_SPACE':
- self._downloader.report_warning(
+ self.report_warning(
'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
+ (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
elif e.reason == 'VALUE_TOO_LONG':
- self._downloader.report_warning(
+ self.report_warning(
'Unable to write extended attributes due to too long values.')
else:
msg = 'This filesystem doesn\'t support extended attributes. '
@@ -75,5 +74,5 @@ class XAttrMetadataPP(PostProcessor):
msg += 'You need to use NTFS.'
else:
msg += '(You may have to enable them in your /etc/fstab)'
- self._downloader.report_error(msg)
+ raise PostProcessingError(str(e))
return [], info
diff --git a/hypervideo_dl/utils.py b/hypervideo_dl/utils.py
index fc62f09..0199f4c 100644
--- a/hypervideo_dl/utils.py
+++ b/hypervideo_dl/utils.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
@@ -16,6 +16,9 @@ import email.header
import errno
import functools
import gzip
+import hashlib
+import hmac
+import importlib.util
import io
import itertools
import json
@@ -50,6 +53,7 @@ from .compat import (
compat_html_entities_html5,
compat_http_client,
compat_integer_types,
+ compat_numeric_types,
compat_kwargs,
compat_os_name,
compat_parse_qs,
@@ -61,6 +65,9 @@ from .compat import (
compat_urllib_parse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
+ compat_urllib_parse_urlunparse,
+ compat_urllib_parse_quote,
+ compat_urllib_parse_quote_plus,
compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
@@ -1735,12 +1742,16 @@ DATE_FORMATS = (
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
+ '%Y.%m.%d.',
'%Y/%m/%d',
'%Y/%m/%d %H:%M',
'%Y/%m/%d %H:%M:%S',
+ '%Y%m%d%H%M',
+ '%Y%m%d%H%M%S',
'%Y-%m-%d %H:%M',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
+ '%Y-%m-%d %H:%M:%S:%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
@@ -1753,6 +1764,7 @@ DATE_FORMATS = (
'%b %d %Y at %H:%M:%S',
'%B %d %Y at %H:%M',
'%B %d %Y at %H:%M:%S',
+ '%H:%M %d-%b-%Y',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -1985,6 +1997,7 @@ def get_elements_by_attribute(attribute, value, html, escape_value=True):
class HTMLAttributeParser(compat_HTMLParser):
"""Trivial HTML parser to gather the attributes for a single element"""
+
def __init__(self):
self.attrs = {}
compat_HTMLParser.__init__(self)
@@ -2086,7 +2099,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
def replace_insane(char):
if restricted and char in ACCENT_CHARS:
return ACCENT_CHARS[char]
- if char == '?' or ord(char) < 32 or ord(char) == 127:
+ elif not restricted and char == '\n':
+ return ' '
+ elif char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
return '' if restricted else '\''
@@ -2100,6 +2115,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
return '_'
return char
+ if s == '':
+ return ''
# Handle timestamps
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
result = ''.join(map(replace_insane, s))
@@ -2118,13 +2135,18 @@ def sanitize_filename(s, restricted=False, is_id=False):
return result
-def sanitize_path(s):
+def sanitize_path(s, force=False):
"""Sanitizes and normalizes path on Windows"""
- if sys.platform != 'win32':
+ if sys.platform == 'win32':
+ force = False
+ drive_or_unc, _ = os.path.splitdrive(s)
+ if sys.version_info < (2, 7) and not drive_or_unc:
+ drive_or_unc, _ = os.path.splitunc(s)
+ elif force:
+ drive_or_unc = ''
+ else:
return s
- drive_or_unc, _ = os.path.splitdrive(s)
- if sys.version_info < (2, 7) and not drive_or_unc:
- drive_or_unc, _ = os.path.splitunc(s)
+
norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
if drive_or_unc:
norm_path.pop(0)
@@ -2133,6 +2155,8 @@ def sanitize_path(s):
for path_part in norm_path]
if drive_or_unc:
sanitized_path.insert(0, drive_or_unc + os.path.sep)
+ elif force and s[0] == os.path.sep:
+ sanitized_path.insert(0, os.path.sep)
return os.path.join(*sanitized_path)
@@ -2154,8 +2178,24 @@ def sanitize_url(url):
return url
+def extract_basic_auth(url):
+ parts = compat_urlparse.urlsplit(url)
+ if parts.username is None:
+ return url, None
+ url = compat_urlparse.urlunsplit(parts._replace(netloc=(
+ parts.hostname if parts.port is None
+ else '%s:%d' % (parts.hostname, parts.port))))
+ auth_payload = base64.b64encode(
+ ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
+ return url, 'Basic ' + auth_payload.decode('utf-8')
+
+
def sanitized_Request(url, *args, **kwargs):
- return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
+ url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
+ if auth_header is not None:
+ headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
+ headers['Authorization'] = auth_header
+ return compat_urllib_request.Request(url, *args, **kwargs)
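A sketch of the new behaviour: credentials embedded in a URL are stripped out and moved into a Basic Authorization header:

    url, auth = extract_basic_auth('https://user:pass@example.com/feed')
    # url == 'https://example.com/feed', auth == 'Basic dXNlcjpwYXNz'
    req = sanitized_Request('https://user:pass@example.com/feed')
    # req.get_header('Authorization') -> 'Basic dXNlcjpwYXNz'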
def expand_path(s):
@@ -2212,6 +2252,26 @@ def unescapeHTML(s):
r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+def escapeHTML(text):
+ return (
+ text
+ .replace('&', '&amp;')
+ .replace('<', '&lt;')
+ .replace('>', '&gt;')
+ .replace('"', '&quot;')
+ .replace("'", '&#39;')
+ )
+
+
+def process_communicate_or_kill(p, *args, **kwargs):
+ try:
+ return p.communicate(*args, **kwargs)
+ except BaseException: # Including KeyboardInterrupt
+ p.kill()
+ p.wait()
+ raise
+
+
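A small usage sketch: unlike a bare communicate(), the helper guarantees the child is killed if the wait is interrupted (e.g. by KeyboardInterrupt):

    import subprocess

    p = subprocess.Popen(['ffprobe', '-version'],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = process_communicate_or_kill(p)  # kills p and re-raises on interrupt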
def get_subprocess_encoding():
if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
# For subprocess calls, encode with locale encoding
@@ -2282,49 +2342,68 @@ def decodeOption(optval):
return optval
-def formatSeconds(secs):
+def formatSeconds(secs, delim=':', msec=False):
if secs > 3600:
- return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
+ ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
elif secs > 60:
- return '%d:%02d' % (secs // 60, secs % 60)
+ ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
else:
- return '%d' % secs
+ ret = '%d' % secs
+    return '%s.%03d' % (ret, (secs % 1) * 1000) if msec else ret
-def make_HTTPS_handler(params, **kwargs):
- opts_no_check_certificate = params.get('nocheckcertificate', False)
- if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
- context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
- if opts_no_check_certificate:
- context.check_hostname = False
- context.verify_mode = ssl.CERT_NONE
+def _ssl_load_windows_store_certs(ssl_context, storename):
+ # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
+ try:
+ certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
+ if encoding == 'x509_asn' and (
+ trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
+ except PermissionError:
+ return
+ for cert in certs:
try:
- return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
- except TypeError:
- # Python 2.7.8
- # (create_default_context present but HTTPSHandler has no context=)
+ ssl_context.load_verify_locations(cadata=cert)
+ except ssl.SSLError:
pass
- if sys.version_info < (3, 2):
- return YoutubeDLHTTPSHandler(params, **kwargs)
- else: # Python < 3.4
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.verify_mode = (ssl.CERT_NONE
- if opts_no_check_certificate
- else ssl.CERT_REQUIRED)
- context.set_default_verify_paths()
- return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
-
-def bug_reports_message():
+def make_HTTPS_handler(params, **kwargs):
+ opts_check_certificate = not params.get('nocheckcertificate')
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ context.check_hostname = opts_check_certificate
+ context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
+ if opts_check_certificate:
+ try:
+ context.load_default_certs()
+ # Work around the issue in load_default_certs when there are bad certificates. See:
+ # https://github.com/hypervideo/hypervideo/issues/1060,
+ # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
+ except ssl.SSLError:
+ # enum_certificates is not present in mingw python. See https://github.com/hypervideo/hypervideo/issues/1151
+ if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
+ # Create a new context to discard any certificates that were already loaded
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
+ for storename in ('CA', 'ROOT'):
+ _ssl_load_windows_store_certs(context, storename)
+ context.set_default_verify_paths()
+ return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+
+
+def bug_reports_message(before=';'):
if ytdl_is_updateable():
update_cmd = 'type doas pacman -Sy hypervideo to update'
else:
- update_cmd = 'see https://yt-dl.org/update on how to update'
- msg = '; please report this issue on https://yt-dl.org/bug .'
+ update_cmd = 'see https://git.conocimientoslibres.ga/software/hypervideo.git/about/#how-do-i-update-hypervideo'
+ msg = 'please report this issue on https://github.com/hypervideo/hypervideo .'
msg += ' Make sure you are using the latest version; %s.' % update_cmd
msg += ' Be sure to call hypervideo with the --verbose flag and include its complete output.'
- return msg
+
+ before = before.rstrip()
+ if not before or before.endswith(('.', '!', '?')):
+ msg = msg[0].title() + msg[1:]
+
+ return (before + ' ' if before else '') + msg
class YoutubeDLError(Exception):
@@ -2332,28 +2411,36 @@ class YoutubeDLError(Exception):
pass
+network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+if hasattr(ssl, 'CertificateError'):
+ network_exceptions.append(ssl.CertificateError)
+network_exceptions = tuple(network_exceptions)
+
+
class ExtractorError(YoutubeDLError):
"""Error during info extraction."""
- def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
+ def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in hypervideo.
"""
-
- if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+ if sys.exc_info()[0] in network_exceptions:
expected = True
- if video_id is not None:
- msg = video_id + ': ' + msg
- if cause:
- msg += ' (caused by %r)' % cause
- if not expected:
- msg += bug_reports_message()
- super(ExtractorError, self).__init__(msg)
+ self.msg = str(msg)
self.traceback = tb
- self.exc_info = sys.exc_info() # preserve original exception
+ self.expected = expected
self.cause = cause
self.video_id = video_id
+ self.ie = ie
+ self.exc_info = sys.exc_info() # preserve original exception
+
+ super(ExtractorError, self).__init__(''.join((
+ format_field(ie, template='[%s] '),
+ format_field(video_id, template='%s: '),
+ self.msg,
+ format_field(cause, template=' (caused by %r)'),
+ '' if expected else bug_reports_message())))
def format_traceback(self):
if self.traceback is None:
@@ -2379,6 +2466,7 @@ class GeoRestrictedError(ExtractorError):
This exception may be thrown when a video is not available from your
geographic location due to geographic restrictions imposed by a website.
"""
+
def __init__(self, msg, countries=None):
super(GeoRestrictedError, self).__init__(msg, expected=True)
self.msg = msg
@@ -2399,6 +2487,15 @@ class DownloadError(YoutubeDLError):
self.exc_info = exc_info
+class EntryNotInPlaylist(YoutubeDLError):
+ """Entry not in playlist exception.
+
+ This exception will be thrown by YoutubeDL when a requested entry
+ is not found in the playlist info_dict
+ """
+ pass
+
+
class SameFileError(YoutubeDLError):
"""Same File exception.
@@ -2420,6 +2517,21 @@ class PostProcessingError(YoutubeDLError):
self.msg = msg
+class ExistingVideoReached(YoutubeDLError):
+    """ --break-on-existing triggered. """
+ pass
+
+
+class RejectedVideoReached(YoutubeDLError):
+    """ --break-on-reject triggered. """
+ pass
+
+
+class ThrottledDownload(YoutubeDLError):
+ """ Download speed below --throttled-rate. """
+ pass
+
+
class MaxDownloadsReached(YoutubeDLError):
""" --max-downloads limit has been reached. """
pass
@@ -2582,6 +2694,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
@staticmethod
def deflate(data):
+ if not data:
+ return data
try:
return zlib.decompress(data, -zlib.MAX_WBITS)
except zlib.error:
@@ -2938,8 +3052,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
def extract_timezone(date_str):
m = re.search(
- r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
- date_str)
+ r'''(?x)
+ ^.{8,}? # >=8 char non-TZ prefix, if present
+ (?P<tz>Z| # just the UTC Z, or
+ (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
+ (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
+ [ ]? # optional space
+ (?P<sign>\+|-) # +/-
+ (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
+ $)
+ ''', date_str)
if not m:
timezone = datetime.timedelta()
else:
@@ -3055,33 +3177,83 @@ def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
-def date_from_str(date_str):
+def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
"""
Return a datetime object from a string in the format YYYYMMDD or
- (now|today)[+-][0-9](day|week|month|year)(s)?"""
- today = datetime.date.today()
+ (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
+
+ format: string date format used to return datetime object from
+ precision: round the time portion of a datetime object.
+ auto|microsecond|second|minute|hour|day.
+ auto: round to the unit provided in date_str (if applicable).
+ """
+ auto_precision = False
+ if precision == 'auto':
+ auto_precision = True
+ precision = 'microsecond'
+ today = datetime_round(datetime.datetime.now(), precision)
if date_str in ('now', 'today'):
return today
if date_str == 'yesterday':
return today - datetime.timedelta(days=1)
- match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+ match = re.match(
+ r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
+ date_str)
if match is not None:
- sign = match.group('sign')
- time = int(match.group('time'))
- if sign == '-':
- time = -time
+ start_time = datetime_from_str(match.group('start'), precision, format)
+ time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
unit = match.group('unit')
- # A bad approximation?
- if unit == 'month':
+ if unit == 'month' or unit == 'year':
+ new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
unit = 'day'
- time *= 30
- elif unit == 'year':
- unit = 'day'
- time *= 365
- unit += 's'
- delta = datetime.timedelta(**{unit: time})
- return today + delta
- return datetime.datetime.strptime(date_str, '%Y%m%d').date()
+ else:
+ if unit == 'week':
+ unit = 'day'
+ time *= 7
+ delta = datetime.timedelta(**{unit + 's': time})
+ new_date = start_time + delta
+ if auto_precision:
+ return datetime_round(new_date, unit)
+ return new_date
+
+ return datetime_round(datetime.datetime.strptime(date_str, format), precision)
+
+
+def date_from_str(date_str, format='%Y%m%d'):
+ """
+ Return a datetime object from a string in the format YYYYMMDD or
+ (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
+
+ format: string date format used to return datetime object from
+ """
+ return datetime_from_str(date_str, precision='microsecond', format=format).date()
+
+
+def datetime_add_months(dt, months):
+ """Increment/Decrement a datetime object by months."""
+ month = dt.month + months - 1
+ year = dt.year + month // 12
+ month = month % 12 + 1
+ day = min(dt.day, calendar.monthrange(year, month)[1])
+ return dt.replace(year, month, day)
+
+
+def datetime_round(dt, precision='day'):
+ """
+ Round a datetime object's time to a specific precision
+ """
+ if precision == 'microsecond':
+ return dt
+
+ unit_seconds = {
+ 'day': 86400,
+ 'hour': 3600,
+ 'minute': 60,
+ 'second': 1,
+ }
+ roundto = lambda x, n: ((x + n / 2) // n) * n
+ timestamp = calendar.timegm(dt.timetuple())
+ return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
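A few hedged examples of the extended date syntax now accepted (results naturally depend on the current time):

    date_from_str('now-1week')               # same weekday last week
    date_from_str('today-1month')            # calendar-aware via datetime_add_months
    datetime_from_str('now-3hours')          # rounded to the hour (precision='auto')
    datetime_from_str('20211018', format='%Y%m%d')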
def hyphenate_date(date_str):
@@ -3135,6 +3307,14 @@ def platform_name():
return res
+def get_windows_version():
+ ''' Get Windows version. None if it's not running on Windows '''
+ if compat_os_name == 'nt':
+ return version_tuple(platform.win32_ver()[1])
+ else:
+ return None
+
+
def _windows_write_string(s, out):
""" Returns True if the string was written using special methods,
False if it has yet to be written out."""
@@ -3607,6 +3787,11 @@ def remove_quotes(s):
return s
+def get_domain(url):
+ domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
+ return domain.group('domain') if domain else None
+
+
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip('/').split('/')[-1]
@@ -3692,6 +3877,18 @@ def url_or_none(url):
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
+def strftime_or_none(timestamp, date_format, default=None):
+ datetime_object = None
+ try:
+ if isinstance(timestamp, compat_numeric_types): # unix timestamp
+ datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
+ elif isinstance(timestamp, compat_str): # assume YYYYMMDD
+ datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
+ return datetime_object.strftime(date_format)
+ except (ValueError, TypeError, AttributeError):
+ return default
+
+
def parse_duration(s):
if not isinstance(s, compat_basestring):
return None
@@ -3769,7 +3966,8 @@ def check_executable(exe, args=[]):
""" Checks if the given binary is installed somewhere in PATH, and returns its name.
args can be a list of arguments for a short output (like -version) """
try:
- subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+ process_communicate_or_kill(subprocess.Popen(
+ [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
except OSError:
return False
return exe
@@ -3783,10 +3981,10 @@ def get_exe_version(exe, args=['--version'],
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if hypervideo is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
- out, _ = subprocess.Popen(
+ out, _ = process_communicate_or_kill(subprocess.Popen(
[encodeArgument(exe)] + args,
stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
except OSError:
return False
if isinstance(out, bytes): # Python 2.x
@@ -3805,49 +4003,144 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
return unrecognized
-class PagedList(object):
+class LazyList(collections.abc.Sequence):
+ ''' Lazy immutable list from an iterable
+ Note that slices of a LazyList are lists and not LazyList'''
+
+ class IndexError(IndexError):
+ pass
+
+ def __init__(self, iterable):
+ self.__iterable = iter(iterable)
+ self.__cache = []
+ self.__reversed = False
+
+ def __iter__(self):
+ if self.__reversed:
+ # We need to consume the entire iterable to iterate in reverse
+ yield from self.exhaust()
+ return
+ yield from self.__cache
+ for item in self.__iterable:
+ self.__cache.append(item)
+ yield item
+
+ def __exhaust(self):
+ self.__cache.extend(self.__iterable)
+ return self.__cache
+
+ def exhaust(self):
+ ''' Evaluate the entire iterable '''
+ return self.__exhaust()[::-1 if self.__reversed else 1]
+
+ @staticmethod
+ def __reverse_index(x):
+ return None if x is None else -(x + 1)
+
+ def __getitem__(self, idx):
+ if isinstance(idx, slice):
+ if self.__reversed:
+ idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
+ start, stop, step = idx.start, idx.stop, idx.step or 1
+ elif isinstance(idx, int):
+ if self.__reversed:
+ idx = self.__reverse_index(idx)
+ start, stop, step = idx, idx, 0
+ else:
+ raise TypeError('indices must be integers or slices')
+ if ((start or 0) < 0 or (stop or 0) < 0
+ or (start is None and step < 0)
+ or (stop is None and step > 0)):
+ # We need to consume the entire iterable to be able to slice from the end
+ # Obviously, never use this with infinite iterables
+ self.__exhaust()
+ try:
+ return self.__cache[idx]
+ except IndexError as e:
+ raise self.IndexError(e) from e
+ n = max(start or 0, stop or 0) - len(self.__cache) + 1
+ if n > 0:
+ self.__cache.extend(itertools.islice(self.__iterable, n))
+ try:
+ return self.__cache[idx]
+ except IndexError as e:
+ raise self.IndexError(e) from e
+
+ def __bool__(self):
+ try:
+ self[-1] if self.__reversed else self[0]
+ except self.IndexError:
+ return False
+ return True
+
+ def __len__(self):
+ self.__exhaust()
+ return len(self.__cache)
+
+ def reverse(self):
+ self.__reversed = not self.__reversed
+ return self
+
+ def __repr__(self):
+ # repr and str should mimic a list. So we exhaust the iterable
+ return repr(self.exhaust())
+
+ def __str__(self):
+ return repr(self.exhaust())
+
+
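A quick sketch of the LazyList semantics described above: items are pulled from the underlying iterator only on demand and cached, and reverse() flips the view in place:

    lazy = LazyList(str(i) for i in range(10))
    lazy[2]         # consumes and caches items 0..2 only
    lazy[:3]        # a plain list, not a LazyList
    lazy.reverse()  # lazy[0] is now '9'; reversed/negative access exhausts the iterable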
+class PagedList:
def __len__(self):
# This is only useful for tests
return len(self.getslice())
-
-class OnDemandPagedList(PagedList):
def __init__(self, pagefunc, pagesize, use_cache=True):
self._pagefunc = pagefunc
self._pagesize = pagesize
self._use_cache = use_cache
- if use_cache:
- self._cache = {}
+ self._cache = {}
+
+ def getpage(self, pagenum):
+ page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
+ if self._use_cache:
+ self._cache[pagenum] = page_results
+ return page_results
def getslice(self, start=0, end=None):
- res = []
+ return list(self._getslice(start, end))
+
+ def _getslice(self, start, end):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def __getitem__(self, idx):
+ # NOTE: cache must be enabled if this is used
+ if not isinstance(idx, int) or idx < 0:
+ raise TypeError('indices must be non-negative integers')
+ entries = self.getslice(idx, idx + 1)
+ return entries[0] if entries else None
+
+
+class OnDemandPagedList(PagedList):
+ def _getslice(self, start, end):
for pagenum in itertools.count(start // self._pagesize):
firstid = pagenum * self._pagesize
nextfirstid = pagenum * self._pagesize + self._pagesize
if start >= nextfirstid:
continue
- page_results = None
- if self._use_cache:
- page_results = self._cache.get(pagenum)
- if page_results is None:
- page_results = list(self._pagefunc(pagenum))
- if self._use_cache:
- self._cache[pagenum] = page_results
-
startv = (
start % self._pagesize
if firstid <= start < nextfirstid
else 0)
-
endv = (
((end - 1) % self._pagesize) + 1
if (end is not None and firstid <= end <= nextfirstid)
else None)
+ page_results = self.getpage(pagenum)
if startv != 0 or endv is not None:
page_results = page_results[startv:endv]
- res.extend(page_results)
+ yield from page_results
# A little optimization - if current page is not "full", ie. does
# not contain page_size videos then we can assume that this page
@@ -3860,36 +4153,31 @@ class OnDemandPagedList(PagedList):
# break out early as well
if end == nextfirstid:
break
- return res
class InAdvancePagedList(PagedList):
def __init__(self, pagefunc, pagecount, pagesize):
- self._pagefunc = pagefunc
self._pagecount = pagecount
- self._pagesize = pagesize
+ PagedList.__init__(self, pagefunc, pagesize, True)
- def getslice(self, start=0, end=None):
- res = []
+ def _getslice(self, start, end):
start_page = start // self._pagesize
end_page = (
self._pagecount if end is None else (end // self._pagesize + 1))
skip_elems = start - start_page * self._pagesize
only_more = None if end is None else end - start
for pagenum in range(start_page, end_page):
- page = list(self._pagefunc(pagenum))
+ page_results = self.getpage(pagenum)
if skip_elems:
- page = page[skip_elems:]
+ page_results = page_results[skip_elems:]
skip_elems = None
if only_more is not None:
- if len(page) < only_more:
- only_more -= len(page)
+ if len(page_results) < only_more:
+ only_more -= len(page_results)
else:
- page = page[:only_more]
- res.extend(page)
+ yield from page_results[:only_more]
break
- res.extend(page)
- return res
+ yield from page_results
def uppercase_escape(s):
@@ -3927,17 +4215,24 @@ def escape_url(url):
).geturl()
+def parse_qs(url):
+ return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+
def read_batch_urls(batch_fd):
def fixup(url):
if not isinstance(url, compat_str):
url = url.decode('utf-8', 'replace')
- BOM_UTF8 = '\xef\xbb\xbf'
- if url.startswith(BOM_UTF8):
- url = url[len(BOM_UTF8):]
- url = url.strip()
- if url.startswith(('#', ';', ']')):
+ BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
+ for bom in BOM_UTF8:
+ if url.startswith(bom):
+ url = url[len(bom):]
+ url = url.lstrip()
+ if not url or url.startswith(('#', ';', ']')):
return False
- return url
+ # "#" cannot be stripped out since it is part of the URI
+ # However, it can be safely stipped out if follwing a whitespace
+        # However, it can be safely stripped out if following a whitespace
with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url]
@@ -4040,9 +4335,7 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
def try_get(src, getter, expected_type=None):
- if not isinstance(getter, (list, tuple)):
- getter = [getter]
- for get in getter:
+ for get in variadic(getter):
try:
v = get(src)
except (AttributeError, KeyError, TypeError, IndexError):
@@ -4097,6 +4390,7 @@ def parse_age_limit(s):
m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
if m:
return int(m.group('age'))
+ s = s.upper()
if s in US_RATINGS:
return US_RATINGS[s]
m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
@@ -4115,8 +4409,9 @@ def strip_jsonp(code):
r'\g<callback_data>', code)
-def js_to_json(code):
- COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+def js_to_json(code, vars={}):
+ # vars is a dict of var, val pairs to substitute
+ COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
INTEGER_TABLE = (
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
@@ -4127,6 +4422,8 @@ def js_to_json(code):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
+ elif v in ('undefined', 'void 0'):
+ return 'null'
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
return ""
@@ -4144,13 +4441,16 @@ def js_to_json(code):
i = int(im.group(1), base)
return '"%d":' % i if v.endswith(':') else '%d' % i
+ if v in vars:
+ return vars[v]
+
return '"%s"' % v
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
{comment}|,(?={skip}[\]}}])|
- (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
+ void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
[0-9]+(?={skip}:)|
!+
@@ -4167,7 +4467,40 @@ def qualities(quality_ids):
return q
-DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
+DEFAULT_OUTTMPL = {
+ 'default': '%(title)s [%(id)s].%(ext)s',
+ 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
+}
+OUTTMPL_TYPES = {
+ 'chapter': None,
+ 'subtitle': None,
+ 'thumbnail': None,
+ 'description': 'description',
+ 'annotation': 'annotations.xml',
+ 'infojson': 'info.json',
+ 'pl_thumbnail': None,
+ 'pl_description': 'description',
+ 'pl_infojson': 'info.json',
+}
+
+# As of [1] format syntax is:
+# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+STR_FORMAT_RE_TMPL = r'''(?x)
+ (?<!%)(?P<prefix>(?:%%)*)
+ %
+ (?P<has_key>\((?P<key>{0})\))?
+ (?P<format>
+ (?P<conversion>[#0\-+ ]+)?
+ (?P<min_width>\d+)?
+ (?P<precision>\.\d+)?
+ (?P<len_mod>[hlL])? # unused in python
+ {1} # conversion type
+ )
+'''
+
+
+STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
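A sketch of how the template might be instantiated to tokenize an output template (the {0}/{1} fill-ins here are illustrative, not the exact ones used elsewhere):

    import re

    fmt_re = STR_FORMAT_RE_TMPL.format(r'[^)]*', f'[{STR_FORMAT_TYPES}]')
    m = re.search(fmt_re, '%(title).50s [%(id)s].%(ext)s')
    m.group('key'), m.group('format')  # ('title', '.50s')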
def limit_length(s, length):
@@ -4195,9 +4528,10 @@ def is_outdated_version(version, limit, assume_new=True):
def ytdl_is_updateable():
""" Returns if hypervideo can be updated with -U """
- from zipimport import zipimporter
- return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+ from .update import is_non_updateable
+
+ return not is_non_updateable()
def args_to_str(args):
@@ -4218,19 +4552,24 @@ def mimetype2ext(mt):
if mt is None:
return None
- ext = {
+ mt, _, params = mt.partition(';')
+ mt = mt.strip()
+
+ FULL_MAP = {
'audio/mp4': 'm4a',
# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
# it's the most popular one
'audio/mpeg': 'mp3',
- }.get(mt)
+ 'audio/x-wav': 'wav',
+ 'audio/wav': 'wav',
+ 'audio/wave': 'wav',
+ }
+
+ ext = FULL_MAP.get(mt)
if ext is not None:
return ext
- _, _, res = mt.rpartition('/')
- res = res.split(';')[0].strip().lower()
-
- return {
+ SUBTYPE_MAP = {
'3gpp': '3gp',
'smptett+xml': 'tt',
'ttaf+xml': 'dfxp',
@@ -4249,7 +4588,28 @@ def mimetype2ext(mt):
'quicktime': 'mov',
'mp2t': 'ts',
'x-wav': 'wav',
- }.get(res, res)
+ 'filmstrip+json': 'fs',
+ 'svg+xml': 'svg',
+ }
+
+ _, _, subtype = mt.rpartition('/')
+ ext = SUBTYPE_MAP.get(subtype.lower())
+ if ext is not None:
+ return ext
+
+ SUFFIX_MAP = {
+ 'json': 'json',
+ 'xml': 'xml',
+ 'zip': 'zip',
+ 'gzip': 'gz',
+ }
+
+ _, _, suffix = subtype.partition('+')
+ ext = SUFFIX_MAP.get(suffix)
+ if ext is not None:
+ return ext
+
+ return subtype.replace('+', '.')
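A few illustrative inputs for the reworked lookup chain (full type first, then subtype, then the '+suffix', then a literal fallback):

    mimetype2ext('audio/mp4')                       # 'm4a'  (FULL_MAP)
    mimetype2ext('video/quicktime; charset=utf-8')  # 'mov'  (parameters stripped)
    mimetype2ext('application/vnd.foo+json')        # 'json' (SUFFIX_MAP)
    mimetype2ext('application/x-unknown+weird')     # 'x-unknown.weird'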
def parse_codecs(codecs_str):
@@ -4257,13 +4617,22 @@ def parse_codecs(codecs_str):
if not codecs_str:
return {}
split_codecs = list(filter(None, map(
- lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
- vcodec, acodec = None, None
+ str.strip, codecs_str.strip().strip(',').split(','))))
+ vcodec, acodec, hdr = None, None, None
for full_codec in split_codecs:
codec = full_codec.split('.')[0]
- if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
+ if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'):
if not vcodec:
vcodec = full_codec
+ if codec in ('dvh1', 'dvhe'):
+ hdr = 'DV'
+ elif codec == 'vp9' and vcodec.startswith('vp9.2'):
+ hdr = 'HDR10'
+ elif codec == 'av01':
+ parts = full_codec.split('.')
+ if len(parts) > 3 and parts[3] == '10':
+ hdr = 'HDR10'
+ vcodec = '.'.join(parts[:4])
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
if not acodec:
acodec = full_codec
@@ -4279,6 +4648,7 @@ def parse_codecs(codecs_str):
return {
'vcodec': vcodec or 'none',
'acodec': acodec or 'none',
+ 'dynamic_range': hdr,
}
return {}
@@ -4353,66 +4723,85 @@ def determine_protocol(info_dict):
return compat_urllib_parse_urlparse(url).scheme
-def render_table(header_row, data):
+def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
""" Render a list of rows, each as a list of values """
+
+ def get_max_lens(table):
+ return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+
+ def filter_using_list(row, filterArray):
+ return [col for (take, col) in zip(filterArray, row) if take]
+
+ if hideEmpty:
+ max_lens = get_max_lens(data)
+ header_row = filter_using_list(header_row, max_lens)
+ data = [filter_using_list(row, max_lens) for row in data]
+
table = [header_row] + data
- max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
- format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+ max_lens = get_max_lens(table)
+ if delim:
+ table = [header_row] + [['-' * ml for ml in max_lens]] + data
+ format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
return '\n'.join(format_str % tuple(row) for row in table)
-def _match_one(filter_part, dct):
+def _match_one(filter_part, dct, incomplete):
+ # TODO: Generalize code with YoutubeDL._build_format_filter
+ STRING_OPERATORS = {
+ '*=': operator.contains,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '~=': lambda attr, value: re.search(value, attr),
+ }
COMPARISON_OPERATORS = {
+ **STRING_OPERATORS,
+ '<=': operator.le, # "<=" must be defined above "<"
'<': operator.lt,
- '<=': operator.le,
- '>': operator.gt,
'>=': operator.ge,
+ '>': operator.gt,
'=': operator.eq,
- '!=': operator.ne,
}
+
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
- (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
- (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
- (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
+ (?P<strval>.+?)
)
\s*$
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
m = operator_rex.search(filter_part)
if m:
- op = COMPARISON_OPERATORS[m.group('op')]
- actual_value = dct.get(m.group('key'))
- if (m.group('quotedstrval') is not None
- or m.group('strval') is not None
+ m = m.groupdict()
+ unnegated_op = COMPARISON_OPERATORS[m['op']]
+ if m['negation']:
+ op = lambda attr, value: not unnegated_op(attr, value)
+ else:
+ op = unnegated_op
+ comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
+ if m['quote']:
+ comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
+ actual_value = dct.get(m['key'])
+ numeric_comparison = None
+ if isinstance(actual_value, compat_numeric_types):
# If the original field is a string and matching comparisonvalue is
# a number we should respect the origin of the original field
# and process comparison value as a string (see
- # https://github.com/ytdl-org/youtube-dl/issues/11082).
- or actual_value is not None and m.group('intval') is not None
- and isinstance(actual_value, compat_str)):
- if m.group('op') not in ('=', '!='):
- raise ValueError(
- 'Operator %s does not support string values!' % m.group('op'))
- comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
- quote = m.group('quote')
- if quote is not None:
- comparison_value = comparison_value.replace(r'\%s' % quote, quote)
- else:
+ # https://github.com/ytdl-org/youtube-dl/issues/11082)
try:
- comparison_value = int(m.group('intval'))
+ numeric_comparison = int(comparison_value)
except ValueError:
- comparison_value = parse_filesize(m.group('intval'))
- if comparison_value is None:
- comparison_value = parse_filesize(m.group('intval') + 'B')
- if comparison_value is None:
- raise ValueError(
- 'Invalid integer value %r in filter part %r' % (
- m.group('intval'), filter_part))
+ numeric_comparison = parse_filesize(comparison_value)
+ if numeric_comparison is None:
+ numeric_comparison = parse_filesize(f'{comparison_value}B')
+ if numeric_comparison is None:
+ numeric_comparison = parse_duration(comparison_value)
+ if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
+ raise ValueError('Operator %s only supports string values!' % m['op'])
if actual_value is None:
- return m.group('none_inclusive')
- return op(actual_value, comparison_value)
+ return incomplete or m['none_inclusive']
+ return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
UNARY_OPERATORS = {
'': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
@@ -4426,21 +4815,25 @@ def _match_one(filter_part, dct):
if m:
op = UNARY_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
+ if incomplete and actual_value is None:
+ return True
return op(actual_value)
raise ValueError('Invalid filter part %r' % filter_part)
-def match_str(filter_str, dct):
- """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
-
+def match_str(filter_str, dct, incomplete=False):
+    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False.
+        When incomplete, all conditions pass on missing fields.
+ """
return all(
- _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
+ _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
+ for filter_part in re.split(r'(?<!\\)&', filter_str))
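Hedged examples of the extended filter syntax (string operators, negation, and the new incomplete flag):

    match_str('duration >= 60 & title *= sponsor',
              {'duration': 90, 'title': 'a sponsored video'})  # True
    match_str('!is_live & like_count > 1k',
              {'is_live': False, 'like_count': 2000})          # True ('1k' via parse_filesize)
    match_str('uploader = foo', {}, incomplete=True)           # True (missing fields pass)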
def match_filter_func(filter_str):
- def _match_func(info_dict):
- if match_str(filter_str, info_dict):
+ def _match_func(info_dict, *args, **kwargs):
+ if match_str(filter_str, info_dict, *args, **kwargs):
return None
else:
video_title = info_dict.get('title', info_dict.get('id', 'video'))
@@ -4651,12 +5044,37 @@ def cli_valueless_option(params, command_option, param, expected_value=True):
return [command_option] if param == expected_value else []
-def cli_configuration_args(params, param, default=[]):
- ex_args = params.get(param)
- if ex_args is None:
+def cli_configuration_args(argdict, keys, default=[], use_compat=True):
+ if isinstance(argdict, (list, tuple)): # for backward compatibility
+ if use_compat:
+ return argdict
+ else:
+ argdict = None
+ if argdict is None:
return default
- assert isinstance(ex_args, list)
- return ex_args
+ assert isinstance(argdict, dict)
+
+ assert isinstance(keys, (list, tuple))
+ for key_list in keys:
+ arg_list = list(filter(
+ lambda x: x is not None,
+ [argdict.get(key.lower()) for key in variadic(key_list)]))
+ if arg_list:
+ return [arg for args in arg_list for arg in args]
+ return default
+
+
+def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
+ main_key, exe = main_key.lower(), exe.lower()
+ root_key = exe if main_key == exe else f'{main_key}+{exe}'
+ keys = [f'{root_key}{k}' for k in (keys or [''])]
+ if root_key in keys:
+ if main_key != exe:
+ keys.append((main_key, exe))
+ keys.append('default')
+ else:
+ use_compat = False
+ return cli_configuration_args(argdict, keys, default, use_compat)
class ISO639Utils(object):
@@ -5725,7 +6143,7 @@ def write_xattr(path, key, value):
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
except EnvironmentError as e:
raise XAttrMetadataError(e.errno, e.strerror)
- stdout, stderr = p.communicate()
+ stdout, stderr = process_communicate_or_kill(p)
stderr = stderr.decode('utf-8', 'replace')
if p.returncode != 0:
raise XAttrMetadataError(p.returncode, stderr)
@@ -5757,6 +6175,95 @@ def random_birthday(year_field, month_field, day_field):
}
+# Templates for internet shortcut files, which are plain text files.
+DOT_URL_LINK_TEMPLATE = '''
+[InternetShortcut]
+URL=%(url)s
+'''.lstrip()
+
+DOT_WEBLOC_LINK_TEMPLATE = '''
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+\t<key>URL</key>
+\t<string>%(url)s</string>
+</dict>
+</plist>
+'''.lstrip()
+
+DOT_DESKTOP_LINK_TEMPLATE = '''
+[Desktop Entry]
+Encoding=UTF-8
+Name=%(filename)s
+Type=Link
+URL=%(url)s
+Icon=text-html
+'''.lstrip()
+
+
+def iri_to_uri(iri):
+ """
+ Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
+
+ The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
+ """
+
+ iri_parts = compat_urllib_parse_urlparse(iri)
+
+ if '[' in iri_parts.netloc:
+        raise ValueError('IPv6 URIs are not yet supported.')
+ # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
+
+ # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
+
+ net_location = ''
+ if iri_parts.username:
+ net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
+ if iri_parts.password is not None:
+ net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
+ net_location += '@'
+
+ net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
+ # The 'idna' encoding produces ASCII text.
+ if iri_parts.port is not None and iri_parts.port != 80:
+ net_location += ':' + str(iri_parts.port)
+
+ return compat_urllib_parse_urlunparse(
+ (iri_parts.scheme,
+ net_location,
+
+ compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
+
+ # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
+ compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
+
+ # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
+ compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
+
+ compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
+
+ # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
+
+
+def to_high_limit_path(path):
+ if sys.platform in ['win32', 'cygwin']:
+ # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
+ return r'\\?\ '.rstrip() + os.path.abspath(path)
+
+ return path
+
+
+def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
+ if field is None:
+ val = obj if obj is not None else default
+ else:
+ val = obj.get(field, default)
+ if func and val not in ignore:
+ val = func(val)
+ return template % val if val not in ignore else default
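Small illustrations of format_field, which the rewritten ExtractorError message assembly above relies on:

    format_field({'id': 'abc'}, 'id', template='%s: ')  # 'abc: '
    format_field({'id': None}, 'id', template='%s: ')   # ''  (values in `ignore` yield default)
    format_field(3.5, template='%.1fs')                 # '3.5s' (no field: obj itself is used)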
+
+
def clean_podcast_url(url):
return re.sub(r'''(?x)
(?:
@@ -5772,3 +6279,203 @@ def clean_podcast_url(url):
st\.fm # https://podsights.com/docs/
)/e
)/''', '', url)
+
+
+_HEX_TABLE = '0123456789abcdef'
+
+
+def random_uuidv4():
+ return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
+
+
+def make_dir(path, to_screen=None):
+ try:
+ dn = os.path.dirname(path)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+        if callable(to_screen):
+ to_screen('unable to create directory ' + error_to_compat_str(err))
+ return False
+
+
+def get_executable_path():
+ from zipimport import zipimporter
+ if hasattr(sys, 'frozen'): # Running from PyInstaller
+ path = os.path.dirname(sys.executable)
+ elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
+ path = os.path.join(os.path.dirname(__file__), '../..')
+ else:
+ path = os.path.join(os.path.dirname(__file__), '..')
+ return os.path.abspath(path)
+
+
+def load_plugins(name, suffix, namespace):
+ classes = {}
+ try:
+ plugins_spec = importlib.util.spec_from_file_location(
+ name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
+ plugins = importlib.util.module_from_spec(plugins_spec)
+ sys.modules[plugins_spec.name] = plugins
+ plugins_spec.loader.exec_module(plugins)
+ for name in dir(plugins):
+ if name in namespace:
+ continue
+ if not name.endswith(suffix):
+ continue
+ klass = getattr(plugins, name)
+ classes[name] = namespace[name] = klass
+ except FileNotFoundError:
+ pass
+ return classes
+
+
+def traverse_obj(
+ obj, *path_list, default=None, expected_type=None, get_all=True,
+ casesense=True, is_user_input=False, traverse_string=False):
+ ''' Traverse nested list/dict/tuple
+ @param path_list A list of paths which are checked one by one.
+ Each path is a list of keys where each key is a string,
+ a function, a tuple of strings or "...".
+                        When a function is given, it takes the key as argument and
+ returns whether the key matches or not. When a tuple is given,
+ all the keys given in the tuple are traversed, and
+ "..." traverses all the keys in the object
+ @param default Default value to return
+ @param expected_type Only accept final value of this type (Can also be any callable)
+ @param get_all Return all the values obtained from a path or only the first one
+ @param casesense Whether to consider dictionary keys as case sensitive
+ @param is_user_input Whether the keys are generated from user input. If True,
+ strings are converted to int/slice if necessary
+ @param traverse_string Whether to traverse inside strings. If True, any
+ non-compatible object will also be converted into a string
+ # TODO: Write tests
+ '''
+ if not casesense:
+ _lower = lambda k: (k.lower() if isinstance(k, str) else k)
+ path_list = (map(_lower, variadic(path)) for path in path_list)
+
+ def _traverse_obj(obj, path, _current_depth=0):
+ nonlocal depth
+ if obj is None:
+ return None
+ path = tuple(variadic(path))
+ for i, key in enumerate(path):
+ if isinstance(key, (list, tuple)):
+ obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
+ key = ...
+ if key is ...:
+ obj = (obj.values() if isinstance(obj, dict)
+ else obj if isinstance(obj, (list, tuple, LazyList))
+ else str(obj) if traverse_string else [])
+ _current_depth += 1
+ depth = max(depth, _current_depth)
+ return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
+ elif callable(key):
+ if isinstance(obj, (list, tuple, LazyList)):
+ obj = enumerate(obj)
+ elif isinstance(obj, dict):
+ obj = obj.items()
+ else:
+ if not traverse_string:
+ return None
+ obj = str(obj)
+ _current_depth += 1
+ depth = max(depth, _current_depth)
+ return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
+ elif isinstance(obj, dict) and not (is_user_input and key == ':'):
+ obj = (obj.get(key) if casesense or (key in obj)
+ else next((v for k, v in obj.items() if _lower(k) == key), None))
+ else:
+ if is_user_input:
+ key = (int_or_none(key) if ':' not in key
+ else slice(*map(int_or_none, key.split(':'))))
+ if key == slice(None):
+ return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
+ if not isinstance(key, (int, slice)):
+ return None
+ if not isinstance(obj, (list, tuple, LazyList)):
+ if not traverse_string:
+ return None
+ obj = str(obj)
+ try:
+ obj = obj[key]
+ except IndexError:
+ return None
+ return obj
+
+ if isinstance(expected_type, type):
+ type_test = lambda val: val if isinstance(val, expected_type) else None
+ elif expected_type is not None:
+ type_test = expected_type
+ else:
+ type_test = lambda val: val
+
+ for path in path_list:
+ depth = 0
+ val = _traverse_obj(obj, path)
+ if val is not None:
+ if depth:
+ for _ in range(depth - 1):
+ val = itertools.chain.from_iterable(v for v in val if v is not None)
+ val = [v for v in map(type_test, val) if v is not None]
+ if val:
+ return val if get_all else val[0]
+ else:
+ val = type_test(val)
+ if val is not None:
+ return val
+ return default
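+# Illustrative examples (hypothetical data):
+#   traverse_obj({'a': {'b': [1, 2]}}, ('a', 'b', 0))    -> 1
+#   traverse_obj({'a': {'b': [1, 2]}}, ('a', 'b', ...))  -> [1, 2]
+#   traverse_obj({'a': 1}, ('x',), ('a',))               -> 1  (first path that matches)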
+
+
+def traverse_dict(dictn, keys, casesense=True):
+ ''' For backward compatibility. Do not use '''
+ return traverse_obj(dictn, keys, casesense=casesense,
+ is_user_input=True, traverse_string=True)
+
+
+def variadic(x, allowed_types=(str, bytes)):
+ return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
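+# e.g. variadic('spam') -> ('spam',) but variadic(['spam', 'eggs']) -> ['spam', 'eggs']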
+
+
+# Create a JSON Web Signature (JWS) with the HS256 algorithm; the result is
+# in JWS Compact Serialization format.
+# Implemented following JWT <https://www.rfc-editor.org/rfc/rfc7519.html>
+# and JWS <https://www.rfc-editor.org/rfc/rfc7515.html>.
+def jwt_encode_hs256(payload_data, key, headers={}):
+ header_data = {
+ 'alg': 'HS256',
+ 'typ': 'JWT',
+ }
+ if headers:
+ header_data.update(headers)
+ header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
+ payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
+ h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
+ signature_b64 = base64.b64encode(h.digest())
+ token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
+ return token
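+# Illustrative usage (hypothetical key/payload):
+#   jwt_encode_hs256({'iss': 'me'}, 'secret')
+#   -> b'<header-b64>.<payload-b64>.<signature-b64>'
+# Note that RFC 7515 strictly specifies unpadded base64url for the three parts,
+# while this helper emits standard Base64; strict consumers may reject it.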
+
+
+def supports_terminal_sequences(stream):
+ if compat_os_name == 'nt':
+ if get_windows_version() < (10, 0, 10586):
+ return False
+ elif not os.getenv('TERM'):
+ return False
+ try:
+ return stream.isatty()
+ except BaseException:
+ return False
+
+
+TERMINAL_SEQUENCES = {
+ 'DOWN': '\n',
+ 'UP': '\x1b[A',
+ 'ERASE_LINE': '\x1b[K',
+ 'RED': '\x1b[0;31m',
+ 'YELLOW': '\x1b[0;33m',
+ 'BLUE': '\x1b[0;34m',
+ 'RESET_STYLE': '\x1b[0m',
+}
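+# Example (illustrative): wrap text in colour codes when the stream supports them:
+#   TERMINAL_SEQUENCES['RED'] + 'error' + TERMINAL_SEQUENCES['RESET_STYLE']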
diff --git a/hypervideo_dl/version.py b/hypervideo_dl/version.py
index 4b768be..839f10e 100644
--- a/hypervideo_dl/version.py
+++ b/hypervideo_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '1.1.11'
+__version__ = '1.1.12'
diff --git a/hypervideo_dl/webvtt.py b/hypervideo_dl/webvtt.py
new file mode 100644
index 0000000..b5ad01f
--- /dev/null
+++ b/hypervideo_dl/webvtt.py
@@ -0,0 +1,402 @@
+# coding: utf-8
+from __future__ import unicode_literals, print_function, division
+
+"""
+A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
+to be able to assemble a single stand-alone subtitle file, suitably adjusting
+timestamps on the way, while everything else is passed through unmodified.
+
+Regular expressions based on the W3C WebVTT specification
+<https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
+in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
+"""
+
+import re
+import io
+from .utils import int_or_none
+from .compat import (
+ compat_str as str,
+ compat_Pattern,
+ compat_Match,
+)
+
+
+class _MatchParser(object):
+ """
+ An object that maintains the current parsing position and allows
+ conveniently advancing it as syntax elements are successfully parsed.
+ """
+
+ def __init__(self, string):
+ self._data = string
+ self._pos = 0
+
+ def match(self, r):
+ if isinstance(r, compat_Pattern):
+ return r.match(self._data, self._pos)
+ if isinstance(r, str):
+ if self._data.startswith(r, self._pos):
+ return len(r)
+ return None
+ raise ValueError(r)
+
+ def advance(self, by):
+ if by is None:
+ amt = 0
+ elif isinstance(by, compat_Match):
+ amt = len(by.group(0))
+ elif isinstance(by, str):
+ amt = len(by)
+ elif isinstance(by, int):
+ amt = by
+ else:
+ raise ValueError(by)
+ self._pos += amt
+ return by
+
+ def consume(self, r):
+ return self.advance(self.match(r))
+
+ def child(self):
+ return _MatchChildParser(self)
+
+
+class _MatchChildParser(_MatchParser):
+ """
+ A child parser state, which advances through the same data as
+ its parent, but has an independent position. This is useful when
+ advancing through syntax elements we might later want to backtrack
+ from.
+ """
+
+ def __init__(self, parent):
+ super(_MatchChildParser, self).__init__(parent._data)
+ self.__parent = parent
+ self._pos = parent._pos
+
+ def commit(self):
+ """
+ Advance the parent state to the current position of this child state.
+ """
+ self.__parent._pos = self._pos
+ return self.__parent
+
+
+class ParseError(Exception):
+ def __init__(self, parser):
+ super(ParseError, self).__init__("Parse error at position %u (near %r)" % (
+ parser._pos, parser._data[parser._pos:parser._pos + 20]
+ ))
+
+
+# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp>
+# prescribes that hours must be *2 or more* digits, timestamps with a single
+# digit for the hour part have been seen in the wild.
+# See https://github.com/hypervideo/hypervideo/issues/921
+_REGEX_TS = re.compile(r'''(?x)
+ (?:([0-9]{1,}):)?
+ ([0-9]{2}):
+ ([0-9]{2})\.
+ ([0-9]{3})?
+''')
+_REGEX_EOF = re.compile(r'\Z')
+_REGEX_NL = re.compile(r'(?:\r\n|[\r\n])')
+_REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+')
+
+
+def _parse_ts(ts):
+ """
+ Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
+ into an MPEG PES timestamp: a tick counter at 90 kHz resolution.
+ """
+
+ hours, mins, secs, msecs = ts.groups()
+ return 90 * (
+ int(hours or 0) * 3600000 +  # noqa: W504,E221,E222
+ int(mins) * 60000 +  # noqa: W504,E221,E222
+ int(secs) * 1000 +  # noqa: W504,E221,E222
+ int(msecs or 0)  # noqa: W504,E221,E222 (the millisecond group is optional in _REGEX_TS)
+ )
+
+
+def _format_ts(ts):
+ """
+ Convert an MPEG PES timestamp into a WebVTT timestamp.
+ This will lose sub-millisecond precision.
+ """
+ msec = int((ts + 45) // 90)
+ secs, msec = divmod(msec, 1000)
+ mins, secs = divmod(secs, 60)
+ hrs, mins = divmod(mins, 60)
+ return '%02u:%02u:%02u.%03u' % (hrs, mins, secs, msec)
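+# Worked example: '00:00:01.500' parses to 90 * 1500 = 135000 ticks, and
+# _format_ts(135000) renders it back as '00:00:01.500'.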
+
+
+class Block(object):
+ """
+ An abstract WebVTT block.
+ """
+
+ def __init__(self, **kwargs):
+ for key, val in kwargs.items():
+ setattr(self, key, val)
+
+ @classmethod
+ def parse(cls, parser):
+ m = parser.match(cls._REGEX)
+ if not m:
+ return None
+ parser.advance(m)
+ return cls(raw=m.group(0))
+
+ def write_into(self, stream):
+ stream.write(self.raw)
+
+
+class HeaderBlock(Block):
+ """
+ A WebVTT block that may only appear in the header part of the file,
+ i.e. before any cue blocks.
+ """
+
+ pass
+
+
+class Magic(HeaderBlock):
+ _REGEX = re.compile(r'\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n|[\r\n])')
+
+ # XXX: The X-TIMESTAMP-MAP extension is described in RFC 8216 §3.5
+ # <https://tools.ietf.org/html/rfc8216#section-3.5>, but the RFC
+ # doesn’t specify the exact grammar nor where in the WebVTT
+ # syntax it should be placed; the below has been devised based
+ # on usage in the wild
+ #
+ # And strictly speaking, the presence of this extension violates
+ # the W3C WebVTT spec. Oh well.
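+ #
+ # A typical instance seen in the wild looks like (illustrative):
+ #   X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000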
+
+ _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
+ _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
+ _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
+ _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')
+
+ @classmethod
+ def __parse_tsmap(cls, parser):
+ parser = parser.child()
+ local = mpegts = None  # avoid UnboundLocalError if the map omits either field
+
+ while True:
+ m = parser.consume(cls._REGEX_TSMAP_LOCAL)
+ if m:
+ m = parser.consume(_REGEX_TS)
+ if m is None:
+ raise ParseError(parser)
+ local = _parse_ts(m)
+ if local is None:
+ raise ParseError(parser)
+ else:
+ m = parser.consume(cls._REGEX_TSMAP_MPEGTS)
+ if m:
+ mpegts = int_or_none(m.group(1))
+ if mpegts is None:
+ raise ParseError(parser)
+ else:
+ raise ParseError(parser)
+ if parser.consume(cls._REGEX_TSMAP_SEP):
+ continue
+ if parser.consume(_REGEX_NL):
+ break
+ raise ParseError(parser)
+
+ parser.commit()
+ return local, mpegts
+
+ @classmethod
+ def parse(cls, parser):
+ parser = parser.child()
+
+ m = parser.consume(cls._REGEX)
+ if not m:
+ raise ParseError(parser)
+
+ extra = m.group(1)
+ local, mpegts = None, None
+ if parser.consume(cls._REGEX_TSMAP):
+ local, mpegts = cls.__parse_tsmap(parser)
+ if not parser.consume(_REGEX_NL):
+ raise ParseError(parser)
+ parser.commit()
+ return cls(extra=extra, mpegts=mpegts, local=local)
+
+ def write_into(self, stream):
+ stream.write('WEBVTT')
+ if self.extra is not None:
+ stream.write(self.extra)
+ stream.write('\n')
+ if self.local or self.mpegts:
+ stream.write('X-TIMESTAMP-MAP=LOCAL:')
+ stream.write(_format_ts(self.local if self.local is not None else 0))
+ stream.write(',MPEGTS:')
+ stream.write(str(self.mpegts if self.mpegts is not None else 0))
+ stream.write('\n')
+ stream.write('\n')
+
+
+class StyleBlock(HeaderBlock):
+ _REGEX = re.compile(r'''(?x)
+ STYLE[\ \t]*(?:\r\n|[\r\n])
+ ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+ (?:\r\n|[\r\n])
+ ''')
+
+
+class RegionBlock(HeaderBlock):
+ _REGEX = re.compile(r'''(?x)
+ REGION[\ \t]*
+ ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+ (?:\r\n|[\r\n])
+ ''')
+
+
+class CommentBlock(Block):
+ _REGEX = re.compile(r'''(?x)
+ NOTE(?:\r\n|[\ \t\r\n])
+ ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+ (?:\r\n|[\r\n])
+ ''')
+
+
+class CueBlock(Block):
+ """
+ A cue block. The payload is not interpreted.
+ """
+
+ _REGEX_ID = re.compile(r'((?:(?!-->)[^\r\n])+)(?:\r\n|[\r\n])')
+ _REGEX_ARROW = re.compile(r'[ \t]+-->[ \t]+')
+ _REGEX_SETTINGS = re.compile(r'[ \t]+((?:(?!-->)[^\r\n])+)')
+ _REGEX_PAYLOAD = re.compile(r'[^\r\n]+(?:\r\n|[\r\n])?')
+
+ @classmethod
+ def parse(cls, parser):
+ parser = parser.child()
+
+ id = None
+ m = parser.consume(cls._REGEX_ID)
+ if m:
+ id = m.group(1)
+
+ m0 = parser.consume(_REGEX_TS)
+ if not m0:
+ return None
+ if not parser.consume(cls._REGEX_ARROW):
+ return None
+ m1 = parser.consume(_REGEX_TS)
+ if not m1:
+ return None
+ m2 = parser.consume(cls._REGEX_SETTINGS)
+ if not parser.consume(_REGEX_NL):
+ return None
+
+ start = _parse_ts(m0)
+ end = _parse_ts(m1)
+ settings = m2.group(1) if m2 is not None else None
+
+ text = io.StringIO()
+ while True:
+ m = parser.consume(cls._REGEX_PAYLOAD)
+ if not m:
+ break
+ text.write(m.group(0))
+
+ parser.commit()
+ return cls(
+ id=id,
+ start=start, end=end, settings=settings,
+ text=text.getvalue()
+ )
+
+ def write_into(self, stream):
+ if self.id is not None:
+ stream.write(self.id)
+ stream.write('\n')
+ stream.write(_format_ts(self.start))
+ stream.write(' --> ')
+ stream.write(_format_ts(self.end))
+ if self.settings is not None:
+ stream.write(' ')
+ stream.write(self.settings)
+ stream.write('\n')
+ stream.write(self.text)
+ stream.write('\n')
+
+ @property
+ def as_json(self):
+ return {
+ 'id': self.id,
+ 'start': self.start,
+ 'end': self.end,
+ 'text': self.text,
+ 'settings': self.settings,
+ }
+
+ def __eq__(self, other):
+ return self.as_json == other.as_json
+
+ @classmethod
+ def from_json(cls, json):
+ return cls(
+ id=json['id'],
+ start=json['start'],
+ end=json['end'],
+ text=json['text'],
+ settings=json['settings']
+ )
+
+ def hinges(self, other):
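+ # Two cues "hinge" when they carry the same payload and settings and
+ # this cue ends exactly where `other` begins, so they can be merged.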
+ if self.text != other.text:
+ return False
+ if self.settings != other.settings:
+ return False
+ return self.start <= self.end == other.start <= other.end
+
+
+def parse_fragment(frag_content):
+ """
+ A generator that yields (partially) parsed WebVTT blocks when given
+ a bytes object containing the raw contents of a WebVTT file.
+ """
+
+ parser = _MatchParser(frag_content.decode('utf-8'))
+
+ yield Magic.parse(parser)
+
+ while not parser.match(_REGEX_EOF):
+ if parser.consume(_REGEX_BLANK):
+ continue
+
+ block = RegionBlock.parse(parser)
+ if block:
+ yield block
+ continue
+ block = StyleBlock.parse(parser)
+ if block:
+ yield block
+ continue
+ block = CommentBlock.parse(parser)
+ if block:
+ yield block # XXX: or skip
+ continue
+
+ break
+
+ while not parser.match(_REGEX_EOF):
+ if parser.consume(_REGEX_BLANK):
+ continue
+
+ block = CommentBlock.parse(parser)
+ if block:
+ yield block # XXX: or skip
+ continue
+ block = CueBlock.parse(parser)
+ if block:
+ yield block
+ continue
+
+ raise ParseError(parser)
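+
+
+# Illustrative use (hypothetical fragment bytes):
+#   import sys
+#   for block in parse_fragment(b'WEBVTT\n\n00:00.000 --> 00:01.000\nHi\n'):
+#       block.write_into(sys.stdout)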
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..52feb4a
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+addopts = -ra -v --strict-markers
+markers =
+ download
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cecd08e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+mutagen
+pycryptodomex
+websockets
diff --git a/setup.cfg b/setup.cfg
index 6f03c9b..6875734 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,5 +2,5 @@
universal = True
[flake8]
-exclude = hypervideo_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
-ignore = E402,E501,E731,E741,W503
+exclude = hypervideo_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv,devscripts/create-github-release.py,devscripts/release.sh,devscripts/show-downloads-statistics.py
+ignore = E402,E501,E731,E741,W503
\ No newline at end of file
diff --git a/setup.py b/setup.py
index cb9de29..c56148e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,66 +1,64 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
-
-from __future__ import print_function
-
import os.path
import warnings
import sys
try:
- from setuptools import setup, Command
+ from setuptools import setup, Command, find_packages
setuptools_available = True
except ImportError:
from distutils.core import setup, Command
setuptools_available = False
from distutils.spawn import spawn
-try:
- # This will create an exe that needs Microsoft Visual C++ 2008
- # Redistributable Package
+# Get the version from hypervideo_dl/version.py without importing the package
+exec(compile(open('hypervideo_dl/version.py').read(), 'hypervideo_dl/version.py', 'exec'))
+
+
+DESCRIPTION = 'Command-line program to download videos from YouTube.com and many other video platforms.'
+
+LONG_DESCRIPTION = '\n\n'.join((
+ 'Official repository: <https://github.com/hypervideo/hypervideo>',
+ '**PS**: Some links in this document will not work since this is a copy of the README.md from GitHub',
+ open('README.md', 'r', encoding='utf-8').read()))
+
+REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets']
+
+
+if sys.argv[1:2] == ['py2exe']:
import py2exe
-except ImportError:
- if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
- print('Cannot import py2exe', file=sys.stderr)
- exit(1)
-
-py2exe_options = {
- 'bundle_files': 1,
- 'compressed': 1,
- 'optimize': 2,
- 'dist_dir': '.',
- 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
-}
+ warnings.warn(
+ 'Building with py2exe is not officially supported. '
+ 'The recommended way is to use "pyinst.py" to build using pyinstaller')
+ params = {
+ 'console': [{
+ 'script': './hypervideo_dl/__main__.py',
+ 'dest_base': 'hypervideo',
+ 'version': __version__,
+ 'description': DESCRIPTION,
+ 'comments': LONG_DESCRIPTION.split('\n')[0],
+ 'product_name': 'hypervideo',
+ 'product_version': __version__,
+ }],
+ 'options': {
+ 'py2exe': {
+ 'bundle_files': 0,
+ 'compressed': 1,
+ 'optimize': 2,
+ 'dist_dir': './dist',
+ 'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto
+ 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
+ }
+ },
+ 'zipfile': None
+ }
-# Get the version from hypervideo_dl/version.py without importing the package
-exec(compile(open('hypervideo_dl/version.py').read(),
- 'hypervideo_dl/version.py', 'exec'))
-
-DESCRIPTION = 'YouTube video downloader'
-LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
-
-py2exe_console = [{
- 'script': './hypervideo_dl/__main__.py',
- 'dest_base': 'hypervideo',
- 'version': __version__,
- 'description': DESCRIPTION,
- 'comments': LONG_DESCRIPTION,
- 'product_name': 'hypervideo',
- 'product_version': __version__,
-}]
-
-py2exe_params = {
- 'console': py2exe_console,
- 'options': {'py2exe': py2exe_options},
- 'zipfile': None
-}
-
-if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
- params = py2exe_params
else:
files_spec = [
- ('etc/bash_completion.d', ['hypervideo.bash-completion']),
- ('etc/fish/completions', ['hypervideo.fish']),
+ ('share/bash-completion/completions', ['completions/bash/hypervideo']),
+ ('share/zsh/site-functions', ['completions/zsh/_hypervideo']),
+ ('share/fish/vendor_completions.d', ['completions/fish/hypervideo.fish']),
('share/doc/hypervideo_dl', ['README.txt']),
('share/man/man1', ['hypervideo.1'])
]
@@ -70,7 +68,7 @@ else:
resfiles = []
for fn in files:
if not os.path.exists(fn):
- warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.' % fn)
+ warnings.warn('Skipping file %s since it is not present. Try running `make pypi-files` first' % fn)
else:
resfiles.append(fn)
data_files.append((dirname, resfiles))
@@ -78,10 +76,12 @@ else:
params = {
'data_files': data_files,
}
+
if setuptools_available:
params['entry_points'] = {'console_scripts': ['hypervideo = hypervideo_dl:main']}
else:
- params['scripts'] = ['bin/hypervideo']
+ params['scripts'] = ['hypervideo']
+
class build_lazy_extractors(Command):
description = 'Build the extractor lazy loading module'
@@ -94,54 +94,43 @@ class build_lazy_extractors(Command):
pass
def run(self):
- spawn(
- [sys.executable, 'devscripts/make_lazy_extractors.py', 'hypervideo_dl/extractor/lazy_extractors.py'],
- dry_run=self.dry_run,
- )
+ spawn([sys.executable, 'devscripts/make_lazy_extractors.py', 'hypervideo_dl/extractor/lazy_extractors.py'],
+ dry_run=self.dry_run)
+
+
+if setuptools_available:
+ packages = find_packages(exclude=('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins'))
+else:
+ packages = ['hypervideo_dl', 'hypervideo_dl.downloader', 'hypervideo_dl.extractor', 'hypervideo_dl.postprocessor']
+
setup(
- name='hypervideo_dl',
+ name='hypervideo',
version=__version__,
- description=DESCRIPTION,
- long_description=LONG_DESCRIPTION,
- url='https://git.conocimientoslibres.ga/software/hypervideo.git',
- author='Ricardo Garcia',
- author_email='ytdl@yt-dl.org',
maintainer='Jesús E.',
maintainer_email='heckyel@hyperbola.info',
license='CC0-1.0',
- packages=[
- 'hypervideo_dl',
- 'hypervideo_dl.extractor', 'hypervideo_dl.downloader',
- 'hypervideo_dl.postprocessor'],
-
- # Provokes warning on most systems (why?!)
- # test_suite = 'nose.collector',
- # test_requires = ['nosetest'],
-
+ description=DESCRIPTION,
+ long_description=LONG_DESCRIPTION,
+ long_description_content_type='text/markdown',
+ url='https://git.conocimientoslibres.ga/software/hypervideo.git',
+ packages=packages,
+ install_requires=REQUIREMENTS,
classifiers=[
'Topic :: Multimedia :: Video',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
- 'License :: Public Domain',
'Programming Language :: Python',
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.6',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.2',
- 'Programming Language :: Python :: 3.3',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: Implementation',
'Programming Language :: Python :: Implementation :: CPython',
- 'Programming Language :: Python :: Implementation :: IronPython',
- 'Programming Language :: Python :: Implementation :: Jython',
'Programming Language :: Python :: Implementation :: PyPy',
+ 'License :: Public Domain',
+ 'Operating System :: OS Independent',
],
+ python_requires='>=3.6',
cmdclass={'build_lazy_extractors': build_lazy_extractors},
**params
diff --git a/test/helper.py b/test/helper.py
index 6eb9298..0d8822e 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -22,11 +22,19 @@ from hypervideo_dl.utils import (
)
+if 'pytest' in sys.modules:
+ import pytest
+ is_download_test = pytest.mark.download
+else:
+ def is_download_test(testClass):
+ return testClass
+
+
def get_params(override=None):
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
- "parameters.json")
+ 'parameters.json')
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
- "local_parameters.json")
+ 'local_parameters.json')
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf)
if os.path.exists(LOCAL_PARAMETERS_FILE):
@@ -190,7 +198,10 @@ def expect_info_dict(self, got_dict, expected_dict):
expect_dict(self, got_dict, expected_dict)
# Check for the presence of mandatory fields
if got_dict.get('_type') not in ('playlist', 'multi_video'):
- for key in ('id', 'url', 'title', 'ext'):
+ mandatory_fields = ['id', 'title']
+ if expected_dict.get('ext'):
+ mandatory_fields.extend(('url', 'ext'))
+ for key in mandatory_fields:
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
diff --git a/test/parameters.json b/test/parameters.json
index 65fd544..9ca7d2c 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -1,40 +1,46 @@
{
- "consoletitle": false,
- "continuedl": true,
- "forcedescription": false,
- "forcefilename": false,
- "forceformat": false,
- "forcethumbnail": false,
- "forcetitle": false,
- "forceurl": false,
+ "check_formats": false,
+ "consoletitle": false,
+ "continuedl": true,
+ "forcedescription": false,
+ "forcefilename": false,
+ "forceformat": false,
+ "forcethumbnail": false,
+ "forcetitle": false,
+ "forceurl": false,
+ "force_write_download_archive": false,
"format": "best",
- "ignoreerrors": false,
- "listformats": null,
- "logtostderr": false,
- "matchtitle": null,
- "max_downloads": null,
- "nooverwrites": false,
- "nopart": false,
- "noprogress": false,
- "outtmpl": "%(id)s.%(ext)s",
- "password": null,
- "playlistend": -1,
- "playliststart": 1,
- "prefer_free_formats": false,
- "quiet": false,
- "ratelimit": null,
- "rejecttitle": null,
- "retries": 10,
- "simulate": false,
- "subtitleslang": null,
+ "ignoreerrors": false,
+ "listformats": null,
+ "logtostderr": false,
+ "matchtitle": null,
+ "max_downloads": null,
+ "overwrites": null,
+ "nopart": false,
+ "noprogress": false,
+ "outtmpl": "%(id)s.%(ext)s",
+ "password": null,
+ "playliststart": 1,
+ "prefer_free_formats": false,
+ "quiet": false,
+ "ratelimit": null,
+ "rejecttitle": null,
+ "retries": 10,
+ "simulate": false,
+ "subtitleslang": null,
"subtitlesformat": "best",
- "test": true,
- "updatetime": true,
- "usenetrc": false,
- "username": null,
- "verbose": true,
- "writedescription": false,
- "writeinfojson": true,
+ "test": true,
+ "updatetime": true,
+ "usenetrc": false,
+ "username": null,
+ "verbose": true,
+ "writedescription": false,
+ "writeinfojson": true,
+ "writeannotations": false,
+ "writelink": false,
+ "writeurllink": false,
+ "writewebloclink": false,
+ "writedesktoplink": false,
"writesubtitles": false,
"allsubtitles": false,
"listsubtitles": false,
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 5029072..e892095 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
@@ -35,13 +35,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
assert False
-class TestIE(InfoExtractor):
+class DummyIE(InfoExtractor):
pass
class TestInfoExtractor(unittest.TestCase):
def setUp(self):
- self.ie = TestIE(FakeYDL())
+ self.ie = DummyIE(FakeYDL())
def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
@@ -440,371 +440,430 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
def test_parse_m3u8_formats(self):
_TEST_CASES = [
(
- # https://github.com/ytdl-org/youtube-dl/issues/11507
- # http://pluzz.francetv.fr/videos/le_ministere.html
- 'pluzz_francetv_11507',
- 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ # https://github.com/ytdl-org/youtube-dl/issues/11995
+ # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
+ 'img_bipbop_adv_example_fmp4',
+ 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
[{
- 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
- 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'format_id': 'aud1-English',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'language': 'en',
'ext': 'mp4',
- 'format_id': '180',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.66.30',
- 'tbr': 180,
- 'width': 256,
- 'height': 144,
+ 'protocol': 'm3u8_native',
+ 'audio_ext': 'mp4',
}, {
- 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
- 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'format_id': 'aud2-English',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'language': 'en',
'ext': 'mp4',
- 'format_id': '303',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.66.30',
- 'tbr': 303,
- 'width': 320,
- 'height': 180,
+ 'protocol': 'm3u8_native',
+ 'audio_ext': 'mp4',
}, {
- 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
- 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'format_id': 'aud3-English',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
+ 'language': 'en',
'ext': 'mp4',
- 'format_id': '575',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.66.30',
- 'tbr': 575,
- 'width': 512,
- 'height': 288,
+ 'protocol': 'm3u8_native',
+ 'audio_ext': 'mp4',
}, {
- 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
- 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'format_id': '530',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '831',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.77.30',
- 'tbr': 831,
- 'width': 704,
- 'height': 396,
+ 'protocol': 'm3u8_native',
+ 'width': 480,
+ 'height': 270,
+ 'vcodec': 'avc1.640015',
}, {
- 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
- 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+ 'format_id': '561',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'protocol': 'm3u8',
- 'format_id': '1467',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.77.30',
- 'tbr': 1467,
- 'width': 1024,
- 'height': 576,
- }]
- ),
- (
- # https://github.com/ytdl-org/youtube-dl/issues/11995
- # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
- 'teamcoco_11995',
- 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
- [{
- 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
- 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'protocol': 'm3u8_native',
+ 'width': 480,
+ 'height': 270,
+ 'vcodec': 'avc1.640015',
+ }, {
+ 'format_id': '753',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'audio-0-Default',
- 'protocol': 'm3u8',
- 'vcodec': 'none',
+ 'protocol': 'm3u8_native',
+ 'width': 480,
+ 'height': 270,
+ 'vcodec': 'avc1.640015',
}, {
- 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
- 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'format_id': '895',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'audio-1-Default',
- 'protocol': 'm3u8',
- 'vcodec': 'none',
+ 'protocol': 'm3u8_native',
+ 'width': 640,
+ 'height': 360,
+ 'vcodec': 'avc1.64001e',
}, {
- 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
- 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'format_id': '926',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '71',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.5',
- 'vcodec': 'none',
- 'tbr': 71,
+ 'protocol': 'm3u8_native',
+ 'width': 640,
+ 'height': 360,
+ 'vcodec': 'avc1.64001e',
}, {
- 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
- 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'format_id': '1118',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '413',
- 'protocol': 'm3u8',
- 'acodec': 'none',
- 'vcodec': 'avc1.42001e',
- 'tbr': 413,
- 'width': 400,
- 'height': 224,
+ 'protocol': 'm3u8_native',
+ 'width': 640,
+ 'height': 360,
+ 'vcodec': 'avc1.64001e',
}, {
- 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
- 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'format_id': '1265',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '522',
- 'protocol': 'm3u8',
- 'acodec': 'none',
- 'vcodec': 'avc1.42001e',
- 'tbr': 522,
- 'width': 400,
- 'height': 224,
+ 'protocol': 'm3u8_native',
+ 'width': 768,
+ 'height': 432,
+ 'vcodec': 'avc1.64001e',
}, {
- 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
- 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'format_id': '1295',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '1205',
- 'protocol': 'm3u8',
- 'acodec': 'none',
- 'vcodec': 'avc1.4d001e',
- 'tbr': 1205,
- 'width': 640,
- 'height': 360,
+ 'protocol': 'm3u8_native',
+ 'width': 768,
+ 'height': 432,
+ 'vcodec': 'avc1.64001e',
}, {
- 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
- 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+ 'format_id': '1487',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '2374',
- 'protocol': 'm3u8',
- 'acodec': 'none',
- 'vcodec': 'avc1.4d001f',
- 'tbr': 2374,
- 'width': 1024,
- 'height': 576,
- }]
- ),
- (
- # https://github.com/ytdl-org/youtube-dl/issues/12211
- # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
- 'toggle_mobile_12211',
- 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
- [{
- 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
- 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'protocol': 'm3u8_native',
+ 'width': 768,
+ 'height': 432,
+ 'vcodec': 'avc1.64001e',
+ }, {
+ 'format_id': '2168',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'audio-English',
- 'protocol': 'm3u8',
- 'language': 'eng',
- 'vcodec': 'none',
+ 'protocol': 'm3u8_native',
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.640020',
}, {
- 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
- 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'format_id': '2198',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'audio-Undefined',
- 'protocol': 'm3u8',
- 'language': 'und',
- 'vcodec': 'none',
+ 'protocol': 'm3u8_native',
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.640020',
}, {
- 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
- 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'format_id': '2390',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '155',
- 'protocol': 'm3u8',
- 'tbr': 155.648,
- 'width': 320,
- 'height': 180,
+ 'protocol': 'm3u8_native',
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.640020',
}, {
- 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
- 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'format_id': '3168',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '502',
- 'protocol': 'm3u8',
- 'tbr': 502.784,
- 'width': 480,
- 'height': 270,
+ 'protocol': 'm3u8_native',
+ 'width': 1280,
+ 'height': 720,
+ 'vcodec': 'avc1.640020',
}, {
- 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
- 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'format_id': '3199',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '827',
- 'protocol': 'm3u8',
- 'tbr': 827.392,
- 'width': 640,
- 'height': 360,
+ 'protocol': 'm3u8_native',
+ 'width': 1280,
+ 'height': 720,
+ 'vcodec': 'avc1.640020',
}, {
- 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
- 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+ 'format_id': '3391',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '1396',
- 'protocol': 'm3u8',
- 'tbr': 1396.736,
- 'width': 854,
- 'height': 480,
- }]
- ),
- (
- # http://www.twitch.tv/riotgames/v/6528877
- 'twitch_vod',
- 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
- [{
- 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
- 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'protocol': 'm3u8_native',
+ 'width': 1280,
+ 'height': 720,
+ 'vcodec': 'avc1.640020',
+ }, {
+ 'format_id': '4670',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'Audio Only',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'none',
- 'tbr': 182.725,
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
}, {
- 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
- 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'format_id': '4701',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'Mobile',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.42C00D',
- 'tbr': 280.474,
- 'width': 400,
- 'height': 226,
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
}, {
- 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
- 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'format_id': '4893',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'Low',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.42C01E',
- 'tbr': 628.347,
- 'width': 640,
- 'height': 360,
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
}, {
- 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
- 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'format_id': '6170',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'Medium',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.42C01E',
- 'tbr': 893.387,
- 'width': 852,
- 'height': 480,
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
}, {
- 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
- 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'format_id': '6200',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'High',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.42C01F',
- 'tbr': 1603.789,
- 'width': 1280,
- 'height': 720,
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
}, {
- 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
- 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+ 'format_id': '6392',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': 'Source',
- 'protocol': 'm3u8',
- 'acodec': 'mp4a.40.2',
- 'vcodec': 'avc1.100.31',
- 'tbr': 3214.134,
- 'width': 1280,
- 'height': 720,
- }]
- ),
- (
- # http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
- # EXT-X-STREAM-INF tag with NAME attribute that is not defined
- # in HLS specification
- 'vidio',
- 'https://www.vidio.com/videos/165683/playlist.m3u8',
- [{
- 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
- 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }, {
+ 'format_id': '7968',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '270p 3G',
- 'protocol': 'm3u8',
- 'tbr': 300,
- 'width': 480,
- 'height': 270,
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
}, {
- 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
- 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+ 'format_id': '7998',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '360p SD',
- 'protocol': 'm3u8',
- 'tbr': 600,
- 'width': 640,
- 'height': 360,
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
}, {
- 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
- 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+ 'format_id': '8190',
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8',
'ext': 'mp4',
- 'format_id': '720p HD',
- 'protocol': 'm3u8',
- 'tbr': 1200,
- 'width': 1280,
- 'height': 720,
- }]
+ 'protocol': 'm3u8_native',
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.64002a',
+ }],
+ {}
),
(
- # https://github.com/ytdl-org/youtube-dl/issues/18923
- # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
- 'ted_18923',
- 'http://hls.ted.com/talks/31241.m3u8',
+ 'bipbop_16x9',
+ 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
[{
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '600k-Audio',
+ 'format_id': 'bipbop_audio-BipBop Audio 2',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/alternate_audio_aac/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'language': 'eng',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
'vcodec': 'none',
+ 'audio_ext': 'mp4',
+ 'video_ext': 'none',
}, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '68',
+ 'format_id': '41',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear0/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 41.457,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
'vcodec': 'none',
+ 'acodec': 'mp4a.40.2',
+ 'audio_ext': 'mp4',
+ 'video_ext': 'none',
+ 'abr': 41.457,
}, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '163',
- 'acodec': 'none',
- 'width': 320,
- 'height': 180,
- }, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '481',
- 'acodec': 'none',
- 'width': 512,
- 'height': 288,
- }, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '769',
- 'acodec': 'none',
- 'width': 512,
- 'height': 288,
- }, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '984',
- 'acodec': 'none',
- 'width': 512,
- 'height': 288,
+ 'format_id': '263',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear1/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 263.851,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 416,
+ 'height': 234,
+ 'vcodec': 'avc1.4d400d',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 263.851,
+ 'abr': 0,
}, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '1255',
- 'acodec': 'none',
+ 'format_id': '577',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear2/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 577.61,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
'width': 640,
'height': 360,
+ 'vcodec': 'avc1.4d401e',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 577.61,
+ 'abr': 0,
}, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '1693',
- 'acodec': 'none',
- 'width': 853,
- 'height': 480,
+ 'format_id': '915',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear3/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 915.905,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 960,
+ 'height': 540,
+ 'vcodec': 'avc1.4d401f',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 915.905,
+ 'abr': 0,
}, {
- 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
- 'format_id': '2462',
- 'acodec': 'none',
+ 'format_id': '1030',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear4/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 1030.138,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
'width': 1280,
'height': 720,
- }]
+ 'vcodec': 'avc1.4d401f',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 1030.138,
+ 'abr': 0,
+ }, {
+ 'format_id': '1924',
+ 'format_index': None,
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear5/prog_index.m3u8',
+ 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+ 'tbr': 1924.009,
+ 'ext': 'mp4',
+ 'fps': None,
+ 'protocol': 'm3u8_native',
+ 'preference': None,
+ 'quality': None,
+ 'width': 1920,
+ 'height': 1080,
+ 'vcodec': 'avc1.4d401f',
+ 'acodec': 'mp4a.40.2',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 1924.009,
+ 'abr': 0,
+ }],
+ {
+ 'en': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ 'fr': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ 'es': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ 'ja': [{
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }, {
+ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8',
+ 'ext': 'vtt',
+ 'protocol': 'm3u8_native'
+ }],
+ }
),
]
- for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+ for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
mode='r', encoding='utf-8') as f:
- formats = self.ie._parse_m3u8_formats(
+ formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
f.read(), m3u8_url, ext='mp4')
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
+ expect_value(self, subs, expected_subs, None)
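The hunk above migrates this test from _parse_m3u8_formats to _parse_m3u8_formats_and_subtitles, which returns a (formats, subtitles) pair instead of a bare format list. A minimal sketch of the new calling convention (ie and manifest are placeholders, not part of this patch):

    # subtitles maps language codes to track lists, e.g.
    # {'en': [{'url': '...', 'ext': 'vtt', 'protocol': 'm3u8_native'}]}
    formats, subtitles = ie._parse_m3u8_formats_and_subtitles(
        manifest, 'https://example.com/master.m3u8', ext='mp4')
    ie._sort_formats(formats)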
def test_parse_mpd_formats(self):
_TEST_CASES = [
@@ -890,7 +949,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 5997.485,
'width': 1920,
'height': 1080,
- }]
+ }],
+ {},
), (
# https://github.com/ytdl-org/youtube-dl/pull/14844
'urls_only',
@@ -973,7 +1033,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 4400,
'width': 1920,
'height': 1080,
- }]
+ }],
+ {},
), (
# https://github.com/ytdl-org/youtube-dl/issues/20346
# Media considered unfragmented even though it contains
@@ -1019,18 +1080,328 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 360,
'height': 360,
'fps': 30,
- }]
+ }],
+ {},
+ ), (
+ 'subtitles',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
+ [{
+ 'format_id': 'audio=128001',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'm4a',
+ 'tbr': 128.001,
+ 'asr': 48000,
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'vcodec': 'none',
+ 'acodec': 'mp4a.40.2',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'audio_ext': 'm4a',
+ 'video_ext': 'none',
+ 'abr': 128.001,
+ }, {
+ 'format_id': 'video=100000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 336,
+ 'height': 144,
+ 'tbr': 100,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 100,
+ }, {
+ 'format_id': 'video=326000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 562,
+ 'height': 240,
+ 'tbr': 326,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 326,
+ }, {
+ 'format_id': 'video=698000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 844,
+ 'height': 360,
+ 'tbr': 698,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 698,
+ }, {
+ 'format_id': 'video=1493000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 1126,
+ 'height': 480,
+ 'tbr': 1493,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 1493,
+ }, {
+ 'format_id': 'video=4482000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 1688,
+ 'height': 720,
+ 'tbr': 4482,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ 'video_ext': 'mp4',
+ 'audio_ext': 'none',
+ 'vbr': 4482,
+ }],
+ {
+ 'en': [
+ {
+ 'ext': 'mp4',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }
+ ]
+ },
)
]
- for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
+ for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f:
- formats = self.ie._parse_mpd_formats(
+ formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
compat_etree_fromstring(f.read().encode('utf-8')),
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
+ expect_value(self, subtitles, expected_subtitles, None)
+
+ def test_parse_ism_formats(self):
+ _TEST_CASES = [
+ (
+ 'sintel',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ [{
+ 'format_id': 'audio-128',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'isma',
+ 'tbr': 128,
+ 'asr': 48000,
+ 'vcodec': 'none',
+ 'acodec': 'AACL',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'audio',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 0,
+ 'height': 0,
+ 'fourcc': 'AACL',
+ 'codec_private_data': '1190',
+ 'sampling_rate': 48000,
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'audio_ext': 'isma',
+ 'video_ext': 'none',
+ 'abr': 128,
+ }, {
+ 'format_id': 'video-100',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 336,
+ 'height': 144,
+ 'tbr': 100,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 336,
+ 'height': 144,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 100,
+ }, {
+ 'format_id': 'video-326',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 562,
+ 'height': 240,
+ 'tbr': 326,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 562,
+ 'height': 240,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 326,
+ }, {
+ 'format_id': 'video-698',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 844,
+ 'height': 360,
+ 'tbr': 698,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 844,
+ 'height': 360,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 698,
+ }, {
+ 'format_id': 'video-1493',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1126,
+ 'height': 480,
+ 'tbr': 1493,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 1126,
+ 'height': 480,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 1493,
+ }, {
+ 'format_id': 'video-4482',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'ext': 'ismv',
+ 'width': 1688,
+ 'height': 720,
+ 'tbr': 4482,
+ 'vcodec': 'AVC1',
+ 'acodec': 'none',
+ 'protocol': 'ism',
+ '_download_params': {
+ 'stream_type': 'video',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'width': 1688,
+ 'height': 720,
+ 'fourcc': 'AVC1',
+ 'codec_private_data': '00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8',
+ 'channels': 2,
+ 'bits_per_sample': 16,
+ 'nal_unit_length_field': 4
+ },
+ 'video_ext': 'ismv',
+ 'audio_ext': 'none',
+ 'vbr': 4482,
+ }],
+ {
+ 'eng': [
+ {
+ 'ext': 'ismt',
+ 'protocol': 'ism',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
+ '_download_params': {
+ 'stream_type': 'text',
+ 'duration': 8880746666,
+ 'timescale': 10000000,
+ 'fourcc': 'TTML',
+ 'codec_private_data': ''
+ }
+ }
+ ]
+ },
+ ),
+ ]
+
+ for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
+ with io.open('./test/testdata/ism/%s.Manifest' % ism_file,
+ mode='r', encoding='utf-8') as f:
+ formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
+ compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+ expect_value(self, subtitles, expected_subtitles, None)
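The new ISM case mirrors the HLS/DASH changes with _parse_ism_formats_and_subtitles. Each expected entry carries a private _download_params dict that the ISM downloader consumes when rebuilding fragments; a hedged sketch of reading it (keys taken from the expected data above, fmt stands in for one returned format):

    params = fmt['_download_params']
    duration_sec = params['duration'] / params['timescale']  # 10 MHz timescale here
    if params['stream_type'] == 'text':  # the TTML 'eng' subtitle track above
        assert params['fourcc'] == 'TTML'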
def test_parse_f4m_formats(self):
_TEST_CASES = [
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index e48befd..c9dd498 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
@@ -10,14 +10,15 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import copy
+import json
from test.helper import FakeYDL, assertRegexpMatches
from hypervideo_dl import YoutubeDL
-from hypervideo_dl.compat import compat_str, compat_urllib_error
+from hypervideo_dl.compat import compat_os_name, compat_setenv, compat_str, compat_urllib_error
from hypervideo_dl.extractor import YoutubeIE
from hypervideo_dl.extractor.common import InfoExtractor
from hypervideo_dl.postprocessor.common import PostProcessor
-from hypervideo_dl.utils import ExtractorError, match_filter_func
+from hypervideo_dl.utils import ExtractorError, int_or_none, match_filter_func, LazyList
TEST_URL = 'http://localhost/sample.mp4'
@@ -29,11 +30,15 @@ class YDL(FakeYDL):
self.msgs = []
def process_info(self, info_dict):
+ info_dict.pop('__original_infodict', None)
self.downloaded_info_dicts.append(info_dict)
def to_screen(self, msg):
self.msgs.append(msg)
+ def dl(self, *args, **kwargs):
+ assert False, 'Downloader must not be invoked for test_YoutubeDL'
+
def _make_result(formats, **kwargs):
res = {
@@ -42,6 +47,7 @@ def _make_result(formats, **kwargs):
'title': 'testttitle',
'extractor': 'testex',
'extractor_key': 'TestEx',
+ 'webpage_url': 'http://example.com/watch?v=shenanigans',
}
res.update(**kwargs)
return res
@@ -77,7 +83,7 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'mp4')
- # No prefer_free_formats => prefer mp4 and flv for greater compatibility
+ # No prefer_free_formats => prefer mp4 and webm
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
@@ -103,7 +109,7 @@ class TestFormatSelection(unittest.TestCase):
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['ext'], 'flv')
+ self.assertEqual(downloaded['ext'], 'webm')
def test_format_selection(self):
formats = [
@@ -115,35 +121,24 @@ class TestFormatSelection(unittest.TestCase):
]
info_dict = _make_result(formats)
- ydl = YDL({'format': '20/47'})
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '47')
-
- ydl = YDL({'format': '20/71/worst'})
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '35')
-
- ydl = YDL()
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '2')
-
- ydl = YDL({'format': 'webm/mp4'})
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '47')
-
- ydl = YDL({'format': '3gp/40/mp4'})
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '35')
-
- ydl = YDL({'format': 'example-with-dashes'})
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], 'example-with-dashes')
+ def test(inp, *expected, multi=False):
+ ydl = YDL({
+ 'format': inp,
+ 'allow_multiple_video_streams': multi,
+ 'allow_multiple_audio_streams': multi,
+ })
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts)
+ self.assertEqual(list(downloaded), list(expected))
+
+ test('20/47', '47')
+ test('20/71/worst', '35')
+ test(None, '2')
+ test('webm/mp4', '47')
+ test('3gp/40/mp4', '35')
+ test('example-with-dashes', 'example-with-dashes')
+ test('all', '35', 'example-with-dashes', '45', '47', '2') # Order doesn't actually matter for this
+ test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
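The rewritten selection assertions fold the repeated YDL boilerplate into a local test(spec, *expected_ids, multi=False) helper. 'mergeall' with multi=True merges every selected stream, hence the '+'-joined expected format_id. Outside the suite the same spec goes straight into the format option; a sketch under that assumption (the URL is a placeholder):

    from hypervideo_dl import YoutubeDL

    opts = {
        'format': 'mergeall',
        'allow_multiple_video_streams': True,
        'allow_multiple_audio_streams': True,
    }
    with YoutubeDL(opts) as ydl:
        ydl.download(['https://example.com/watch?v=placeholder'])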
def test_format_selection_audio(self):
formats = [
@@ -310,6 +305,9 @@ class TestFormatSelection(unittest.TestCase):
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_youtube_format_selection(self):
+ # FIXME: Rewrite in accordance with the new format sorting options
+ return
+
order = [
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
# Apple HTTP Live Streaming
@@ -347,7 +345,7 @@ class TestFormatSelection(unittest.TestCase):
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], '137+141')
+ self.assertEqual(downloaded['format_id'], '248+172')
self.assertEqual(downloaded['ext'], 'mp4')
info_dict = _make_result(list(formats_order), extractor='youtube')
@@ -456,15 +454,13 @@ class TestFormatSelection(unittest.TestCase):
def test_invalid_format_specs(self):
def assert_syntax_error(format_spec):
- ydl = YDL({'format': format_spec})
- info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}])
- self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict)
+ self.assertRaises(SyntaxError, YDL, {'format': format_spec})
assert_syntax_error('bestvideo,,best')
assert_syntax_error('+bestaudio')
assert_syntax_error('bestvideo+')
assert_syntax_error('/')
- assert_syntax_error('bestvideo+bestvideo+bestaudio')
+ assert_syntax_error('[720<height]')
def test_format_filtering(self):
formats = [
@@ -535,19 +531,19 @@ class TestFormatSelection(unittest.TestCase):
def test_default_format_spec(self):
ydl = YDL({'simulate': True})
- self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
ydl = YDL({'simulate': True})
- self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo*+bestaudio/best')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
- self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo*+bestaudio/best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
@@ -568,6 +564,7 @@ class TestYoutubeDL(unittest.TestCase):
'subtitles': subtitles,
'automatic_captions': auto_captions,
'extractor': 'TEST',
+ 'webpage_url': 'http://example.com/watch?v=shenanigans',
}
def get_info(params={}):
@@ -597,6 +594,26 @@ class TestYoutubeDL(unittest.TestCase):
self.assertTrue(subs)
self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['fr']))
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['en']))
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'en']))
+
result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
subs = result['requested_subtitles']
self.assertTrue(subs)
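The added subtitleslangs cases pin down the selection mini-language: entries are evaluated left to right, 'all' selects every language, a leading '-' removes previously matched languages, and plain entries behave as regular expressions. In summary:

    # ['all', '-en']       -> everything except English: {'es', 'fr'}
    # ['en', 'fr', '-en']  -> {'fr'}; '-en' drops the earlier 'en'
    # ['-en', 'en']        -> {'en'}; the later entry re-adds it
    # ['e.+']              -> regex match: {'es', 'en'}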
@@ -623,47 +640,195 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(test_dict['extractor'], 'Foo')
self.assertEqual(test_dict['playlist'], 'funny videos')
- def test_prepare_filename(self):
- info = {
- 'id': '1234',
- 'ext': 'mp4',
- 'width': None,
- 'height': 1080,
- 'title1': '$PATH',
- 'title2': '%PATH%',
- }
+ outtmpl_info = {
+ 'id': '1234',
+ 'ext': 'mp4',
+ 'width': None,
+ 'height': 1080,
+ 'title1': '$PATH',
+ 'title2': '%PATH%',
+ 'title3': 'foo/bar\\test',
+ 'title4': 'foo "bar" test',
+ 'title5': 'áéí 𝐀',
+ 'timestamp': 1618488000,
+ 'duration': 100000,
+ 'playlist_index': 1,
+ 'playlist_autonumber': 2,
+ '_last_playlist_index': 100,
+ 'n_entries': 10,
+ 'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}]
+ }
- def fname(templ, na_placeholder='NA'):
- params = {'outtmpl': templ}
- if na_placeholder != 'NA':
- params['outtmpl_na_placeholder'] = na_placeholder
+ def test_prepare_outtmpl_and_filename(self):
+ def test(tmpl, expected, *, info=None, **params):
+ params['outtmpl'] = tmpl
ydl = YoutubeDL(params)
- return ydl.prepare_filename(info)
- self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
- self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
- NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
- # Replace missing fields with 'NA' by default
- self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
- # Or by provided placeholder
- self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
- self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
- self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
- self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
- self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
- self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
- self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
- self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
- self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
- self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
- self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
- self.assertEqual(fname('%%'), '%')
- self.assertEqual(fname('%%%%'), '%%')
- self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
- self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
- self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
- self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
- self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
- self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
+ ydl._num_downloads = 1
+ self.assertEqual(ydl.validate_outtmpl(tmpl), None)
+
+ out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info)
+ fname = ydl.prepare_filename(info or self.outtmpl_info)
+
+ if not isinstance(expected, (list, tuple)):
+ expected = (expected, expected)
+ for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected):
+ if callable(expect):
+ self.assertTrue(expect(got), f'Wrong {name} from {tmpl}')
+ else:
+ self.assertEqual(got, expect, f'Wrong {name} from {tmpl}')
+
+ # Side-effects
+ original_infodict = dict(self.outtmpl_info)
+ test('foo.bar', 'foo.bar')
+ original_infodict['epoch'] = self.outtmpl_info.get('epoch')
+ self.assertTrue(isinstance(original_infodict['epoch'], int))
+ test('%(epoch)d', int_or_none)
+ self.assertEqual(original_infodict, self.outtmpl_info)
+
+ # Auto-generated fields
+ test('%(id)s.%(ext)s', '1234.mp4')
+ test('%(duration_string)s', ('27:46:40', '27-46-40'))
+ test('%(resolution)s', '1080p')
+ test('%(playlist_index)s', '001')
+ test('%(playlist_autonumber)s', '02')
+ test('%(autonumber)s', '00001')
+ test('%(autonumber+2)03d', '005', autonumber_start=3)
+ test('%(autonumber)s', '001', autonumber_size=3)
+
+ # Escaping %
+ test('%', '%')
+ test('%%', '%')
+ test('%%%%', '%%')
+ test('%s', '%s')
+ test('%%%s', '%%s')
+ test('%d', '%d')
+ test('%abc%', '%abc%')
+ test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
+ test('%%%(height)s', '%1080')
+ test('%(width)06d.%(ext)s', 'NA.mp4')
+ test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
+ test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
+
+ # ID sanitization
+ test('%(id)s', '_abcd', info={'id': '_abcd'})
+ test('%(some_id)s', '_abcd', info={'some_id': '_abcd'})
+ test('%(formats.0.id)s', '_abcd', info={'formats': [{'id': '_abcd'}]})
+ test('%(id)s', '-abcd', info={'id': '-abcd'})
+ test('%(id)s', '.abcd', info={'id': '.abcd'})
+ test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
+ test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
+
+ # Invalid templates
+ self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
+ test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
+ test('%(..)s', 'NA')
+
+ # Entire info_dict
+ def expect_same_infodict(out):
+ got_dict = json.loads(out)
+ for info_field, expected in self.outtmpl_info.items():
+ self.assertEqual(got_dict.get(info_field), expected, info_field)
+ return True
+
+ test('%()j', (expect_same_infodict, str))
+
+ # NA placeholder
+ NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
+ test(NA_TEST_OUTTMPL, 'NA-NA-def-1234.mp4')
+ test(NA_TEST_OUTTMPL, 'none-none-def-1234.mp4', outtmpl_na_placeholder='none')
+ test(NA_TEST_OUTTMPL, '--def-1234.mp4', outtmpl_na_placeholder='')
+
+ # String formatting
+ FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s'
+ test(FMT_TEST_OUTTMPL % 's', '1080.mp4')
+ test(FMT_TEST_OUTTMPL % 'd', '1080.mp4')
+ test(FMT_TEST_OUTTMPL % '6d', ' 1080.mp4')
+ test(FMT_TEST_OUTTMPL % '-6d', '1080 .mp4')
+ test(FMT_TEST_OUTTMPL % '06d', '001080.mp4')
+ test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4')
+ test(FMT_TEST_OUTTMPL % ' 0 6d', ' 01080.mp4')
+
+ # Type casting
+ test('%(id)d', '1234')
+ test('%(height)c', '1')
+ test('%(ext)c', 'm')
+ test('%(id)d %(id)r', "1234 '1234'")
+ test('%(id)r %(height)r', "'1234' 1080")
+ test('%(ext)s-%(ext|def)d', 'mp4-def')
+ test('%(width|0)04d', '0000')
+ test('a%(width|)d', 'a', outtmpl_na_placeholder='none')
+
+ FORMATS = self.outtmpl_info['formats']
+ sanitize = lambda x: x.replace(':', ' -').replace('"', "'")
+
+ # Custom type casting
+ test('%(formats.:.id)l', 'id1, id2, id3')
+ test('%(formats.:.id)#l', ('id1\nid2\nid3', 'id1 id2 id3'))
+ test('%(ext)l', 'mp4')
+ test('%(formats.:.id) 15l', ' id1, id2, id3')
+ test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS))))
+ test('%(title5).3B', 'á')
+ test('%(title5)U', 'áéí 𝐀')
+ test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
+ test('%(title5)+U', 'áéí A')
+ test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A')
+ if compat_os_name == 'nt':
+ test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
+ else:
+ test('%(title4)q', ('\'foo "bar" test\'', "'foo 'bar' test'"))
+
+ # Internal formatting
+ test('%(timestamp-1000>%H-%M-%S)s', '11-43-20')
+ test('%(title|%)s %(title|%%)s', '% %%')
+ test('%(id+1-height+3)05d', '00158')
+ test('%(width+100)05d', 'NA')
+ test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0]))))
+ test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0]))))
+ test('%(height.0)03d', '001')
+ test('%(-height.0)04d', '-001')
+ test('%(formats.-1.id)s', FORMATS[-1]['id'])
+ test('%(formats.0.id.-1)d', FORMATS[0]['id'][-1])
+ test('%(formats.3)s', 'NA')
+ test('%(formats.:2:-1)r', repr(FORMATS[:2:-1]))
+ test('%(formats.0.id.-1+id)f', '1235.000000')
+ test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
+
+ # Alternates
+ test('%(title,id)s', '1234')
+ test('%(width-100,height+20|def)d', '1100')
+ test('%(width-100,height+width|def)s', 'def')
+ test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00')
+
+ # Laziness
+ def gen():
+ yield from range(5)
+ raise self.assertTrue(False, 'LazyList should not be evaluated till here')
+ test('%(key.4)s', '4', info={'key': LazyList(gen())})
+
+ # Empty filename
+ test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4')
+ # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme
+ # test('%(foo|)s', ('', '_')) # fixme
+
+ # Environment variable expansion for prepare_filename
+ compat_setenv('__hypervideo_dl_var', 'expanded')
+ envvar = '%__hypervideo_dl_var%' if compat_os_name == 'nt' else '$__hypervideo_dl_var'
+ test(envvar, (envvar, 'expanded'))
+ if compat_os_name == 'nt':
+ test('%s%', ('%s%', '%s%'))
+ compat_setenv('s', 'expanded')
+ test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s
+ compat_setenv('(test)s', 'expanded')
+ test('%(test)s%', ('NA%', 'expanded')) # Environment should take priority over template
+
+ # Path expansion and escaping
+ test('Hello %(title1)s', 'Hello $PATH')
+ test('Hello %(title2)s', 'Hello %PATH%')
+ test('%(title3)s', ('foo/bar\\test', 'foo_bar_test'))
+ test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep))
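Taken together, the new cases cover dotted traversal (%(formats.0.id)s), field arithmetic (%(id+1-height+3)05d), strftime conversion with '>', comma-separated alternates, and '|' defaults. A standalone sketch of the same API, reusing values from outtmpl_info above:

    from hypervideo_dl import YoutubeDL

    ydl = YoutubeDL({'outtmpl': '%(title)s.%(ext)s'})
    info = {'id': '1234', 'ext': 'mp4', 'title': 'demo', 'timestamp': 1618488000}
    assert ydl.validate_outtmpl('%(timestamp-1000>%H-%M-%S)s') is None
    print(ydl.evaluate_outtmpl('%(timestamp-1000>%H-%M-%S)s', info))  # 11-43-20
    print(ydl.prepare_filename(info))  # demo.mp4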
def test_format_note(self):
ydl = YoutubeDL()
@@ -722,7 +887,7 @@ class TestYoutubeDL(unittest.TestCase):
def process_info(self, info_dict):
super(YDL, self).process_info(info_dict)
- def _match_entry(self, info_dict, incomplete):
+ def _match_entry(self, info_dict, incomplete=False):
res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
if res is None:
self.downloaded_info_dicts.append(info_dict)
@@ -738,6 +903,7 @@ class TestYoutubeDL(unittest.TestCase):
'playlist_id': '42',
'uploader': "變態妍字幕版 太妍 тест",
'creator': "тест ' 123 ' тест--",
+ 'webpage_url': 'http://example.com/watch?v=shenanigans',
}
second = {
'id': '2',
@@ -749,6 +915,7 @@ class TestYoutubeDL(unittest.TestCase):
'filesize': 5 * 1024,
'playlist_id': '43',
'uploader': "тест 123",
+ 'webpage_url': 'http://example.com/watch?v=SHENANIGANS',
}
videos = [first, second]
@@ -831,54 +998,32 @@ class TestYoutubeDL(unittest.TestCase):
ydl.process_ie_result(copy.deepcopy(playlist))
return ydl.downloaded_info_dicts
- def get_ids(params):
- return [int(v['id']) for v in get_downloaded_info_dicts(params)]
-
- result = get_ids({})
- self.assertEqual(result, [1, 2, 3, 4])
-
- result = get_ids({'playlistend': 10})
- self.assertEqual(result, [1, 2, 3, 4])
-
- result = get_ids({'playlistend': 2})
- self.assertEqual(result, [1, 2])
-
- result = get_ids({'playliststart': 10})
- self.assertEqual(result, [])
-
- result = get_ids({'playliststart': 2})
- self.assertEqual(result, [2, 3, 4])
-
- result = get_ids({'playlist_items': '2-4'})
- self.assertEqual(result, [2, 3, 4])
-
- result = get_ids({'playlist_items': '2,4'})
- self.assertEqual(result, [2, 4])
-
- result = get_ids({'playlist_items': '10'})
- self.assertEqual(result, [])
-
- result = get_ids({'playlist_items': '3-10'})
- self.assertEqual(result, [3, 4])
-
- result = get_ids({'playlist_items': '2-4,3-4,3'})
- self.assertEqual(result, [2, 3, 4])
+ def test_selection(params, expected_ids):
+ results = [
+ (v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
+ for v in get_downloaded_info_dicts(params)]
+ self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))))
+
+ test_selection({}, [1, 2, 3, 4])
+ test_selection({'playlistend': 10}, [1, 2, 3, 4])
+ test_selection({'playlistend': 2}, [1, 2])
+ test_selection({'playliststart': 10}, [])
+ test_selection({'playliststart': 2}, [2, 3, 4])
+ test_selection({'playlist_items': '2-4'}, [2, 3, 4])
+ test_selection({'playlist_items': '2,4'}, [2, 4])
+ test_selection({'playlist_items': '10'}, [])
+ test_selection({'playlist_items': '0'}, [])
# Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
- # @{
- result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
- self.assertEqual(result[0]['playlist_index'], 2)
- self.assertEqual(result[1]['playlist_index'], 3)
-
- result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
- self.assertEqual(result[0]['playlist_index'], 2)
- self.assertEqual(result[1]['playlist_index'], 3)
- self.assertEqual(result[2]['playlist_index'], 4)
-
- result = get_downloaded_info_dicts({'playlist_items': '4,2'})
- self.assertEqual(result[0]['playlist_index'], 4)
- self.assertEqual(result[1]['playlist_index'], 2)
- # @}
+ test_selection({'playlist_items': '2-4,3-4,3'}, [2, 3, 4])
+ test_selection({'playlist_items': '4,2'}, [4, 2])
+
+ # Tests for https://github.com/hypervideo/hypervideo/issues/720
+ # https://github.com/hypervideo/hypervideo/issues/302
+ test_selection({'playlistreverse': True}, [4, 3, 2, 1])
+ test_selection({'playliststart': 2, 'playlistreverse': True}, [4, 3, 2])
+ test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2])
+ test_selection({'playlist_items': '4,2'}, [4, 2])
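test_selection now also asserts playlist_autonumber (1-based download order) against playlist_index (position within the playlist); the two diverge exactly when playlistreverse or an out-of-order playlist_items is involved:

    # {'playlistreverse': True} on the 4-item playlist:
    #   playlist_autonumber: 1, 2, 3, 4   (order downloaded)
    #   playlist_index:      4, 3, 2, 1   (position in the playlist)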
def test_urlopen_no_file_protocol(self):
# see https://github.com/ytdl-org/youtube-dl/issues/8227
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index eff9b16..2ce0070 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
diff --git a/test/test_aes.py b/test/test_aes.py
index 444c65e..746e447 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-
+#!/usr/bin/env python3
from __future__ import unicode_literals
# Allow direct execution
@@ -8,7 +7,19 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from hypervideo_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
+from hypervideo_dl.aes import (
+ aes_decrypt,
+ aes_encrypt,
+ aes_cbc_decrypt,
+ aes_cbc_decrypt_bytes,
+ aes_cbc_encrypt,
+ aes_ctr_decrypt,
+ aes_ctr_encrypt,
+ aes_gcm_decrypt_and_verify,
+ aes_gcm_decrypt_and_verify_bytes,
+ aes_decrypt_text
+)
+from hypervideo_dl.compat import compat_pycrypto_AES
from hypervideo_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64
@@ -28,18 +39,43 @@ class TestAES(unittest.TestCase):
self.assertEqual(decrypted, msg)
def test_cbc_decrypt(self):
- data = bytes_to_intlist(
- b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
- )
- decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+ data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
+ decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+ if compat_pycrypto_AES:
+ decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_cbc_encrypt(self):
data = bytes_to_intlist(self.secret_msg)
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
self.assertEqual(
encrypted,
- b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
+ b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')
+
+ def test_ctr_decrypt(self):
+ data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
+ decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_ctr_encrypt(self):
+ data = bytes_to_intlist(self.secret_msg)
+ encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv))
+ self.assertEqual(
+ encrypted,
+ b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
+
+ def test_gcm_decrypt(self):
+ data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
+ authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'
+
+ decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
+ bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12]))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+ if compat_pycrypto_AES:
+ decrypted = aes_gcm_decrypt_and_verify_bytes(
+ data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
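The new CTR and GCM vectors reuse the same key/IV fixtures, and the *_bytes variants are only exercised when compat_pycrypto_AES (PyCryptodome) is importable. The two calling conventions differ only in container type; the pure-Python primitives work on int lists, the bytes wrappers on bytes:

    # Round trip between the two representations used above.
    assert intlist_to_bytes(bytes_to_intlist(b'secret')) == b'secret'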
def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode('utf-8')
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index 5d1a8f2..9b490d0 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
# Allow direct execution
@@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import try_rm
-
+from test.helper import try_rm, is_download_test
from hypervideo_dl import YoutubeDL
@@ -32,6 +31,7 @@ def _download_restricted(url, filename, age):
return res
+@is_download_test
class TestAgeRestriction(unittest.TestCase):
def _assert_restricted(self, url, filename, age, old_age=None):
self.assertTrue(_download_restricted(url, filename, old_age))
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 3f6ba11..d9e4bad 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
@@ -35,6 +35,8 @@ class TestAllURLsMatching(unittest.TestCase):
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
assertPlaylist('PL63F0C78739B09958')
+ assertTab('https://www.youtube.com/AsapSCIENCE')
+ assertTab('https://www.youtube.com/embedded')
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
@@ -47,7 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
- self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+ # self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
@@ -66,9 +68,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
- # def test_youtube_search_matching(self):
- # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
- # self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+ def test_youtube_search_matching(self):
+ self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
diff --git a/test/test_cache.py b/test/test_cache.py
index c7a88f9..0776e92 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
diff --git a/test/test_compat.py b/test/test_compat.py
index c68d7fa..5f5d354 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
@@ -19,6 +19,8 @@ from hypervideo_dl.compat import (
compat_shlex_split,
compat_str,
compat_struct_unpack,
+ compat_urllib_parse_quote,
+ compat_urllib_parse_quote_plus,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode,
@@ -28,11 +30,11 @@ from hypervideo_dl.compat import (
class TestCompat(unittest.TestCase):
def test_compat_getenv(self):
test_str = 'тест'
- compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
- self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
+ compat_setenv('hypervideo_dl_COMPAT_GETENV', test_str)
+ self.assertEqual(compat_getenv('hypervideo_dl_COMPAT_GETENV'), test_str)
def test_compat_setenv(self):
- test_var = 'YOUTUBE_DL_COMPAT_SETENV'
+ test_var = 'hypervideo_dl_COMPAT_SETENV'
test_str = 'тест'
compat_setenv(test_var, test_str)
compat_getenv(test_var)
@@ -53,6 +55,27 @@ class TestCompat(unittest.TestCase):
dir(hypervideo_dl.compat))) - set(['unicode_literals'])
self.assertEqual(all_names, sorted(present_names))
+ def test_compat_urllib_parse_quote(self):
+ self.assertEqual(compat_urllib_parse_quote('abc def'), 'abc%20def')
+ self.assertEqual(compat_urllib_parse_quote('/user/abc+def'), '/user/abc%2Bdef')
+ self.assertEqual(compat_urllib_parse_quote('/user/abc+def', safe='+'), '%2Fuser%2Fabc+def')
+ self.assertEqual(compat_urllib_parse_quote(''), '')
+ self.assertEqual(compat_urllib_parse_quote('%'), '%25')
+ self.assertEqual(compat_urllib_parse_quote('%', safe='%'), '%')
+ self.assertEqual(compat_urllib_parse_quote('津波'), '%E6%B4%A5%E6%B3%A2')
+ self.assertEqual(
+ compat_urllib_parse_quote('''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
+%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''', safe='<>=":%/ \r\n'),
+ '''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
+%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a''')
+ self.assertEqual(
+ compat_urllib_parse_quote('''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%25Things%''', safe='% '),
+ '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%''')
+
+ def test_compat_urllib_parse_quote_plus(self):
+ self.assertEqual(compat_urllib_parse_quote_plus('abc def'), 'abc+def')
+ self.assertEqual(compat_urllib_parse_quote_plus('/abc def'), '%2Fabc+def')
+
def test_compat_urllib_parse_unquote(self):
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
diff --git a/test/test_cookies.py b/test/test_cookies.py
new file mode 100644
index 0000000..fb034fc
--- /dev/null
+++ b/test/test_cookies.py
@@ -0,0 +1,107 @@
+import unittest
+from datetime import datetime, timezone
+
+from hypervideo_dl import cookies
+from hypervideo_dl.cookies import (
+ LinuxChromeCookieDecryptor,
+ MacChromeCookieDecryptor,
+ WindowsChromeCookieDecryptor,
+ parse_safari_cookies,
+ pbkdf2_sha1,
+)
+
+
+class Logger:
+ def debug(self, message):
+ print(f'[verbose] {message}')
+
+ def info(self, message):
+ print(message)
+
+ def warning(self, message, only_once=False):
+ self.error(message)
+
+ def error(self, message):
+ raise Exception(message)
+
+
+class MonkeyPatch:
+ def __init__(self, module, temporary_values):
+ self._module = module
+ self._temporary_values = temporary_values
+ self._backup_values = {}
+
+ def __enter__(self):
+ for name, temp_value in self._temporary_values.items():
+ self._backup_values[name] = getattr(self._module, name)
+ setattr(self._module, name, temp_value)
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ for name, backup_value in self._backup_values.items():
+ setattr(self._module, name, backup_value)
+
+
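MonkeyPatch is a hand-rolled context manager that swaps module attributes for the duration of a test and restores them on exit, keeping the file dependency-free. With the standard library the same effect could be had from unittest.mock (a sketch, not part of this patch):

    from unittest import mock

    # Equivalent to MonkeyPatch(cookies, {'_get_linux_keyring_password': ...}):
    with mock.patch.object(cookies, '_get_linux_keyring_password', lambda *a, **kw: b''):
        decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())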
+class TestCookies(unittest.TestCase):
+ def test_chrome_cookie_decryptor_linux_derive_key(self):
+ key = LinuxChromeCookieDecryptor.derive_key(b'abc')
+ self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17')
+
+ def test_chrome_cookie_decryptor_mac_derive_key(self):
+ key = MacChromeCookieDecryptor.derive_key(b'abc')
+ self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY')
+
+ def test_chrome_cookie_decryptor_linux_v10(self):
+ with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
+ encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6'
+ value = 'USD'
+ decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_chrome_cookie_decryptor_linux_v11(self):
+ with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b'',
+ 'KEYRING_AVAILABLE': True}):
+ encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd'
+ value = 'tz=Europe.London'
+ decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_chrome_cookie_decryptor_windows_v10(self):
+ with MonkeyPatch(cookies, {
+ '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&'
+ }):
+ encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad='
+ value = '32101439'
+ decryptor = WindowsChromeCookieDecryptor('', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_chrome_cookie_decryptor_mac_v10(self):
+ with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
+ encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
+ value = '2021-06-01-22'
+ decryptor = MacChromeCookieDecryptor('', Logger())
+ self.assertEqual(decryptor.decrypt(encrypted_value), value)
+
+ def test_safari_cookie_parsing(self):
+ cookies = \
+ b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \
+ b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \
+ b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \
+ b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \
+ b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00('
+
+ jar = parse_safari_cookies(cookies)
+ self.assertEqual(len(jar), 1)
+ cookie = list(jar)[0]
+ self.assertEqual(cookie.domain, 'localhost')
+ self.assertEqual(cookie.port, None)
+ self.assertEqual(cookie.path, '/')
+ self.assertEqual(cookie.name, 'foo')
+ self.assertEqual(cookie.value, 'test%20%3Bcookie')
+ self.assertFalse(cookie.secure)
+ expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc)
+ self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))
+
+ def test_pbkdf2_sha1(self):
+ key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
+ self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')
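pbkdf2_sha1(password, salt, iterations, key_length) implements standard PBKDF2-HMAC-SHA1, so the vector above should be reproducible with hashlib (a cross-check sketch, assuming the pure-Python implementation matches the standard):

    import hashlib

    key = hashlib.pbkdf2_hmac('sha1', b'peanuts', b' ' * 16, 1, dklen=16)
    assert key == b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34'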
diff --git a/test/test_download.py b/test/test_download.py
index a47369e..8b5eea5 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
@@ -10,12 +10,13 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import (
assertGreaterEqual,
+ expect_info_dict,
expect_warnings,
get_params,
gettestcases,
- expect_info_dict,
- try_rm,
+ is_download_test,
report_warning,
+ try_rm,
)
@@ -64,6 +65,7 @@ def _file_md5(fn):
defs = gettestcases()
+@is_download_test
class TestDownload(unittest.TestCase):
# Parallel testing in nosetests. See
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
@@ -71,6 +73,8 @@ class TestDownload(unittest.TestCase):
maxDiff = None
+ COMPLETED_TESTS = {}
+
def __str__(self):
"""Identify each test with the `add_ie` attribute, if available."""
@@ -92,6 +96,9 @@ class TestDownload(unittest.TestCase):
def generator(test_case, tname):
def test_template(self):
+ if self.COMPLETED_TESTS.get(tname):
+ return
+ self.COMPLETED_TESTS[tname] = True
ie = hypervideo_dl.extractor.get_info_extractor(test_case['name'])()
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
is_playlist = any(k.startswith('playlist') for k in test_case)
@@ -106,8 +113,13 @@ def generator(test_case, tname):
for tc in test_cases:
info_dict = tc.get('info_dict', {})
- if not (info_dict.get('id') and info_dict.get('ext')):
- raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+ params = tc.get('params', {})
+ if not info_dict.get('id'):
+ raise Exception('Test definition incorrect. \'id\' key is not present')
+ elif not info_dict.get('ext'):
+ if params.get('skip_download') and params.get('ignore_no_formats_error'):
+ continue
+ raise Exception('Test definition incorrect. The output file cannot be known. \'ext\' key is not present')
if 'skip' in test_case:
print_skipping(test_case['skip'])
@@ -121,6 +133,7 @@ def generator(test_case, tname):
params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist')
+ params.setdefault('playlistend', test_case.get('playlist_mincount'))
params.setdefault('skip_download', True)
ydl = YoutubeDL(params, auto_init=False)
@@ -134,7 +147,7 @@ def generator(test_case, tname):
expect_warnings(ydl, test_case.get('expected_warnings', []))
def get_tc_filename(tc):
- return ydl.prepare_filename(tc.get('info_dict', {}))
+ return ydl.prepare_filename(dict(tc.get('info_dict', {})))
res_dict = None
@@ -247,12 +260,12 @@ def generator(test_case, tname):
# And add them to TestDownload
-for n, test_case in enumerate(defs):
- tname = 'test_' + str(test_case['name'])
- i = 1
- while hasattr(TestDownload, tname):
- tname = 'test_%s_%d' % (test_case['name'], i)
- i += 1
+tests_counter = {}
+for test_case in defs:
+ name = test_case['name']
+ i = tests_counter.get(name, 0)
+ tests_counter[name] = i + 1
+ tname = f'test_{name}_{i}' if i else f'test_{name}'
test_method = generator(test_case, tname)
test_method.__name__ = str(tname)
ie_list = test_case.get('add_ie')
@@ -261,5 +274,22 @@ for n, test_case in enumerate(defs):
del test_method
+def batch_generator(name, num_tests):
+
+ def test_template(self):
+ for i in range(num_tests):
+ getattr(self, f'test_{name}_{i}' if i else f'test_{name}')()
+
+ return test_template
+
+
+for name, num_tests in tests_counter.items():
+ test_method = batch_generator(name, num_tests)
+ test_method.__name__ = f'test_{name}_all'
+ test_method.add_ie = ''
+ setattr(TestDownload, test_method.__name__, test_method)
+ del test_method
+
+
if __name__ == '__main__':
unittest.main()
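The counter-based registration replaces the old hasattr probing: the first case for an extractor keeps the bare name, later ones get numeric suffixes, and batch_generator adds a _all method that runs the whole group in order. The naming scheme in isolation:

    tests_counter = {}
    for name in ('Foo', 'Foo', 'Foo'):
        i = tests_counter.get(name, 0)
        tests_counter[name] = i + 1
        print(f'test_{name}_{i}' if i else f'test_{name}')
    # -> test_Foo, test_Foo_1, test_Foo_2 (plus a generated test_Foo_all)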
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 5296de8..81b7dee 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
diff --git a/test/test_execution.py b/test/test_execution.py
index f049551..d9aa965 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
diff --git a/test/test_http.py b/test/test_http.py
index 6eaef81..a7656b0 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
diff --git a/test/test_overwrites.py b/test/test_overwrites.py
new file mode 100644
index 0000000..9ad9bba
--- /dev/null
+++ b/test/test_overwrites.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+from __future__ import unicode_literals
+
+import os
+from os.path import join
+import subprocess
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import is_download_test, try_rm
+
+
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+download_file = join(root_dir, 'test.webm')
+
+
+@is_download_test
+class TestOverwrites(unittest.TestCase):
+ def setUp(self):
+ # create an empty file
+ open(download_file, 'a').close()
+
+ def test_default_overwrites(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'hypervideo_dl/__main__.py',
+ '-o', 'test.webm',
+ 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
+ ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'has already been downloaded' in sout)
+ # if the file has no content, it has not been redownloaded
+ self.assertTrue(os.path.getsize(download_file) < 1)
+
+ def test_yes_overwrites(self):
+ outp = subprocess.Popen(
+ [
+ sys.executable, 'hypervideo_dl/__main__.py', '--yes-overwrites',
+ '-o', 'test.webm',
+ 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
+ ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ sout, serr = outp.communicate()
+ self.assertTrue(b'has already been downloaded' not in sout)
+ # if the file has no content, it has not been redownloaded
+ self.assertTrue(os.path.getsize(download_file) > 1)
+
+ def tearDown(self):
+ try_rm(join(root_dir, 'test.webm'))
+
+
+if __name__ == '__main__':
+ unittest.main()
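These subprocess tests pin the CLI contract: by default an existing output file is kept and 'has already been downloaded' is printed, while --yes-overwrites forces a re-download. Through the API the switch is presumably the overwrites parameter (an assumption inferred from the flag name, not shown in this patch):

    from hypervideo_dl import YoutubeDL

    # Assumption: --yes-overwrites corresponds to params['overwrites'] = True.
    ydl = YoutubeDL({'outtmpl': 'test.webm', 'overwrites': True})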
diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py
new file mode 100644
index 0000000..8f3b03a
--- /dev/null
+++ b/test/test_post_hooks.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+from __future__ import unicode_literals
+
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_params, try_rm, is_download_test
+import hypervideo_dl.YoutubeDL
+from hypervideo_dl.utils import DownloadError
+
+
+class YoutubeDL(hypervideo_dl.YoutubeDL):
+ def __init__(self, *args, **kwargs):
+ super(YoutubeDL, self).__init__(*args, **kwargs)
+ self.to_stderr = self.to_screen
+
+
+TEST_ID = 'gr51aVj-mLg'
+EXPECTED_NAME = 'gr51aVj-mLg'
+
+
+@is_download_test
+class TestPostHooks(unittest.TestCase):
+ def setUp(self):
+ self.stored_name_1 = None
+ self.stored_name_2 = None
+ self.params = get_params({
+ 'skip_download': False,
+ 'writeinfojson': False,
+ 'quiet': True,
+ 'verbose': False,
+ 'cachedir': False,
+ })
+ self.files = []
+
+ def test_post_hooks(self):
+ self.params['post_hooks'] = [self.hook_one, self.hook_two]
+ ydl = YoutubeDL(self.params)
+ ydl.download([TEST_ID])
+ self.assertEqual(self.stored_name_1, EXPECTED_NAME, 'Not the expected name from hook 1')
+ self.assertEqual(self.stored_name_2, EXPECTED_NAME, 'Not the expected name from hook 2')
+
+ def test_post_hook_exception(self):
+ self.params['post_hooks'] = [self.hook_three]
+ ydl = YoutubeDL(self.params)
+ self.assertRaises(DownloadError, ydl.download, [TEST_ID])
+
+ def hook_one(self, filename):
+ self.stored_name_1, _ = os.path.splitext(os.path.basename(filename))
+ self.files.append(filename)
+
+ def hook_two(self, filename):
+ self.stored_name_2, _ = os.path.splitext(os.path.basename(filename))
+ self.files.append(filename)
+
+ def hook_three(self, filename):
+ self.files.append(filename)
+ raise Exception('Test exception for \'%s\'' % filename)
+
+ def tearDown(self):
+ for f in self.files:
+ try_rm(f)
+
+
+if __name__ == '__main__':
+ unittest.main()
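The post-hook tests above pin the public contract of the new post_hooks parameter: each callable receives the final filename once downloading and processing finish, and an exception raised inside a hook surfaces as DownloadError. A minimal usage sketch, assuming network access to the same test video:

    import hypervideo_dl

    def announce(filename):
        print('finished:', filename)

    params = {'post_hooks': [announce], 'quiet': True}
    with hypervideo_dl.YoutubeDL(params) as ydl:
        ydl.download(['gr51aVj-mLg'])  # the video ID used by TestPostHooks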
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index bbea1c9..42f37b8 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
@@ -6,12 +6,557 @@ from __future__ import unicode_literals
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from hypervideo_dl.postprocessor import MetadataFromTitlePP
+from hypervideo_dl import YoutubeDL
+from hypervideo_dl.compat import compat_shlex_quote
+from hypervideo_dl.postprocessor import (
+ ExecPP,
+ FFmpegThumbnailsConvertorPP,
+ MetadataFromFieldPP,
+ MetadataParserPP,
+ ModifyChaptersPP
+)
+
-class TestMetadataFromTitle(unittest.TestCase):
+class TestMetadataFromField(unittest.TestCase):
def test_format_to_regex(self):
- pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
- self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
+ self.assertEqual(
+ MetadataParserPP.format_to_regex('%(title)s - %(artist)s'),
+ r'(?P<title>.+)\ \-\ (?P<artist>.+)')
+ self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)')
+
+ def test_field_to_template(self):
+ self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s')
+ self.assertEqual(MetadataParserPP.field_to_template('1'), '1')
+ self.assertEqual(MetadataParserPP.field_to_template('foo bar'), 'foo bar')
+ self.assertEqual(MetadataParserPP.field_to_template(' literal'), ' literal')
+
+ def test_metadatafromfield(self):
+ self.assertEqual(
+ MetadataFromFieldPP.to_action('%(title)s \\: %(artist)s:%(title)s : %(artist)s'),
+ (MetadataParserPP.Actions.INTERPRET, '%(title)s : %(artist)s', '%(title)s : %(artist)s'))
+
+
+class TestConvertThumbnail(unittest.TestCase):
+ def test_escaping(self):
+ pp = FFmpegThumbnailsConvertorPP()
+ if not pp.available:
+ print('Skipping: ffmpeg not found')
+ return
+
+ file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}'
+ tests = (('webp', 'png'), ('png', 'jpg'))
+
+ for inp, out in tests:
+ out_file = file.format(out)
+ if os.path.exists(out_file):
+ os.remove(out_file)
+ pp.convert_thumbnail(file.format(inp), out)
+ assert os.path.exists(out_file)
+
+ for _, out in tests:
+ os.remove(file.format(out))
+
+
+class TestExec(unittest.TestCase):
+ def test_parse_cmd(self):
+ pp = ExecPP(YoutubeDL(), '')
+ info = {'filepath': 'file name'}
+ cmd = 'echo %s' % compat_shlex_quote(info['filepath'])
+
+ self.assertEqual(pp.parse_cmd('echo', info), cmd)
+ self.assertEqual(pp.parse_cmd('echo {}', info), cmd)
+ self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd)
+
+
+class TestModifyChaptersPP(unittest.TestCase):
+ def setUp(self):
+ self._pp = ModifyChaptersPP(YoutubeDL())
+
+ @staticmethod
+ def _sponsor_chapter(start, end, cat, remove=False):
+ c = {'start_time': start, 'end_time': end, '_categories': [(cat, start, end)]}
+ if remove:
+ c['remove'] = True
+ return c
+
+ @staticmethod
+ def _chapter(start, end, title=None, remove=False):
+ c = {'start_time': start, 'end_time': end}
+ if title is not None:
+ c['title'] = title
+ if remove:
+ c['remove'] = True
+ return c
+
+ def _chapters(self, ends, titles):
+ self.assertEqual(len(ends), len(titles))
+ start = 0
+ chapters = []
+ for e, t in zip(ends, titles):
+ chapters.append(self._chapter(start, e, t))
+ start = e
+ return chapters
+
+ def _remove_marked_arrange_sponsors_test_impl(
+ self, chapters, expected_chapters, expected_removed):
+ actual_chapters, actual_removed = (
+ self._pp._remove_marked_arrange_sponsors(chapters))
+ for c in actual_removed:
+ c.pop('title', None)
+ c.pop('_categories', None)
+ actual_chapters = [{
+ 'start_time': c['start_time'],
+ 'end_time': c['end_time'],
+ 'title': c['title'],
+ } for c in actual_chapters]
+ self.assertSequenceEqual(expected_chapters, actual_chapters)
+ self.assertSequenceEqual(expected_removed, actual_removed)
+
+ def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self):
+ chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(30, 40, 'preview'),
+ self._sponsor_chapter(50, 60, 'sponsor')]
+ expected = self._chapters(
+ [10, 20, 30, 40, 50, 60, 70],
+ ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap',
+ 'c', '[SponsorBlock]: Sponsor', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self):
+ chapters = self._chapters([120], ['c']) + [
+ self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'),
+ self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'),
+ self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')]
+ expected = self._chapters(
+ [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120],
+ ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ '[SponsorBlock]: Sponsor',
+ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion',
+ 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Sponsor',
+ '[SponsorBlock]: Unpaid/Self Promotion'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithCuts(self):
+ cuts = [self._chapter(10, 20, remove=True),
+ self._sponsor_chapter(30, 40, 'sponsor', remove=True),
+ self._chapter(50, 60, remove=True)]
+ chapters = self._chapters([70], ['c']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([40], ['c']), cuts)
+
+ def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(30, 40, 'selfpromo', remove=True),
+ self._sponsor_chapter(50, 60, 'interaction')]
+ expected = self._chapters([10, 20, 40, 50, 60],
+ ['c', '[SponsorBlock]: Sponsor', 'c',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 40, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self):
+ cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True),
+ self._chapter(40, 50, remove=True)]
+ chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts
+ expected = self._chapters(
+ [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self):
+ cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)]
+ chapters = self._chapters([60], ['c']) + [
+ self._sponsor_chapter(10, 20, 'intro'),
+ self._sponsor_chapter(30, 40, 'sponsor'),
+ self._sponsor_chapter(50, 60, 'outro'),
+ ] + cuts
+ expected = self._chapters(
+ [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(20, 30, 'selfpromo'),
+ self._sponsor_chapter(30, 40, 'interaction')]
+ expected = self._chapters(
+ [10, 20, 30, 40, 70],
+ ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 20, 'sponsor'),
+ self._sponsor_chapter(20, 30, 'interaction', remove=True),
+ self._chapter(30, 40, remove=True),
+ self._sponsor_chapter(40, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(50, 60, 'interaction')]
+ expected = self._chapters([10, 20, 30, 40],
+ ['c', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(20, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 30, 'sponsor'),
+ self._sponsor_chapter(20, 50, 'selfpromo'),
+ self._sponsor_chapter(40, 60, 'interaction')]
+ expected = self._chapters(
+ [10, 20, 30, 40, 50, 60, 70],
+ ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 30, 'sponsor', remove=True),
+ self._sponsor_chapter(20, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(40, 60, 'interaction', remove=True)]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self):
+ chapters = self._chapters([170], ['c']) + [
+ self._sponsor_chapter(0, 30, 'intro'),
+ self._sponsor_chapter(20, 50, 'sponsor'),
+ self._sponsor_chapter(40, 60, 'selfpromo'),
+ self._sponsor_chapter(70, 90, 'sponsor'),
+ self._sponsor_chapter(80, 100, 'sponsor'),
+ self._sponsor_chapter(90, 110, 'sponsor'),
+ self._sponsor_chapter(120, 140, 'selfpromo'),
+ self._sponsor_chapter(130, 160, 'interaction'),
+ self._sponsor_chapter(150, 170, 'outro')]
+ expected = self._chapters(
+ [20, 30, 40, 50, 60, 70, 110, 120, 130, 140, 150, 160, 170],
+ ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Intermission/Intro Animation, Sponsor', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', 'c',
+ '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder, Endcards/Credits', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self):
+ chapters = self._chapters([170], ['c']) + [
+ self._chapter(0, 30, remove=True),
+ self._sponsor_chapter(20, 50, 'sponsor', remove=True),
+ self._chapter(40, 60, remove=True),
+ self._sponsor_chapter(70, 90, 'sponsor', remove=True),
+ self._chapter(80, 100, remove=True),
+ self._chapter(90, 110, remove=True),
+ self._sponsor_chapter(120, 140, 'sponsor', remove=True),
+ self._sponsor_chapter(130, 160, 'selfpromo', remove=True),
+ self._chapter(150, 170, remove=True)]
+ expected_cuts = [self._chapter(0, 60, remove=True),
+ self._chapter(70, 110, remove=True),
+ self._chapter(120, 170, remove=True)]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([20], ['c']), expected_cuts)
+
+ def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self):
+ chapters = self._chapters([60], ['c']) + [
+ self._sponsor_chapter(10, 60, 'sponsor'),
+ self._sponsor_chapter(10, 40, 'intro'),
+ self._sponsor_chapter(30, 50, 'interaction'),
+ self._sponsor_chapter(30, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(40, 50, 'interaction'),
+ self._sponsor_chapter(50, 60, 'outro')]
+ expected = self._chapters(
+ [10, 30, 40], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 30, 'sponsor'),
+ self._sponsor_chapter(20, 50, 'interaction'),
+ self._sponsor_chapter(30, 50, 'selfpromo', remove=True),
+ self._sponsor_chapter(40, 60, 'sponsor'),
+ self._sponsor_chapter(50, 60, 'interaction')]
+ expected = self._chapters(
+ [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Sponsor, Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self):
+ chapters = self._chapters([70], ['c']) + [
+ self._sponsor_chapter(10, 60, 'sponsor'),
+ self._sponsor_chapter(20, 60, 'interaction'),
+ self._sponsor_chapter(30, 50, 'selfpromo', remove=True)]
+ expected = self._chapters(
+ [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor',
+ '[SponsorBlock]: Sponsor, Interaction Reminder', 'c'])
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, expected, [self._chapter(30, 50, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self):
+ chapters = self._chapters([200], ['c']) + [
+ self._sponsor_chapter(10, 40, 'sponsor'),
+ self._sponsor_chapter(10, 30, 'intro'),
+ self._chapter(20, 30, remove=True),
+ self._sponsor_chapter(30, 40, 'selfpromo'),
+ self._sponsor_chapter(50, 70, 'sponsor'),
+ self._sponsor_chapter(60, 80, 'interaction'),
+ self._chapter(70, 80, remove=True),
+ self._sponsor_chapter(70, 90, 'sponsor'),
+ self._sponsor_chapter(80, 100, 'interaction'),
+ self._sponsor_chapter(120, 170, 'selfpromo'),
+ self._sponsor_chapter(130, 180, 'outro'),
+ self._chapter(140, 150, remove=True),
+ self._chapter(150, 160, remove=True)]
+ expected = self._chapters(
+ [10, 20, 30, 40, 50, 70, 80, 100, 110, 130, 140, 160],
+ ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion',
+ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder',
+ '[SponsorBlock]: Interaction Reminder', 'c', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Unpaid/Self Promotion, Endcards/Credits', '[SponsorBlock]: Endcards/Credits', 'c'])
+ expected_cuts = [self._chapter(20, 30, remove=True),
+ self._chapter(70, 80, remove=True),
+ self._chapter(140, 160, remove=True)]
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self):
+ chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5'])
+ + [self._sponsor_chapter(10, 90, 'sponsor')])
+ expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self):
+ cuts = [self._chapter(10, 90, remove=True)]
+ chapters = self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + cuts
+ expected = self._chapters([10, 20], ['c1', 'c5'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self):
+ chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(20, 30, 'sponsor'),
+ self._sponsor_chapter(50, 70, 'selfpromo')])
+ expected = self._chapters([10, 20, 30, 40, 50, 70, 80],
+ ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3',
+ '[SponsorBlock]: Unpaid/Self Promotion', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthers(self):
+ cuts = [self._chapter(20, 30, remove=True), self._chapter(50, 70, remove=True)]
+ chapters = self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + cuts
+ expected = self._chapters([10, 30, 40, 50], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self):
+ chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(10, 30, 'music_offtopic')])
+ expected = self._chapters(
+ [10, 30, 40, 50, 60],
+ ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self):
+ cuts = [self._chapter(10, 30, remove=True)]
+ chapters = self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + cuts
+ expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self):
+ chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(20, 30, 'sponsor')])
+ expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self):
+ cuts = [self._chapter(20, 30, remove=True)]
+ chapters = self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self):
+ chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(20, 30, 'sponsor')])
+ expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self):
+ cuts = [self._chapter(20, 30, remove=True)]
+ chapters = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self):
+ chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(10, 30, 'sponsor')])
+ expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self):
+ cuts = [self._chapter(10, 30, remove=True)]
+ chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts
+ expected = self._chapters([10, 20], ['c1', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self):
+ chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')])
+ expected = self._chapters(
+ [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self):
+ cuts = [self._chapter(0, 10, remove=True), self._chapter(50, 60, remove=True)]
+ chapters = self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10, 30, 40], ['c1', 'c2', 'c3'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self):
+ chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3'])
+ + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')])
+ expected = self._chapters(
+ [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(self):
+ cuts = [self._chapter(0, 20, remove=True), self._chapter(30, 50, remove=True)]
+ chapters = self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + cuts
+ expected = self._chapters([10], ['c2'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)
+
+ def test_remove_marked_arrange_sponsors_EverythingSponsored(self):
+ chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4'])
+ + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')])
+ expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
+
+ def test_remove_marked_arrange_sponsors_EverythingCut(self):
+ cuts = [self._chapter(0, 20, remove=True), self._chapter(20, 40, remove=True)]
+ chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, [], [self._chapter(0, 40, remove=True)])
+
+ def test_remove_marked_arrange_sponsors_TinyChaptersInTheOriginalArePreserved(self):
+ chapters = self._chapters([0.1, 0.2, 0.3, 0.4], ['c1', 'c2', 'c3', 'c4'])
+ self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, [])
+
+ def test_remove_marked_arrange_sponsors_TinySponsorsAreIgnored(self):
+ chapters = [self._sponsor_chapter(0, 0.1, 'intro'), self._chapter(0.1, 0.2, 'c1'),
+ self._sponsor_chapter(0.2, 0.3, 'sponsor'), self._chapter(0.3, 0.4, 'c2'),
+ self._sponsor_chapter(0.4, 0.5, 'outro')]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([0.3, 0.5], ['c1', 'c2']), [])
+
+ def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(self):
+ cuts = [self._chapter(1.5, 2.5, remove=True)]
+ chapters = self._chapters([2, 3, 3.5], ['c1', 'c2', 'c3']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts)
+
+ def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self):
+ cuts = [self._chapter(0.5, 2, remove=True)]
+ chapters = self._chapters([2], ['c']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([0.5], ['c']), cuts)
+
+ def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self):
+ cuts = [self._chapter(0.5, 2, remove=True)]
+ chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([2.5], ['c2']), cuts)
+
+ def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self):
+ chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [
+ self._sponsor_chapter(1.5, 2.5, 'sponsor')]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), [])
+
+ def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self):
+ chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [
+ self._sponsor_chapter(1, 3, 'sponsor'),
+ self._sponsor_chapter(2.5, 4, 'selfpromo')
+ ]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([1, 3, 4, 5], [
+ 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), [])
+
+ def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self):
+ chapters = self._chapters([4], ['c']) + [
+ self._sponsor_chapter(1.5, 2, 'sponsor'),
+ self._sponsor_chapter(2, 4, 'selfpromo')
+ ]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), [])
+
+ def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self):
+ self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s'
+ chapters = self._chapters([10], ['c']) + [
+ self._sponsor_chapter(2, 8, 'sponsor'),
+ self._sponsor_chapter(4, 6, 'selfpromo')
+ ]
+ self._remove_marked_arrange_sponsors_test_impl(
+ chapters, self._chapters([2, 4, 6, 8, 10], [
+ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion',
+ '[SponsorBlock]: Sponsor', 'c'
+ ]), [])
+
+ def test_make_concat_opts_CommonCase(self):
+ sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
+ expected = '''ffconcat version 1.0
+file 'file:test'
+outpoint 1.000000
+file 'file:test'
+inpoint 2.000000
+outpoint 10.000000
+file 'file:test'
+inpoint 20.000000
+'''
+ opts = self._pp._make_concat_opts(sponsor_chapters, 30)
+ self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))
+
+ def test_make_concat_opts_NoZeroDurationChunkAtVideoStart(self):
+ sponsor_chapters = [self._chapter(0, 1, 's1'), self._chapter(10, 20, 's2')]
+ expected = '''ffconcat version 1.0
+file 'file:test'
+inpoint 1.000000
+outpoint 10.000000
+file 'file:test'
+inpoint 20.000000
+'''
+ opts = self._pp._make_concat_opts(sponsor_chapters, 30)
+ self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))
+
+ def test_make_concat_opts_NoZeroDurationChunkAtVideoEnd(self):
+ sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
+ expected = '''ffconcat version 1.0
+file 'file:test'
+outpoint 1.000000
+file 'file:test'
+inpoint 2.000000
+outpoint 10.000000
+'''
+ opts = self._pp._make_concat_opts(sponsor_chapters, 20)
+ self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts)))
+
+ def test_quote_for_concat_RunsOfQuotes(self):
+ self.assertEqual(
+ r"'special '\'' '\'\''characters'\'\'\''galore'",
+ self._pp._quote_for_ffmpeg("special ' ''characters'''galore"))
+
+ def test_quote_for_concat_QuotesAtStart(self):
+ self.assertEqual(
+ r"\'\'\''special '\'' characters '\'' galore'",
+ self._pp._quote_for_ffmpeg("'''special ' characters ' galore"))
+
+ def test_quote_for_concat_QuotesAtEnd(self):
+ self.assertEqual(
+ r"'special '\'' characters '\'' galore'\'\'\'",
+ self._pp._quote_for_ffmpeg("special ' characters ' galore'''"))
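The test_make_concat_opts cases above pin the exact ffconcat listings used to splice out removed chapters. A hedged sketch that reproduces those listings from (inpoint, outpoint) pairs of the chunks to keep; it mirrors the expected strings only and is not the internal _make_concat_opts/_concat_spec API:

    def concat_spec(filename, chunks):
        # chunks: (inpoint, outpoint) pairs; None means start/end of file
        lines = ['ffconcat version 1.0']
        for inpoint, outpoint in chunks:
            lines.append("file 'file:%s'" % filename)
            if inpoint is not None:
                lines.append('inpoint %.6f' % inpoint)
            if outpoint is not None:
                lines.append('outpoint %.6f' % outpoint)
        return '\n'.join(lines) + '\n'

    # concat_spec('test', [(None, 1), (2, 10), (20, None)]) reproduces the
    # expected output of test_make_concat_opts_CommonCase.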
diff --git a/test/test_socks.py b/test/test_socks.py
index 47ebf48..2574e73 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
@@ -14,6 +14,7 @@ import subprocess
from test.helper import (
FakeYDL,
get_params,
+ is_download_test,
)
from hypervideo_dl.compat import (
compat_str,
@@ -21,6 +22,7 @@ from hypervideo_dl.compat import (
)
+@is_download_test
class TestMultipleSocks(unittest.TestCase):
@staticmethod
def _check_params(attrs):
@@ -76,6 +78,7 @@ class TestMultipleSocks(unittest.TestCase):
params['secondary_server_ip'])
+@is_download_test
class TestSocks(unittest.TestCase):
_SKIP_SOCKS_TEST = True
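From here on, network-dependent suites are tagged with @is_download_test from test.helper so they can be skipped in offline runs. A hypothetical equivalent of such a class decorator (the real helper may be implemented differently):

    import os
    import unittest

    def is_download_test(cls):
        # HYPERVIDEO_SKIP_DOWNLOAD_TESTS is an assumed variable name,
        # for illustration only
        if os.environ.get('HYPERVIDEO_SKIP_DOWNLOAD_TESTS'):
            return unittest.skip('download tests disabled')(cls)
        return cls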
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 195340d..e94df35 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
# Allow direct execution
@@ -7,7 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, md5
+from test.helper import FakeYDL, md5, is_download_test
from hypervideo_dl.extractor import (
@@ -19,6 +19,7 @@ from hypervideo_dl.extractor import (
CeskaTelevizeIE,
LyndaIE,
NPOIE,
+ PBSIE,
ComedyCentralIE,
NRKTVIE,
RaiPlayIE,
@@ -30,6 +31,7 @@ from hypervideo_dl.extractor import (
)
+@is_download_test
class BaseTestSubtitles(unittest.TestCase):
url = None
IE = None
@@ -55,6 +57,7 @@ class BaseTestSubtitles(unittest.TestCase):
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
+@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
url = 'QRS8MkLhQmM'
IE = YoutubeIE
@@ -64,8 +67,8 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
- self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
- self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
+ self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
+ self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
@@ -73,13 +76,13 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'ttml'
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
+ self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
+ self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
@@ -88,9 +91,15 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
+ def test_youtube_no_automatic_captions(self):
+ self.url = 'QRS8MkLhQmM'
+ self.DL.params['writeautomaticsub'] = True
+ subtitles = self.getSubtitles()
+ self.assertTrue(not subtitles)
+
def test_youtube_translated_subtitles(self):
# This video has a subtitles track, which can be translated
- self.url = 'Ky9eprVWzlI'
+ self.url = 'i0ZabxXmH4Y'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
@@ -105,6 +114,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@is_download_test
class TestDailymotionSubtitles(BaseTestSubtitles):
url = 'http://www.dailymotion.com/video/xczg00'
IE = DailymotionIE
@@ -128,6 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@is_download_test
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
@@ -143,6 +154,7 @@ class TestTedSubtitles(BaseTestSubtitles):
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+@is_download_test
class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871'
IE = VimeoIE
@@ -164,6 +176,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@is_download_test
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
@@ -185,6 +198,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@is_download_test
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
@@ -206,6 +220,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@is_download_test
class TestLyndaSubtitles(BaseTestSubtitles):
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
IE = LyndaIE
@@ -218,6 +233,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+@is_download_test
class TestNPOSubtitles(BaseTestSubtitles):
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
IE = NPOIE
@@ -230,6 +246,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+@is_download_test
class TestMTVSubtitles(BaseTestSubtitles):
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
IE = ComedyCentralIE
@@ -245,6 +262,7 @@ class TestMTVSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
+@is_download_test
class TestNRKSubtitles(BaseTestSubtitles):
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
IE = NRKTVIE
@@ -257,6 +275,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
+@is_download_test
class TestRaiPlaySubtitles(BaseTestSubtitles):
IE = RaiPlayIE
@@ -277,6 +296,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
+@is_download_test
class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
IE = VikiIE
@@ -289,6 +309,7 @@ class TestVikiSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
+@is_download_test
class TestThePlatformSubtitles(BaseTestSubtitles):
# from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
# (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
@@ -303,6 +324,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+@is_download_test
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
IE = ThePlatformFeedIE
@@ -315,6 +337,7 @@ class TestThePlatformFeedSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
+@is_download_test
class TestRtveSubtitles(BaseTestSubtitles):
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
IE = RTVEALaCartaIE
@@ -329,6 +352,7 @@ class TestRtveSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+@is_download_test
class TestDemocracynowSubtitles(BaseTestSubtitles):
url = 'http://www.democracynow.org/shows/2015/7/3'
IE = DemocracynowIE
@@ -349,5 +373,42 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+@is_download_test
+class TestPBSSubtitles(BaseTestSubtitles):
+ url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/'
+ IE = PBSIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+
+ def test_subtitles_dfxp_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'dfxp'
+ subtitles = self.getSubtitles()
+ self.assertIn(md5(subtitles['en']), ['643b034254cdc3768ff1e750b6b5873b'])
+
+ def test_subtitles_vtt_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'vtt'
+ subtitles = self.getSubtitles()
+ self.assertIn(
+ md5(subtitles['en']), ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73'])
+
+ def test_subtitles_srt_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'srt'
+ subtitles = self.getSubtitles()
+ self.assertIn(md5(subtitles['en']), ['2082c21b43759d9bf172931b2f2ca371'])
+
+ def test_subtitles_sami_format(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'sami'
+ subtitles = self.getSubtitles()
+ self.assertIn(md5(subtitles['en']), ['4256b16ac7da6a6780fafd04294e85cd'])
+
+
if __name__ == '__main__':
unittest.main()
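The new PBS suite follows the file's fixture pattern: subclass BaseTestSubtitles, set url and IE, tweak DL.params, and compare md5 digests of the returned subtitle data. A sketch of adding another site under the same pattern (ExampleIE and the URL are placeholders, not real fixtures):

    @is_download_test
    class TestExampleSubtitles(BaseTestSubtitles):
        url = 'https://example.com/some-video'
        IE = ExampleIE  # hypothetical extractor

        def test_allsubtitles(self):
            self.DL.params['writesubtitles'] = True
            self.DL.params['allsubtitles'] = True
            subtitles = self.getSubtitles()  # dict mapping language -> data
            self.assertEqual(set(subtitles.keys()), {'en'})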
diff --git a/test/test_utils.py b/test/test_utils.py
index d8756a0..1cd2b2f 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
@@ -12,6 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Various small unit tests
import io
+import itertools
import json
import xml.etree.ElementTree
@@ -23,6 +24,7 @@ from hypervideo_dl.utils import (
clean_html,
clean_podcast_url,
date_from_str,
+ datetime_from_str,
DateRange,
detect_exe_version,
determine_ext,
@@ -60,11 +62,13 @@ from hypervideo_dl.utils import (
parse_iso8601,
parse_resolution,
parse_bitrate,
+ parse_qs,
pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_path,
sanitize_url,
+ sanitized_Request,
expand_path,
prepend_extension,
replace_extension,
@@ -105,6 +109,8 @@ from hypervideo_dl.utils import (
cli_valueless_option,
cli_bool_option,
parse_codecs,
+ iri_to_uri,
+ LazyList,
)
from hypervideo_dl.compat import (
compat_chr,
@@ -112,8 +118,6 @@ from hypervideo_dl.compat import (
compat_getenv,
compat_os_name,
compat_setenv,
- compat_urlparse,
- compat_parse_qs,
)
@@ -123,6 +127,7 @@ class TestUtil(unittest.TestCase):
self.assertTrue(timeconvert('bougrg') is None)
def test_sanitize_filename(self):
+ self.assertEqual(sanitize_filename(''), '')
self.assertEqual(sanitize_filename('abc'), 'abc')
self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e')
@@ -236,17 +241,27 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+ self.assertEqual(sanitize_url('foo bar'), 'foo bar')
+
+ def test_extract_basic_auth(self):
+ auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
+ self.assertFalse(auth_header('http://foo.bar'))
+ self.assertFalse(auth_header('http://:foo.bar'))
+ self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
+ self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
+ self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
+ self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
def test_expand_path(self):
def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
- compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
- self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
+ compat_setenv('hypervideo_dl_EXPATH_PATH', 'expanded')
+ self.assertEqual(expand_path(env('hypervideo_dl_EXPATH_PATH')), 'expanded')
self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
self.assertEqual(expand_path('~'), compat_getenv('HOME'))
self.assertEqual(
- expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
+ expand_path('~/%s' % env('hypervideo_dl_EXPATH_PATH')),
'%s/expanded' % compat_getenv('HOME'))
def test_prepend_extension(self):
@@ -310,8 +325,18 @@ class TestUtil(unittest.TestCase):
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
- self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
- self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
+ self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
+ self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))
+
+ def test_datetime_from_str(self):
+ self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
+ self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
+ self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
+ self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
+ self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
+ self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
+ self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
+ self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))
def test_daterange(self):
_20century = DateRange("19000101", "20000101")
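The date assertions above pin the relative-date grammar behind options such as --dateafter: a base of now, yesterday, or YYYYMMDD, plus an offset in day/week/month/year units (month and year offsets are calendar-aware), with datetime_from_str additionally accepting hour-level units and a precision argument. For example:

    from hypervideo_dl.utils import date_from_str, datetime_from_str

    date_from_str('now-1week')             # seven days ago
    date_from_str('20210131+1month')       # calendar-aware: 2021-02-28
    datetime_from_str('now+24hours', precision='auto')  # same as 'now+1day'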
@@ -662,38 +687,36 @@ class TestUtil(unittest.TestCase):
self.assertTrue(isinstance(data, bytes))
def test_update_url_query(self):
- def query_dict(url):
- return compat_parse_qs(compat_urlparse.urlparse(url).query)
- self.assertEqual(query_dict(update_url_query(
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
- query_dict('http://example.com/path?quality=HD&format=mp4'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?quality=HD&format=mp4'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
- query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
- query_dict('http://example.com/path?fields=id,formats,subtitles'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
- query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?manifest=f4m', {'manifest': []})),
- query_dict('http://example.com/path'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
- query_dict('http://example.com/path?system=LINUX'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?system=LINUX'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
- query_dict('http://example.com/path?fields=id,formats,subtitles'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'width': 1080, 'height': 720})),
- query_dict('http://example.com/path?width=1080&height=720'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?width=1080&height=720'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'bitrate': 5020.43})),
- query_dict('http://example.com/path?bitrate=5020.43'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?bitrate=5020.43'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'test': '第二行тест'})),
- query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+ parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
def test_multipart_encode(self):
self.assertEqual(
@@ -825,30 +848,52 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
'vcodec': 'avc1.77.30',
'acodec': 'mp4a.40.2',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('mp4a.40.2'), {
'vcodec': 'none',
'acodec': 'mp4a.40.2',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
'vcodec': 'avc1.42001e',
'acodec': 'mp4a.40.5',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('avc3.640028'), {
'vcodec': 'avc3.640028',
'acodec': 'none',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
'vcodec': 'h264',
'acodec': 'aac',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('av01.0.05M.08'), {
'vcodec': 'av01.0.05M.08',
'acodec': 'none',
+ 'dynamic_range': None,
+ })
+ self.assertEqual(parse_codecs('vp9.2'), {
+ 'vcodec': 'vp9.2',
+ 'acodec': 'none',
+ 'dynamic_range': 'HDR10',
+ })
+ self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
+ 'vcodec': 'av01.0.12M.10',
+ 'acodec': 'none',
+ 'dynamic_range': 'HDR10',
+ })
+ self.assertEqual(parse_codecs('dvhe'), {
+ 'vcodec': 'dvhe',
+ 'acodec': 'none',
+ 'dynamic_range': 'DV',
})
self.assertEqual(parse_codecs('theora, vorbis'), {
'vcodec': 'theora',
'acodec': 'vorbis',
+ 'dynamic_range': None,
})
self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
'vcodec': 'unknownvcodec',
@@ -1028,6 +1073,9 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{ "040": "040" }')
self.assertEqual(json.loads(on), {'040': '040'})
+ on = js_to_json('[1,//{},\n2]')
+ self.assertEqual(json.loads(on), [1, 2])
+
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
@@ -1178,12 +1226,26 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
'9999 51')
def test_match_str(self):
- self.assertRaises(ValueError, match_str, 'xy>foobar', {})
+ # Unary
self.assertFalse(match_str('xy', {'x': 1200}))
self.assertTrue(match_str('!xy', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 1200}))
self.assertFalse(match_str('!x', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 0}))
+ self.assertTrue(match_str('is_live', {'is_live': True}))
+ self.assertFalse(match_str('is_live', {'is_live': False}))
+ self.assertFalse(match_str('is_live', {'is_live': None}))
+ self.assertFalse(match_str('is_live', {}))
+ self.assertFalse(match_str('!is_live', {'is_live': True}))
+ self.assertTrue(match_str('!is_live', {'is_live': False}))
+ self.assertTrue(match_str('!is_live', {'is_live': None}))
+ self.assertTrue(match_str('!is_live', {}))
+ self.assertTrue(match_str('title', {'title': 'abc'}))
+ self.assertTrue(match_str('title', {'title': ''}))
+ self.assertFalse(match_str('!title', {'title': 'abc'}))
+ self.assertFalse(match_str('!title', {'title': ''}))
+
+ # Numeric
self.assertFalse(match_str('x>0', {'x': 0}))
self.assertFalse(match_str('x>0', {}))
self.assertTrue(match_str('x>?0', {}))
@@ -1191,10 +1253,26 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
self.assertFalse(match_str('x>2K', {'x': 1200}))
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+ self.assertTrue(match_str('x > 1:0:0', {'x': 3700}))
+
+ # String
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
+ self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
+ self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
+
+ # And
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 90, 'description': 'foo'}))
@@ -1207,18 +1285,35 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 190, 'dislike_count': 10}))
- self.assertTrue(match_str('is_live', {'is_live': True}))
- self.assertFalse(match_str('is_live', {'is_live': False}))
- self.assertFalse(match_str('is_live', {'is_live': None}))
- self.assertFalse(match_str('is_live', {}))
- self.assertFalse(match_str('!is_live', {'is_live': True}))
- self.assertTrue(match_str('!is_live', {'is_live': False}))
- self.assertTrue(match_str('!is_live', {'is_live': None}))
- self.assertTrue(match_str('!is_live', {}))
- self.assertTrue(match_str('title', {'title': 'abc'}))
- self.assertTrue(match_str('title', {'title': ''}))
- self.assertFalse(match_str('!title', {'title': 'abc'}))
- self.assertFalse(match_str('!title', {'title': ''}))
+
+ # Regex
+ self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
+ self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
+ self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
+ self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))
+
+ # Quotes
+ self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
+ self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'}))
+ self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
+ self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))
+
+ # Escaping &
+ self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
+ self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
+
+ # Example from docs
+ self.assertTrue(match_str(
+ r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
+ {'description': 'Raining Cats & Dogs'}))
+
+ # Incomplete
+ self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
+ self.assertTrue(match_str('x', {'id': 'foo'}, True))
+ self.assertTrue(match_str('!x', {'id': 'foo'}, True))
+ self.assertFalse(match_str('x', {'id': 'foo'}, False))
def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), None)
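The rearranged match_str tests double as documentation of the filter mini-language used by --match-filter: a bare field is a truthiness check (! negates), = and != test string equality, ^= / $= / *= test prefix/suffix/substring (prefix with ! to negate), ~= runs a regex search, numeric comparisons accept K/M-style suffixes, & joins clauses (a literal & is escaped as \&), and a trailing ? (as in <?) lets a clause pass when the field is missing. For example:

    from hypervideo_dl.utils import match_str

    match_str('duration > 60 & title*=cats',
              {'duration': 90, 'title': 'funny cats'})      # True
    match_str(r'description~=(?i)\bdogs\b',
              {'description': 'Raining Cats & Dogs'})       # True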
@@ -1424,8 +1519,8 @@ Line 1
self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
def test_rot47(self):
- self.assertEqual(rot47('hypervideo'), r'J@FEF36\5=')
- self.assertEqual(rot47('HYPERVIDEO'), r'*~&%&qt\s{')
+ self.assertEqual(rot47('hypervideo'), r'9JA6CG:56@')
+ self.assertEqual(rot47('HYPERVIDEO'), r"w*!t#'xst~")
def test_urshift(self):
self.assertEqual(urshift(3, 1), 1)
@@ -1471,10 +1566,81 @@ Line 1
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
+ def test_iri_to_uri(self):
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
+ 'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b') # Same
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'), # German for cheese sauce stirring spoon
+ 'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel')
+ self.assertEqual(
+ iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
+ 'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#')
+ self.assertEqual(
+ iri_to_uri('http://правозащита38.рф/category/news/'),
+ 'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+ self.assertEqual(
+ iri_to_uri('http://www.правозащита38.рф/category/news/'),
+ 'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/')
+ self.assertEqual(
+ iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'),
+ 'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA')
+ self.assertEqual(
+ iri_to_uri('http://日本語.jp/'),
+ 'http://xn--wgv71a119e.jp/')
+ self.assertEqual(
+ iri_to_uri('http://导航.中国/'),
+ 'http://xn--fet810g.xn--fiqs8s/')
+
def test_clean_podcast_url(self):
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+ def test_LazyList(self):
+ it = list(range(10))
+
+ self.assertEqual(list(LazyList(it)), it)
+ self.assertEqual(LazyList(it).exhaust(), it)
+ self.assertEqual(LazyList(it)[5], it[5])
+
+ self.assertEqual(LazyList(it)[5:], it[5:])
+ self.assertEqual(LazyList(it)[:5], it[:5])
+ self.assertEqual(LazyList(it)[::2], it[::2])
+ self.assertEqual(LazyList(it)[1::2], it[1::2])
+ self.assertEqual(LazyList(it)[5::-1], it[5::-1])
+ self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
+ self.assertEqual(LazyList(it)[::-1], it[::-1])
+
+ self.assertTrue(LazyList(it))
+ self.assertFalse(LazyList(range(0)))
+ self.assertEqual(len(LazyList(it)), len(it))
+ self.assertEqual(repr(LazyList(it)), repr(it))
+ self.assertEqual(str(LazyList(it)), str(it))
+
+ self.assertEqual(list(LazyList(it).reverse()), it[::-1])
+ self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7])
+ self.assertEqual(list(LazyList(it).reverse()[::-1]), it)
+
+ def test_LazyList_laziness(self):
+
+ def test(ll, idx, val, cache):
+ self.assertEqual(ll[idx], val)
+ self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache))
+
+ ll = LazyList(range(10))
+ test(ll, 0, 0, range(1))
+ test(ll, 5, 5, range(6))
+ test(ll, -3, 7, range(10))
+
+ ll = LazyList(range(10)).reverse()
+ test(ll, -1, 0, range(1))
+ test(ll, 3, 6, range(10))
+
+ ll = LazyList(itertools.count())
+ test(ll, 10, 10, range(11))
+ ll.reverse()
+ test(ll, -15, 14, range(15))
+
if __name__ == '__main__':
unittest.main()
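The LazyList tests above pin its semantics: the wrapped iterator is consumed only as far as needed, consumed items are cached for re-use, and slicing and in-place reverse() operate over the cache. In short:

    import itertools
    from hypervideo_dl.utils import LazyList

    naturals = LazyList(itertools.count())
    naturals[4]    # consumes and caches 0..4, returns 4
    naturals[2]    # served from the cache; nothing further is consumed
    list(LazyList(range(5)).reverse())   # [4, 3, 2, 1, 0]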
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
index aaeb350..050fd76 100644
--- a/test/test_verbose_output.py
+++ b/test/test_verbose_output.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import unicode_literals
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index cecba65..2da1a50 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
# Allow direct execution
@@ -7,7 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL
+from test.helper import FakeYDL, is_download_test
from hypervideo_dl.extractor import (
@@ -17,6 +17,7 @@ from hypervideo_dl.extractor import (
)
+@is_download_test
class TestYoutubeLists(unittest.TestCase):
def assertIsPlaylist(self, info):
"""Make sure the info has '_type' set to 'playlist'"""
@@ -71,7 +72,7 @@ class TestYoutubeLists(unittest.TestCase):
self.assertEqual(video['ie_key'], 'Youtube')
self.assertEqual(video['id'], 'BaW_jenozKc')
self.assertEqual(video['url'], 'BaW_jenozKc')
- self.assertEqual(video['title'], 'hypervideo test video "\'/\\ä↭𝕐')
+ self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
self.assertEqual(video['duration'], 10)
self.assertEqual(video['uploader'], 'Philipp Hagemeister')
diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py
index 1739f5d..4571cc1 100644
--- a/test/test_youtube_misc.py
+++ b/test/test_youtube_misc.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
from __future__ import unicode_literals
# Allow direct execution
diff --git a/test/testdata/ism/sintel.Manifest b/test/testdata/ism/sintel.Manifest
new file mode 100644
index 0000000..2ff8c24
--- /dev/null
+++ b/test/testdata/ism/sintel.Manifest
@@ -0,0 +1,988 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<SmoothStreamingMedia
+ MajorVersion="2"
+ MinorVersion="0"
+ TimeScale="10000000"
+ Duration="8880746666">
+ <StreamIndex
+ Type="audio"
+ QualityLevels="1"
+ TimeScale="10000000"
+ Name="audio"
+ Chunks="445"
+ Url="QualityLevels({bitrate})/Fragments(audio={start time})">
+ <QualityLevel
+ Index="0"
+ Bitrate="128001"
+ CodecPrivateData="1190"
+ SamplingRate="48000"
+ Channels="2"
+ BitsPerSample="16"
+ PacketSize="4"
+ AudioTag="255"
+ FourCC="AACL" />
+ <c t="0" d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="20053333" />
+ <c d="20053333" />
+ <c d="20053334" />
+ <c d="19840000" />
+ <c d="746666" />
+ </StreamIndex>
+ <StreamIndex
+ Type="text"
+ QualityLevels="1"
+ TimeScale="10000000"
+ Language="eng"
+ Subtype="CAPT"
+ Name="textstream_eng"
+ Chunks="11"
+ Url="QualityLevels({bitrate})/Fragments(textstream_eng={start time})">
+ <QualityLevel
+ Index="0"
+ Bitrate="1000"
+ CodecPrivateData=""
+ FourCC="TTML" />
+ <c t="0" d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="600000000" />
+ <c d="240000000" />
+ </StreamIndex>
+ <StreamIndex
+ Type="video"
+ QualityLevels="5"
+ TimeScale="10000000"
+ Name="video"
+ Chunks="444"
+ Url="QualityLevels({bitrate})/Fragments(video={start time})"
+ MaxWidth="1688"
+ MaxHeight="720"
+ DisplayWidth="1689"
+ DisplayHeight="720">
+ <QualityLevel
+ Index="0"
+ Bitrate="100000"
+ CodecPrivateData="00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8"
+ MaxWidth="336"
+ MaxHeight="144"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="1"
+ Bitrate="326000"
+ CodecPrivateData="00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8"
+ MaxWidth="562"
+ MaxHeight="240"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="2"
+ Bitrate="698000"
+ CodecPrivateData="00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8"
+ MaxWidth="844"
+ MaxHeight="360"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="3"
+ Bitrate="1493000"
+ CodecPrivateData="00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8"
+ MaxWidth="1126"
+ MaxHeight="480"
+ FourCC="AVC1" />
+ <QualityLevel
+ Index="4"
+ Bitrate="4482000"
+ CodecPrivateData="00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8"
+ MaxWidth="1688"
+ MaxHeight="720"
+ FourCC="AVC1" />
+ <c t="0" d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ <c d="20000000" />
+ </StreamIndex>
+</SmoothStreamingMedia>
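
In the manifest above each <c> element carries only a duration d in TimeScale ticks (10,000,000 per second here); start times are implicit running sums, with t given only on the first chunk. The 445 audio durations sum to 8,880,746,666 ticks, exactly the Duration attribute (about 888.07 s, i.e. 14 m 48 s). A small sanity-check sketch using only the standard library (the helper name is made up for illustration):

    import xml.etree.ElementTree as ET

    def stream_duration(manifest_path, stream_name):
        """Sum the <c> chunk durations of one StreamIndex, in seconds."""
        root = ET.parse(manifest_path).getroot()
        for stream in root.iter('StreamIndex'):
            if stream.get('Name') == stream_name:
                timescale = int(stream.get('TimeScale') or root.get('TimeScale'))
                return sum(int(c.get('d')) for c in stream.iter('c')) / timescale
        raise ValueError('no such stream: %s' % stream_name)

    # stream_duration('test/testdata/ism/sintel.Manifest', 'audio') -> 888.0746666
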
diff --git a/test/testdata/m3u8/bipbop_16x9.m3u8 b/test/testdata/m3u8/bipbop_16x9.m3u8
new file mode 100644
index 0000000..1ce87dd
--- /dev/null
+++ b/test/testdata/m3u8/bipbop_16x9.m3u8
@@ -0,0 +1,38 @@
+#EXTM3U
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8"
+
+
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8"
+
+
+#EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear1/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear2/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear3/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear4/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear5/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs"
+gear0/prog_index.m3u8
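
A master playlist like this pairs every #EXT-X-STREAM-INF attribute line with the variant URI on the following line, while #EXT-X-MEDIA and #EXT-X-I-FRAME-STREAM-INF entries are self-contained single lines. A rough attribute-list parser (a sketch, not the project's actual M3U8 handling) is enough to recover the variants:

    import re

    def parse_variants(m3u8_text):
        """Pair each #EXT-X-STREAM-INF attribute list with the URI below it."""
        variants = []
        lines = iter(m3u8_text.splitlines())
        for line in lines:
            if not line.startswith('#EXT-X-STREAM-INF:'):
                continue
            attrs = {
                key: value.strip('"')
                for key, value in re.findall(
                    r'([A-Z0-9-]+)=("[^"]*"|[^",]+)', line.split(':', 1)[1])
            }
            attrs['URI'] = next(lines).strip()  # the URI is the very next line
            variants.append(attrs)
        return variants

    # for bipbop_16x9.m3u8 this yields six variants, from 416x234 at
    # BANDWIDTH=263851 down to the audio-only gear0 rendition (BANDWIDTH=41457)
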
diff --git a/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 b/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8
new file mode 100644
index 0000000..620ce04
--- /dev/null
+++ b/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8
@@ -0,0 +1,76 @@
+#EXTM3U
+#EXT-X-VERSION:6
+#EXT-X-INDEPENDENT-SEGMENTS
+
+
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2168183,BANDWIDTH=2177116,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v5/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7968416,BANDWIDTH=8001098,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v9/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6170000,BANDWIDTH=6312875,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v8/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4670769,BANDWIDTH=4943747,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v7/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3168702,BANDWIDTH=3216424,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v6/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1265132,BANDWIDTH=1268994,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v4/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=895755,BANDWIDTH=902298,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v3/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=530721,BANDWIDTH=541052,CODECS="avc1.640015,mp4a.40.2",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
+v2/prog_index.m3u8
+
+
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2390686,BANDWIDTH=2399619,CODECS="avc1.640020,ac-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v5/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=8190919,BANDWIDTH=8223601,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v9/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6392503,BANDWIDTH=6535378,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v8/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4893272,BANDWIDTH=5166250,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v7/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3391205,BANDWIDTH=3438927,CODECS="avc1.640020,ac-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v6/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1487635,BANDWIDTH=1491497,CODECS="avc1.64001e,ac-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v4/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1118258,BANDWIDTH=1124801,CODECS="avc1.64001e,ac-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v3/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=753224,BANDWIDTH=763555,CODECS="avc1.640015,ac-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
+v2/prog_index.m3u8
+
+
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2198686,BANDWIDTH=2207619,CODECS="avc1.640020,ec-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v5/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7998919,BANDWIDTH=8031601,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v9/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6200503,BANDWIDTH=6343378,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v8/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4701272,BANDWIDTH=4974250,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v7/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3199205,BANDWIDTH=3246927,CODECS="avc1.640020,ec-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v6/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1295635,BANDWIDTH=1299497,CODECS="avc1.64001e,ec-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v4/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=926258,BANDWIDTH=932801,CODECS="avc1.64001e,ec-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v3/prog_index.m3u8
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=561224,BANDWIDTH=571555,CODECS="avc1.640015,ec-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
+v2/prog_index.m3u8
+
+
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=183689,BANDWIDTH=187492,CODECS="avc1.64002a",RESOLUTION=1920x1080,URI="v7/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=132672,BANDWIDTH=136398,CODECS="avc1.640020",RESOLUTION=1280x720,URI="v6/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=97767,BANDWIDTH=101378,CODECS="avc1.640020",RESOLUTION=960x540,URI="v5/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=75722,BANDWIDTH=77818,CODECS="avc1.64001e",RESOLUTION=768x432,URI="v4/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=63522,BANDWIDTH=65091,CODECS="avc1.64001e",RESOLUTION=640x360,URI="v3/iframe_index.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=39678,BANDWIDTH=40282,CODECS="avc1.640015",RESOLUTION=480x270,URI="v2/iframe_index.m3u8"
+
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="2",URI="a1/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud2",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a2/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud3",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a3/prog_index.m3u8"
+
+
+#EXT-X-MEDIA:TYPE=CLOSED-CAPTIONS,GROUP-ID="cc1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,INSTREAM-ID="CC1"
+
+
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="sub1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,FORCED=NO,URI="s1/en/prog_index.m3u8"
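
This fMP4 example lists the same eight video renditions three times, once per audio group (aud1 = stereo AAC, aud2 = 6-channel AC-3, aud3 = 6-channel E-AC-3), so variant selection has to filter on the AUDIO attribute before ranking by bandwidth. Continuing the hypothetical parse_variants sketch from above:

    variants = parse_variants(open('img_bipbop_adv_example_fmp4.m3u8').read())

    # highest-bandwidth variant that uses the E-AC-3 audio group
    best_eac3 = max((v for v in variants if v.get('AUDIO') == 'aud3'),
                    key=lambda v: int(v['BANDWIDTH']))
    assert best_eac3['URI'] == 'v9/prog_index.m3u8'  # 1080p60, BANDWIDTH=8031601
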
diff --git a/test/testdata/mpd/subtitles.mpd b/test/testdata/mpd/subtitles.mpd
new file mode 100644
index 0000000..6f948ad
--- /dev/null
+++ b/test/testdata/mpd/subtitles.mpd
@@ -0,0 +1,351 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<MPD
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns="urn:mpeg:dash:schema:mpd:2011"
+ xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
+ type="static"
+ mediaPresentationDuration="PT14M48S"
+ maxSegmentDuration="PT1M"
+ minBufferTime="PT10S"
+ profiles="urn:mpeg:dash:profile:isoff-live:2011">
+ <Period
+ id="1"
+ duration="PT14M48S">
+ <BaseURL>dash/</BaseURL>
+ <AdaptationSet
+ id="1"
+ group="1"
+ contentType="audio"
+ segmentAlignment="true"
+ audioSamplingRate="48000"
+ mimeType="audio/mp4"
+ codecs="mp4a.40.2"
+ startWithSAP="1">
+ <AudioChannelConfiguration
+ schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
+ value="2" />
+ <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+ <SegmentTemplate
+ timescale="48000"
+ initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+ media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+ <SegmentTimeline>
+ <S t="0" d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="96256" r="2" />
+ <S d="95232" />
+ <S d="3584" />
+ </SegmentTimeline>
+ </SegmentTemplate>
+ <Representation
+ id="audio=128001"
+ bandwidth="128001">
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet
+ id="2"
+ group="3"
+ contentType="text"
+ lang="en"
+ mimeType="application/mp4"
+ codecs="stpp"
+ startWithSAP="1">
+ <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
+ <SegmentTemplate
+ timescale="1000"
+ initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+ media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+ <SegmentTimeline>
+ <S t="0" d="60000" r="9" />
+ <S d="24000" />
+ </SegmentTimeline>
+ </SegmentTemplate>
+ <Representation
+ id="textstream_eng=1000"
+ bandwidth="1000">
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet
+ id="3"
+ group="2"
+ contentType="video"
+ par="960:409"
+ minBandwidth="100000"
+ maxBandwidth="4482000"
+ maxWidth="1689"
+ maxHeight="720"
+ segmentAlignment="true"
+ mimeType="video/mp4"
+ codecs="avc1.4D401F"
+ startWithSAP="1">
+ <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+ <SegmentTemplate
+ timescale="12288"
+ initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+ media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+ <SegmentTimeline>
+ <S t="0" d="24576" r="443" />
+ </SegmentTimeline>
+ </SegmentTemplate>
+ <Representation
+ id="video=100000"
+ bandwidth="100000"
+ width="336"
+ height="144"
+ sar="2880:2863"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=326000"
+ bandwidth="326000"
+ width="562"
+ height="240"
+ sar="115200:114929"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=698000"
+ bandwidth="698000"
+ width="844"
+ height="360"
+ sar="86400:86299"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=1493000"
+ bandwidth="1493000"
+ width="1126"
+ height="480"
+ sar="230400:230267"
+ scanType="progressive">
+ </Representation>
+ <Representation
+ id="video=4482000"
+ bandwidth="4482000"
+ width="1688"
+ height="720"
+ sar="86400:86299"
+ scanType="progressive">
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
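
SegmentTimeline compresses the MPD's segment list: each <S> contributes r + 1 segments of duration d, starting at t (or at the running end time when t is omitted), and each start time is substituted for $Time$ in the SegmentTemplate media pattern. A hedged sketch of that expansion:

    def expand_timeline(s_elements, media_template, representation_id):
        """s_elements: (t, d, r) tuples; t is None when omitted, r defaults to 0."""
        urls, time = [], 0
        for t, d, r in s_elements:
            if t is not None:
                time = t  # an explicit t resets the running clock
            for _ in range(r + 1):
                urls.append(media_template
                            .replace('$RepresentationID$', representation_id)
                            .replace('$Time$', str(time)))
                time += d
        return urls

    # the video timeline <S t="0" d="24576" r="443"/> at timescale 12288 expands
    # to 444 two-second segments -- the same 888 s the ISM manifest describes
    segments = expand_timeline(
        [(0, 24576, 443)],
        '3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash',
        'video=100000')
    assert len(segments) == 444
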
diff --git a/test/testdata/thumbnails/foo %d bar/foo_%d.webp b/test/testdata/thumbnails/foo %d bar/foo_%d.webp
new file mode 100644
index 0000000..d64d083
--- /dev/null
+++ b/test/testdata/thumbnails/foo %d bar/foo_%d.webp
Binary files differ
diff --git a/tox.ini b/tox.ini
index f98aec6..4a31eb2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,7 @@
[tox]
envlist = py26,py27,py33,py34,py35
+
+# Needed?
[testenv]
deps =
nose